owiedotch committed on
Commit
d24bcbd
1 Parent(s): d888fa7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -18
app.py CHANGED
@@ -29,17 +29,6 @@ def encode_audio(audio_file_path):
29
  # Load the audio file
30
  waveform, sample_rate = torchaudio.load(audio_file_path)
31
 
32
- # Resample to 32kHz if necessary
33
- if sample_rate != 32000:
34
- resampler = torchaudio.transforms.Resample(sample_rate, 32000)
35
- waveform = resampler(waveform)
36
-
37
- # Convert to 32 channels if necessary
38
- if waveform.size(0) < 32:
39
- waveform = waveform.repeat(32, 1)[:32, :]
40
- elif waveform.size(0) > 32:
41
- waveform = waveform[:32, :]
42
-
43
  # Encode the audio
44
  audio = waveform.unsqueeze(0).to(torch_device)
45
  with torch.no_grad():
@@ -50,9 +39,12 @@ def encode_audio(audio_file_path):
50
  temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
51
  os.close(temp_fd) # Close the file descriptor to avoid issues with os.fdopen
52
  with open(temp_file_path, 'wb') as temp_file:
 
 
 
53
  compressed_data = lz4.frame.compress(z_numpy.tobytes())
54
  temp_file.write(compressed_data)
55
-
56
  return temp_file_path
57
 
58
  except Exception as e:
@@ -61,8 +53,9 @@ def encode_audio(audio_file_path):
61
  @spaces.GPU(duration=180)
62
  def decode_audio(encoded_file_path):
63
  try:
64
- # Load encoded data from the .owie file
65
  with open(encoded_file_path, 'rb') as temp_file:
 
66
  compressed_data = temp_file.read()
67
  z_numpy_bytes = lz4.frame.decompress(compressed_data)
68
  z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
@@ -74,7 +67,7 @@ def decode_audio(encoded_file_path):
74
 
75
  # Save to a temporary WAV file
76
  temp_wav_path = tempfile.mktemp(suffix=".wav")
77
- torchaudio.save(temp_wav_path, reconstructed_audio.squeeze(0).cpu(), 32000)
78
  return temp_wav_path
79
 
80
  except Exception as e:
@@ -83,16 +76,16 @@ def decode_audio(encoded_file_path):
83
  @spaces.GPU(duration=180)
84
  def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
85
  try:
86
- # Load encoded data from the .owie file
87
  with open(encoded_file_path, 'rb') as temp_file:
 
88
  compressed_data = temp_file.read()
89
  z_numpy_bytes = lz4.frame.decompress(compressed_data)
90
  z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
91
  z = torch.from_numpy(z_numpy).to(torch_device)
92
 
93
  # Decode the audio in chunks
94
- chunk_size = 32000 # 1 second of audio at 32kHz
95
- sample_rate = 32000 # AGC model's output sample rate
96
  with torch.no_grad():
97
  for i in range(0, z.shape[2], chunk_size):
98
  z_chunk = z[:, :, i:i+chunk_size]
@@ -105,9 +98,10 @@ def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
105
  print(f"Streaming decoding error: {e}")
106
  yield (sample_rate, np.zeros((chunk_size, 32), dtype=np.float32)) # Return silence
107
 
 
108
  # Gradio Interface
109
  with gr.Blocks() as demo:
110
- gr.Markdown("## Audio Compression with AGC (GPU/CPU) - 32 channels, 32kHz")
111
 
112
  with gr.Tab("Encode"):
113
  input_audio = gr.Audio(label="Input Audio", type="filepath")
 
29
  # Load the audio file
30
  waveform, sample_rate = torchaudio.load(audio_file_path)
31
 
 
 
 
 
 
 
 
 
 
 
 
32
  # Encode the audio
33
  audio = waveform.unsqueeze(0).to(torch_device)
34
  with torch.no_grad():
 
39
  temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
40
  os.close(temp_fd) # Close the file descriptor to avoid issues with os.fdopen
41
  with open(temp_file_path, 'wb') as temp_file:
42
+ # Store the sample rate as the first 4 bytes
43
+ temp_file.write(sample_rate.to_bytes(4, byteorder='little'))
44
+ # Compress and write the encoded data
45
  compressed_data = lz4.frame.compress(z_numpy.tobytes())
46
  temp_file.write(compressed_data)
47
+
48
  return temp_file_path
49
 
50
  except Exception as e:
 
53
  @spaces.GPU(duration=180)
54
  def decode_audio(encoded_file_path):
55
  try:
56
+ # Load encoded data and sample rate from the .owie file
57
  with open(encoded_file_path, 'rb') as temp_file:
58
+ sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
59
  compressed_data = temp_file.read()
60
  z_numpy_bytes = lz4.frame.decompress(compressed_data)
61
  z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
 
67
 
68
  # Save to a temporary WAV file
69
  temp_wav_path = tempfile.mktemp(suffix=".wav")
70
+ torchaudio.save(temp_wav_path, reconstructed_audio.squeeze(0).cpu(), sample_rate)
71
  return temp_wav_path
72
 
73
  except Exception as e:
 
76
  @spaces.GPU(duration=180)
77
  def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
78
  try:
79
+ # Load encoded data and sample rate from the .owie file
80
  with open(encoded_file_path, 'rb') as temp_file:
81
+ sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
82
  compressed_data = temp_file.read()
83
  z_numpy_bytes = lz4.frame.decompress(compressed_data)
84
  z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
85
  z = torch.from_numpy(z_numpy).to(torch_device)
86
 
87
  # Decode the audio in chunks
88
+ chunk_size = sample_rate # Use the stored sample rate as chunk size
 
89
  with torch.no_grad():
90
  for i in range(0, z.shape[2], chunk_size):
91
  z_chunk = z[:, :, i:i+chunk_size]
 
98
  print(f"Streaming decoding error: {e}")
99
  yield (sample_rate, np.zeros((chunk_size, 32), dtype=np.float32)) # Return silence
100
 
101
+
102
  # Gradio Interface
103
  with gr.Blocks() as demo:
104
+ gr.Markdown("## Audio Compression with AGC (GPU/CPU)")
105
 
106
  with gr.Tab("Encode"):
107
  input_audio = gr.Audio(label="Input Audio", type="filepath")