Spaces:

owiedotch
/

dac

Sleeping

App Files Files Community

owiedotch committed on Aug 26

Commit

d24bcbd

•

1 Parent(s): d888fa7

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -18

app.py CHANGED Viewed

@@ -29,17 +29,6 @@ def encode_audio(audio_file_path):
         # Load the audio file
         waveform, sample_rate = torchaudio.load(audio_file_path)
-        # Resample to 32kHz if necessary
-        if sample_rate != 32000:
-            resampler = torchaudio.transforms.Resample(sample_rate, 32000)
-            waveform = resampler(waveform)
-        # Convert to 32 channels if necessary
-        if waveform.size(0) < 32:
-            waveform = waveform.repeat(32, 1)[:32, :]
-        elif waveform.size(0) > 32:
-            waveform = waveform[:32, :]
         # Encode the audio
         audio = waveform.unsqueeze(0).to(torch_device)
         with torch.no_grad():
@@ -50,9 +39,12 @@ def encode_audio(audio_file_path):
         temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
         os.close(temp_fd)  # Close the file descriptor to avoid issues with os.fdopen
         with open(temp_file_path, 'wb') as temp_file:
             compressed_data = lz4.frame.compress(z_numpy.tobytes())
             temp_file.write(compressed_data)
         return temp_file_path
     except Exception as e:
@@ -61,8 +53,9 @@ def encode_audio(audio_file_path):
 @spaces.GPU(duration=180)
 def decode_audio(encoded_file_path):
     try:
-        # Load encoded data from the .owie file
         with open(encoded_file_path, 'rb') as temp_file:
             compressed_data = temp_file.read()
             z_numpy_bytes = lz4.frame.decompress(compressed_data)
             z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
@@ -74,7 +67,7 @@ def decode_audio(encoded_file_path):
         # Save to a temporary WAV file
         temp_wav_path = tempfile.mktemp(suffix=".wav")
-        torchaudio.save(temp_wav_path, reconstructed_audio.squeeze(0).cpu(), 32000)
         return temp_wav_path
     except Exception as e:
@@ -83,16 +76,16 @@ def decode_audio(encoded_file_path):
 @spaces.GPU(duration=180)
 def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
     try:
-        # Load encoded data from the .owie file
         with open(encoded_file_path, 'rb') as temp_file:
             compressed_data = temp_file.read()
             z_numpy_bytes = lz4.frame.decompress(compressed_data)
             z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
             z = torch.from_numpy(z_numpy).to(torch_device)
         # Decode the audio in chunks
-        chunk_size = 32000  # 1 second of audio at 32kHz
-        sample_rate = 32000  # AGC model's output sample rate
         with torch.no_grad():
             for i in range(0, z.shape[2], chunk_size):
                 z_chunk = z[:, :, i:i+chunk_size]
@@ -105,9 +98,10 @@ def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
         print(f"Streaming decoding error: {e}")
         yield (sample_rate, np.zeros((chunk_size, 32), dtype=np.float32))  # Return silence
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Audio Compression with AGC (GPU/CPU) - 32 channels, 32kHz")
     with gr.Tab("Encode"):
         input_audio = gr.Audio(label="Input Audio", type="filepath")

         # Load the audio file
         waveform, sample_rate = torchaudio.load(audio_file_path)
         # Encode the audio
         audio = waveform.unsqueeze(0).to(torch_device)
         with torch.no_grad():
         temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
         os.close(temp_fd)  # Close the file descriptor to avoid issues with os.fdopen
         with open(temp_file_path, 'wb') as temp_file:
+            # Store the sample rate as the first 4 bytes
+            temp_file.write(sample_rate.to_bytes(4, byteorder='little'))
+            # Compress and write the encoded data
             compressed_data = lz4.frame.compress(z_numpy.tobytes())
             temp_file.write(compressed_data)
         return temp_file_path
     except Exception as e:
 @spaces.GPU(duration=180)
 def decode_audio(encoded_file_path):
     try:
+        # Load encoded data and sample rate from the .owie file
         with open(encoded_file_path, 'rb') as temp_file:
+            sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
             compressed_data = temp_file.read()
             z_numpy_bytes = lz4.frame.decompress(compressed_data)
             z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
         # Save to a temporary WAV file
         temp_wav_path = tempfile.mktemp(suffix=".wav")
+        torchaudio.save(temp_wav_path, reconstructed_audio.squeeze(0).cpu(), sample_rate)
         return temp_wav_path
     except Exception as e:
 @spaces.GPU(duration=180)
 def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
     try:
+        # Load encoded data and sample rate from the .owie file
         with open(encoded_file_path, 'rb') as temp_file:
+            sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
             compressed_data = temp_file.read()
             z_numpy_bytes = lz4.frame.decompress(compressed_data)
             z_numpy = np.frombuffer(z_numpy_bytes, dtype=np.float32).reshape(1, 32, -1)
             z = torch.from_numpy(z_numpy).to(torch_device)
         # Decode the audio in chunks
+        chunk_size = sample_rate  # Use the stored sample rate as chunk size
         with torch.no_grad():
             for i in range(0, z.shape[2], chunk_size):
                 z_chunk = z[:, :, i:i+chunk_size]
         print(f"Streaming decoding error: {e}")
         yield (sample_rate, np.zeros((chunk_size, 32), dtype=np.float32))  # Return silence
 # Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("## Audio Compression with AGC (GPU/CPU)")
     with gr.Tab("Encode"):
         input_audio = gr.Audio(label="Input Audio", type="filepath")