Spaces:

owiedotch
/

dac

Sleeping

App Files Files Community

owiedotch committed on Aug 26

Commit

0b50165

•

1 Parent(s): e4056a5

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -10

app.py CHANGED Viewed

@@ -18,6 +18,7 @@ except Exception as e:
     torch_device = torch.device("cpu")
 # Load the AGC model
 def load_agc_model():
     return AGC.from_pretrained("Audiogen/agc-continuous").to(torch_device)
@@ -29,9 +30,16 @@ def encode_audio(audio_file_path):
         # Load the audio file
         waveform, sample_rate = torchaudio.load(audio_file_path)
-        # Convert to stereo if necessary
-        if waveform.size(0) == 1:
-            waveform = waveform.repeat(2, 1)
         # Encode the audio
         audio = waveform.unsqueeze(0).to(torch_device)
@@ -67,7 +75,7 @@ def decode_audio(encoded_file_path):
         # Save to a temporary WAV file
         temp_wav_path = tempfile.mktemp(suffix=".wav")
-        torchaudio.save(temp_wav_path, reconstructed_audio.squeeze(0).cpu(), sample_rate)
         return temp_wav_path
     except Exception as e:
@@ -84,23 +92,23 @@ def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
             z = torch.from_numpy(z_numpy).to(torch_device)
         # Decode the audio in chunks
-        chunk_size = 16000  # 1 second of audio at 16kHz
-        sample_rate = 16000  # AGC model's output sample rate
         with torch.no_grad():
             for i in range(0, z.shape[2], chunk_size):
                 z_chunk = z[:, :, i:i+chunk_size]
                 audio_chunk = agc.decode(z_chunk)
-                # Convert to numpy array and transpose
-                audio_data = audio_chunk.squeeze(0).cpu().numpy().T
                 yield (sample_rate, audio_data)
     except Exception as e:
         print(f"Streaming decoding error: {e}")
-        yield (sample_rate, np.zeros((2, chunk_size), dtype=np.float32))  # Return silence in case of error
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Audio Compression with AGC (GPU/CPU)")
     with gr.Tab("Encode"):
         input_audio = gr.Audio(label="Input Audio", type="filepath")

     torch_device = torch.device("cpu")
 # Load the AGC model
+@spaces.GPU(duration=180)
 def load_agc_model():
     return AGC.from_pretrained("Audiogen/agc-continuous").to(torch_device)
         # Load the audio file
         waveform, sample_rate = torchaudio.load(audio_file_path)
+        # Resample to 32kHz if necessary
+        if sample_rate != 32000:
+            resampler = torchaudio.transforms.Resample(sample_rate, 32000)
+            waveform = resampler(waveform)
+        # Convert to 32 channels if necessary
+        if waveform.size(0) < 32:
+            waveform = waveform.repeat(32, 1)[:32, :]
+        elif waveform.size(0) > 32:
+            waveform = waveform[:32, :]
         # Encode the audio
         audio = waveform.unsqueeze(0).to(torch_device)
         # Save to a temporary WAV file
         temp_wav_path = tempfile.mktemp(suffix=".wav")
+        torchaudio.save(temp_wav_path, reconstructed_audio.squeeze(0).cpu(), 32000)
         return temp_wav_path
     except Exception as e:
             z = torch.from_numpy(z_numpy).to(torch_device)
         # Decode the audio in chunks
+        chunk_size = 32000  # 1 second of audio at 32kHz
+        sample_rate = 32000  # AGC model's output sample rate
         with torch.no_grad():
             for i in range(0, z.shape[2], chunk_size):
                 z_chunk = z[:, :, i:i+chunk_size]
                 audio_chunk = agc.decode(z_chunk)
+                # Convert to numpy array (32 channels)
+                audio_data = audio_chunk.squeeze(0).cpu().numpy()
                 yield (sample_rate, audio_data)
     except Exception as e:
         print(f"Streaming decoding error: {e}")
+        yield (sample_rate, np.zeros((32, chunk_size), dtype=np.float32))  # Return silence in case of error
 # Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("## Audio Compression with AGC (GPU/CPU) - 32 channels, 32kHz")
     with gr.Tab("Encode"):
         input_audio = gr.Audio(label="Input Audio", type="filepath")