Spaces:

owiedotch
/

dac

Sleeping

App Files Community

owiedotch committed on Aug 25

Commit

ab12e78

•

1 Parent(s): d07be48

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -23

app.py CHANGED Viewed

@@ -3,48 +3,46 @@ import torch
 from datasets import load_dataset, Audio
 from transformers import EncodecModel, AutoProcessor
 import spaces
 # Load the Encodec model and processor
 model = EncodecModel.from_pretrained("facebook/encodec_48khz")
 processor = AutoProcessor.from_pretrained("facebook/encodec_48khz")
 @spaces.GPU
-def encode(audio_file_path):  # Change argument name to reflect it's a path
     try:
         # Open the audio file
         with open(audio_file_path, "rb") as audio_file:
             # Load and preprocess the audio
             audio_sample, sampling_rate = load_dataset("audiofolder", data_dir=audio_file_path, split="train")[0]["audio"]
-            inputs = processor(raw_audio=audio_sample, sampling_rate=sampling_rate, return_tensors="pt")
-            # Encode the audio
-            with torch.no_grad():
-                encoder_outputs = model.encode(inputs["input_values"], inputs["padding_mask"])
-            # Extract the encoded codes and scales
-            audio_codes = encoder_outputs.audio_codes
-            audio_scales = encoder_outputs.audio_scales
-            # Return the encoded data
-            return {"codes": audio_codes.tolist(), "scales": audio_scales.tolist()}
     except Exception as e:
         gr.Warning(f"An error occurred during encoding: {e}")
         return None
 @spaces.GPU
-def decode(encoded_data):
     try:
-        # Convert the encoded data back to tensors
-        audio_codes = torch.tensor(encoded_data["codes"])
-        audio_scales = torch.tensor(encoded_data["scales"])
-        # Decode the audio
-        with torch.no_grad():
-            audio_values = model.decode(audio_codes, audio_scales)[0]
         # Convert the decoded audio to a numpy array for Gradio output
-        decoded_audio = audio_values.cpu().numpy()
         return decoded_audio
@@ -61,17 +59,17 @@ with gr.Blocks() as demo:
             audio_input = gr.Audio(type="filepath", label="Input Audio")
             encode_button = gr.Button("Encode", variant="primary")
         with gr.Row():
-            encoded_output = gr.JSON(label="Encoded Audio")
         encode_button.click(encode, inputs=audio_input, outputs=encoded_output)
     with gr.Tab("Decode"):
         with gr.Row():
-            encoded_input = gr.JSON(label="Encoded Audio")
             decode_button = gr.Button("Decode", variant="primary")
         with gr.Row():
             decoded_output = gr.Audio(label="Decompressed Audio")
-        decode_button.click(decode, inputs=encoded_input, outputs=decoded_output)
 demo.queue().launch()

 from datasets import load_dataset, Audio
 from transformers import EncodecModel, AutoProcessor
 import spaces
+from encodec import compress, decompress
+import io
 # Load the Encodec model and processor
 model = EncodecModel.from_pretrained("facebook/encodec_48khz")
 processor = AutoProcessor.from_pretrained("facebook/encodec_48khz")
 @spaces.GPU
+def encode(audio_file_path):
     try:
         # Open the audio file
         with open(audio_file_path, "rb") as audio_file:
             # Load and preprocess the audio
             audio_sample, sampling_rate = load_dataset("audiofolder", data_dir=audio_file_path, split="train")[0]["audio"]
+            wav = torch.tensor(audio_sample).unsqueeze(0)
+            # Compress to ecdc
+            compressed_audio = compress(model, wav)
+            # Save compressed audio to BytesIO
+            output = io.BytesIO(compressed_audio)
+            output.seek(0)
+            return output
     except Exception as e:
         gr.Warning(f"An error occurred during encoding: {e}")
         return None
 @spaces.GPU
+def decode(compressed_audio_file):
     try:
+        # Load compressed audio
+        compressed_audio = compressed_audio_file.read()
+        # Decompress audio
+        wav, sr = decompress(compressed_audio)
         # Convert the decoded audio to a numpy array for Gradio output
+        decoded_audio = wav.cpu().numpy()
         return decoded_audio
             audio_input = gr.Audio(type="filepath", label="Input Audio")
             encode_button = gr.Button("Encode", variant="primary")
         with gr.Row():
+            encoded_output = gr.File(label="Compressed Audio (.ecdc)")
         encode_button.click(encode, inputs=audio_input, outputs=encoded_output)
     with gr.Tab("Decode"):
         with gr.Row():
+            compressed_input = gr.File(label="Compressed Audio (.ecdc)")
             decode_button = gr.Button("Decode", variant="primary")
         with gr.Row():
             decoded_output = gr.Audio(label="Decompressed Audio")
+        decode_button.click(decode, inputs=compressed_input, outputs=decoded_output)
 demo.queue().launch()