Spaces:

owiedotch
/

dac

Sleeping

App Files Files Community

owiedotch commited on Aug 26

Commit

cefd33c

•

1 Parent(s): c07b48c

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -51

app.py CHANGED Viewed

@@ -27,7 +27,7 @@ cancel_encode = False
 cancel_decode = False
 cancel_stream = False
-@spaces.GPU(duration=30)  # Changed from 250 to 30
 def encode_audio(audio_file_path):
     global cancel_encode
@@ -51,22 +51,10 @@ def encode_audio(audio_file_path):
         # Encode the audio
         tokens = semanticodec.encode(temp_wav_file_path)
-        # Convert tokens to NumPy and save to .owie file
         tokens_numpy = tokens.detach().cpu().numpy()
-        print(f"Original tokens shape: {tokens_numpy.shape}")
-        # Ensure tokens_numpy is 2D
-        if tokens_numpy.ndim == 1:
-            tokens_numpy = tokens_numpy.reshape(1, -1)
-        elif tokens_numpy.ndim == 2:
-            pass  # Already 2D
-        elif tokens_numpy.ndim == 3 and tokens_numpy.shape[0] == 1:
-            tokens_numpy = tokens_numpy.squeeze(0)
-        else:
-            raise ValueError(f"Unexpected tokens array shape: {tokens_numpy.shape}")
-        print(f"Reshaped tokens shape: {tokens_numpy.shape}")
         # Create temporary .owie file
         temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
@@ -82,12 +70,12 @@ def encode_audio(audio_file_path):
     except Exception as e:
         print(f"Encoding error: {e}")
-        return None  # Return None instead of the error message
     finally:
-        cancel_encode = False  # Reset cancel flag after encoding
         if 'temp_wav_file_path' in locals():
-            os.remove(temp_wav_file_path)  # Clean up temporary WAV file
 # Add this function to handle the output
 def handle_encode_output(file_path):
@@ -95,7 +83,7 @@ def handle_encode_output(file_path):
         return None, gr.Markdown("Encoding failed. Please ensure you've uploaded an audio file and try again.", visible=True)
     return file_path, gr.Markdown(visible=False)
-@spaces.GPU(duration=30)  # Changed from 250 to 30
 def decode_audio(encoded_file_path):
     global cancel_decode
@@ -105,18 +93,11 @@ def decode_audio(encoded_file_path):
             sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
             compressed_data = temp_file.read()
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
-            tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
-            # Reshape tokens to match the original shape
-            tokens_numpy = tokens_numpy.reshape(1, -1, 2)
-            # Create a writable copy of the numpy array
-            tokens_numpy = np.array(tokens_numpy, copy=True)
             # Move the tensor to the same device as the model
             tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
-        # Debugging prints to check tensor shapes and device
         print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}, device: {tokens.device}")
         print(f"Model device: {semanticodec.device}")
@@ -124,23 +105,20 @@ def decode_audio(encoded_file_path):
         with torch.no_grad():
             waveform = semanticodec.decode(tokens)
-        # Move waveform to CPU for saving
-        waveform_cpu = waveform.cpu()
         # Save to a temporary WAV file
         temp_wav_path = tempfile.mktemp(suffix=".wav")
-        torchaudio.save(temp_wav_path, waveform_cpu.squeeze(0), sample_rate)
         return temp_wav_path
     except Exception as e:
         print(f"Decoding error: {e}")
         print(f"Traceback: {traceback.format_exc()}")
-        return str(e)  # Return error message as string
     finally:
-        cancel_decode = False  # Reset cancel flag after decoding
-@spaces.GPU(duration=30)  # Changed from 250 to 30
 async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
     global cancel_stream
@@ -150,11 +128,7 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
             sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
             compressed_data = temp_file.read()
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
-            tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
-            tokens_numpy = tokens_numpy.reshape(1, -1, 2)
-            # Create a writable copy of the numpy array
-            tokens_numpy = np.array(tokens_numpy, copy=True)
             # Move the tensor to the same device as the model
             tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
@@ -163,13 +137,13 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
         print(f"Model device: {semanticodec.device}")
         # Decode the audio in chunks
-        chunk_size = sample_rate // 2  # Adjust chunk size to account for the new shape
         with torch.no_grad():
-            for i in range(0, tokens.shape[1], chunk_size):
                 if cancel_stream:
                     break  # Exit the loop if cancellation is requested
-                tokens_chunk = tokens[:, i:i+chunk_size, :]
                 audio_chunk = semanticodec.decode(tokens_chunk)
                 # Convert to numpy array and transpose
                 audio_data = audio_chunk.squeeze(0).cpu().numpy().T
@@ -182,17 +156,17 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
         yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32))  # Return silence
     finally:
-        cancel_stream = False  # Reset cancel flag after streaming
 # Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("## Audio Compression with SemantiCodec (GPU/CPU)")
     with gr.Tab("Encode"):
-        input_audio = gr.Audio(label="Input Audio", type="filepath")  # Using "filepath" mode
         encode_button = gr.Button("Encode")
         cancel_encode_button = gr.Button("Cancel")
-        encoded_output = gr.File(label="Encoded File (.owie)", type="filepath")  # Using "filepath" mode
         encode_error_message = gr.Markdown(visible=False)
         def encode_wrapper(audio):
@@ -205,24 +179,24 @@ with gr.Blocks() as demo:
             inputs=input_audio,
             outputs=[encoded_output, encode_error_message]
         )
-        cancel_encode_button.click(lambda: globals().update(cancel_encode=True), outputs=None)  # Set cancel_encode flag
     with gr.Tab("Decode"):
-        input_encoded = gr.File(label="Encoded File (.owie)", type="filepath")  # Using "filepath" mode
         decode_button = gr.Button("Decode")
         cancel_decode_button = gr.Button("Cancel")
-        decoded_output = gr.Audio(label="Decoded Audio", type="filepath")  # Using "filepath" mode
         decode_button.click(decode_audio, inputs=input_encoded, outputs=decoded_output)
-        cancel_decode_button.click(lambda: globals().update(cancel_decode=True), outputs=None)  # Set cancel_decode flag
     with gr.Tab("Streaming"):
-        input_encoded_stream = gr.File(label="Encoded File (.owie)", type="filepath")  # Using "filepath" mode
         stream_button = gr.Button("Start Streaming")
         cancel_stream_button = gr.Button("Cancel")
         audio_output = gr.Audio(label="Streaming Audio Output", streaming=True)
         stream_button.click(stream_decode_audio, inputs=input_encoded_stream, outputs=audio_output)
-        cancel_stream_button.click(lambda: globals().update(cancel_stream=True), outputs=None)  # Set cancel_stream flag
 demo.queue().launch()

 cancel_decode = False
 cancel_stream = False
+@spaces.GPU(duration=30)
 def encode_audio(audio_file_path):
     global cancel_encode
         # Encode the audio
         tokens = semanticodec.encode(temp_wav_file_path)
+        # Convert tokens to NumPy
         tokens_numpy = tokens.detach().cpu().numpy()
+        print(f"Tokens shape: {tokens_numpy.shape}")
         # Create temporary .owie file
         temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
     except Exception as e:
         print(f"Encoding error: {e}")
+        return None
     finally:
+        cancel_encode = False
         if 'temp_wav_file_path' in locals():
+            os.remove(temp_wav_file_path)
 # Add this function to handle the output
 def handle_encode_output(file_path):
         return None, gr.Markdown("Encoding failed. Please ensure you've uploaded an audio file and try again.", visible=True)
     return file_path, gr.Markdown(visible=False)
+@spaces.GPU(duration=30)
 def decode_audio(encoded_file_path):
     global cancel_decode
             sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
             compressed_data = temp_file.read()
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
+            tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(-1)
             # Move the tensor to the same device as the model
             tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
         print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}, device: {tokens.device}")
         print(f"Model device: {semanticodec.device}")
         with torch.no_grad():
             waveform = semanticodec.decode(tokens)
         # Save to a temporary WAV file
         temp_wav_path = tempfile.mktemp(suffix=".wav")
+        torchaudio.save(temp_wav_path, waveform.squeeze(0).cpu(), sample_rate)
         return temp_wav_path
     except Exception as e:
         print(f"Decoding error: {e}")
         print(f"Traceback: {traceback.format_exc()}")
+        return str(e)
     finally:
+        cancel_decode = False
+@spaces.GPU(duration=30)
 async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]:
     global cancel_stream
             sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
             compressed_data = temp_file.read()
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
+            tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(-1)
             # Move the tensor to the same device as the model
             tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
         print(f"Model device: {semanticodec.device}")
         # Decode the audio in chunks
+        chunk_size = sample_rate * 2  # Adjust chunk size as needed
         with torch.no_grad():
+            for i in range(0, tokens.shape[0], chunk_size):
                 if cancel_stream:
                     break  # Exit the loop if cancellation is requested
+                tokens_chunk = tokens[i:i+chunk_size]
                 audio_chunk = semanticodec.decode(tokens_chunk)
                 # Convert to numpy array and transpose
                 audio_data = audio_chunk.squeeze(0).cpu().numpy().T
         yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32))  # Return silence
     finally:
+        cancel_stream = False
 # Gradio Interface
 with gr.Blocks() as demo:
     gr.Markdown("## Audio Compression with SemantiCodec (GPU/CPU)")
     with gr.Tab("Encode"):
+        input_audio = gr.Audio(label="Input Audio", type="filepath")
         encode_button = gr.Button("Encode")
         cancel_encode_button = gr.Button("Cancel")
+        encoded_output = gr.File(label="Encoded File (.owie)", type="filepath")
         encode_error_message = gr.Markdown(visible=False)
         def encode_wrapper(audio):
             inputs=input_audio,
             outputs=[encoded_output, encode_error_message]
         )
+        cancel_encode_button.click(lambda: globals().update(cancel_encode=True), outputs=None)
     with gr.Tab("Decode"):
+        input_encoded = gr.File(label="Encoded File (.owie)", type="filepath")
         decode_button = gr.Button("Decode")
         cancel_decode_button = gr.Button("Cancel")
+        decoded_output = gr.Audio(label="Decoded Audio", type="filepath")
         decode_button.click(decode_audio, inputs=input_encoded, outputs=decoded_output)
+        cancel_decode_button.click(lambda: globals().update(cancel_decode=True), outputs=None)
     with gr.Tab("Streaming"):
+        input_encoded_stream = gr.File(label="Encoded File (.owie)", type="filepath")
         stream_button = gr.Button("Start Streaming")
         cancel_stream_button = gr.Button("Cancel")
         audio_output = gr.Audio(label="Streaming Audio Output", streaming=True)
         stream_button.click(stream_decode_audio, inputs=input_encoded_stream, outputs=audio_output)
+        cancel_stream_button.click(lambda: globals().update(cancel_stream=True), outputs=None)
 demo.queue().launch()