owiedotch committed on
Commit
47c28f2
1 Parent(s): 44da512

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -33,14 +33,15 @@ def encode_audio(audio_file_path):
33
  # Load the audio file
34
  waveform, sample_rate = torchaudio.load(audio_file_path)
35
 
 
 
 
 
36
  # Encode the audio
37
- audio = waveform.unsqueeze(0).to(torch_device)
38
  with torch.no_grad():
39
  tokens = semanticodec.encode(audio)
40
 
41
- # Debugging print statement
42
- print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
43
-
44
  # Convert to NumPy and save to a temporary .owie file
45
  tokens_numpy = tokens.detach().cpu().numpy()
46
  temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
@@ -70,9 +71,12 @@ def decode_audio(encoded_file_path):
70
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
71
  compressed_data = temp_file.read()
72
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
73
- tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
74
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
75
 
 
 
 
76
  # Decode the audio
77
  with torch.no_grad():
78
  waveform = semanticodec.decode(tokens)
@@ -98,9 +102,12 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
98
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
99
  compressed_data = temp_file.read()
100
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
101
- tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
102
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
103
 
 
 
 
104
  # Decode the audio in chunks
105
  chunk_size = sample_rate # Use the stored sample rate as chunk size
106
  with torch.no_grad():
 
33
  # Load the audio file
34
  waveform, sample_rate = torchaudio.load(audio_file_path)
35
 
36
+ # Ensure waveform has the right dimensions
37
+ if waveform.ndim == 1:
38
+ waveform = waveform.unsqueeze(0)
39
+
40
  # Encode the audio
41
+ audio = waveform.to(torch_device)
42
  with torch.no_grad():
43
  tokens = semanticodec.encode(audio)
44
 
 
 
 
45
  # Convert to NumPy and save to a temporary .owie file
46
  tokens_numpy = tokens.detach().cpu().numpy()
47
  temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
 
71
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
72
  compressed_data = temp_file.read()
73
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
74
+ tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.float32) # Updated to np.float32
75
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
76
 
77
+ # Ensure tokens has the right dimensions
78
+ tokens = tokens.unsqueeze(0) if tokens.ndimension() == 1 else tokens
79
+
80
  # Decode the audio
81
  with torch.no_grad():
82
  waveform = semanticodec.decode(tokens)
 
102
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
103
  compressed_data = temp_file.read()
104
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
105
+ tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.float32) # Updated to np.float32
106
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
107
 
108
+ # Ensure tokens has the right dimensions
109
+ tokens = tokens.unsqueeze(0) if tokens.ndimension() == 1 else tokens
110
+
111
  # Decode the audio in chunks
112
  chunk_size = sample_rate # Use the stored sample rate as chunk size
113
  with torch.no_grad():