Spaces:

owiedotch
/

dac

Sleeping

App Files Community

owiedotch committed on Aug 26

Commit

c707064

•

1 Parent(s): 7b5adeb

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -9

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import lz4.frame
 import os
 from typing import Generator
 import asyncio  # Import asyncio for cancellation
 # Attempt to use GPU, fallback to CPU
 try:
@@ -106,13 +107,8 @@ def decode_audio(encoded_file_path):
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
-            # Check and reshape tokens
-            if tokens_numpy.ndim == 1:
-                tokens_numpy = tokens_numpy.reshape(1, -1)  # Reshape to [1, token_length]
-            elif tokens_numpy.ndim == 2:
-                tokens_numpy = tokens_numpy.reshape(1, tokens_numpy.shape[1])  # Ensure 2D tensor
-            else:
-                raise ValueError("Tokens array must be 1D or 2D")
             tokens = torch.from_numpy(tokens_numpy).to(torch_device)
@@ -130,6 +126,7 @@ def decode_audio(encoded_file_path):
     except Exception as e:
         print(f"Decoding error: {e}")
         return str(e)  # Return error message as string
     finally:
@@ -146,16 +143,19 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
             compressed_data = temp_file.read()
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
             tokens = torch.from_numpy(tokens_numpy).to(torch_device)
         # Decode the audio in chunks
-        chunk_size = sample_rate  # Use the stored sample rate as chunk size
         with torch.no_grad():
             for i in range(0, tokens.shape[1], chunk_size):
                 if cancel_stream:
                     break  # Exit the loop if cancellation is requested
-                tokens_chunk = tokens[:, i:i+chunk_size]
                 audio_chunk = semanticodec.decode(tokens_chunk)
                 # Convert to numpy array and transpose
                 audio_data = audio_chunk.squeeze(0).cpu().numpy().T
@@ -164,6 +164,7 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
     except Exception as e:
         print(f"Streaming decoding error: {e}")
         yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32))  # Return silence
     finally:

 import os
 from typing import Generator
 import asyncio  # Import asyncio for cancellation
+import traceback  # Import traceback for error handling
 # Attempt to use GPU, fallback to CPU
 try:
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
+            # Reshape tokens to match the original shape
+            tokens_numpy = tokens_numpy.reshape(1, -1, 2)
             tokens = torch.from_numpy(tokens_numpy).to(torch_device)
     except Exception as e:
         print(f"Decoding error: {e}")
+        print(f"Traceback: {traceback.format_exc()}")
         return str(e)  # Return error message as string
     finally:
             compressed_data = temp_file.read()
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
+            tokens_numpy = tokens_numpy.reshape(1, -1, 2)
             tokens = torch.from_numpy(tokens_numpy).to(torch_device)
+        print(f"Streaming tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
         # Decode the audio in chunks
+        chunk_size = sample_rate // 2  # Adjust chunk size to account for the new shape
         with torch.no_grad():
             for i in range(0, tokens.shape[1], chunk_size):
                 if cancel_stream:
                     break  # Exit the loop if cancellation is requested
+                tokens_chunk = tokens[:, i:i+chunk_size, :]
                 audio_chunk = semanticodec.decode(tokens_chunk)
                 # Convert to numpy array and transpose
                 audio_data = audio_chunk.squeeze(0).cpu().numpy().T
     except Exception as e:
         print(f"Streaming decoding error: {e}")
+        print(f"Traceback: {traceback.format_exc()}")
         yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32))  # Return silence
     finally: