Spaces:

owiedotch
/

dac

Sleeping

owiedotch committed on Aug 26

Commit

ff92e4e

•

1 Parent(s): c707064

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -110,10 +110,15 @@ def decode_audio(encoded_file_path):
             # Reshape tokens to match the original shape
             tokens_numpy = tokens_numpy.reshape(1, -1, 2)
-            tokens = torch.from_numpy(tokens_numpy).to(torch_device)
-        # Debugging prints to check tensor shapes
-        print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
         # Decode the audio
         with torch.no_grad():
@@ -144,9 +149,15 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
             tokens_numpy = tokens_numpy.reshape(1, -1, 2)
-            tokens = torch.from_numpy(tokens_numpy).to(torch_device)
-        print(f"Streaming tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
         # Decode the audio in chunks
         chunk_size = sample_rate // 2  # Adjust chunk size to account for the new shape

             # Reshape tokens to match the original shape
             tokens_numpy = tokens_numpy.reshape(1, -1, 2)
+            # Create a writable copy of the numpy array
+            tokens_numpy = np.array(tokens_numpy, copy=True)
+            # Move the tensor to the same device as the model
+            tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
+        # Debugging prints to check tensor shapes and device
+        print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}, device: {tokens.device}")
+        print(f"Model device: {semanticodec.device}")
         # Decode the audio
         with torch.no_grad():
             tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
             tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
             tokens_numpy = tokens_numpy.reshape(1, -1, 2)
+            # Create a writable copy of the numpy array
+            tokens_numpy = np.array(tokens_numpy, copy=True)
+            # Move the tensor to the same device as the model
+            tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
+        print(f"Streaming tokens shape: {tokens.shape}, dtype: {tokens.dtype}, device: {tokens.device}")
+        print(f"Model device: {semanticodec.device}")
         # Decode the audio in chunks
         chunk_size = sample_rate // 2  # Adjust chunk size to account for the new shape