Update app.py
Browse files
app.py
CHANGED
@@ -110,10 +110,15 @@ def decode_audio(encoded_file_path):
|
|
110 |
# Reshape tokens to match the original shape
|
111 |
tokens_numpy = tokens_numpy.reshape(1, -1, 2)
|
112 |
|
113 |
-
|
|
|
114 |
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
|
|
117 |
|
118 |
# Decode the audio
|
119 |
with torch.no_grad():
|
@@ -144,9 +149,15 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
|
|
144 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
145 |
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
|
146 |
tokens_numpy = tokens_numpy.reshape(1, -1, 2)
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
# Decode the audio in chunks
|
152 |
chunk_size = sample_rate // 2 # Adjust chunk size to account for the new shape
|
|
|
110 |
# Reshape tokens to match the original shape
|
111 |
tokens_numpy = tokens_numpy.reshape(1, -1, 2)
|
112 |
|
113 |
+
# Create a writable copy of the numpy array
|
114 |
+
tokens_numpy = np.array(tokens_numpy, copy=True)
|
115 |
|
116 |
+
# Move the tensor to the same device as the model
|
117 |
+
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
|
118 |
+
|
119 |
+
# Debugging prints to check tensor shapes and device
|
120 |
+
print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}, device: {tokens.device}")
|
121 |
+
print(f"Model device: {semanticodec.device}")
|
122 |
|
123 |
# Decode the audio
|
124 |
with torch.no_grad():
|
|
|
149 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
150 |
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
|
151 |
tokens_numpy = tokens_numpy.reshape(1, -1, 2)
|
152 |
+
|
153 |
+
# Create a writable copy of the numpy array
|
154 |
+
tokens_numpy = np.array(tokens_numpy, copy=True)
|
155 |
+
|
156 |
+
# Move the tensor to the same device as the model
|
157 |
+
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
|
158 |
+
|
159 |
+
print(f"Streaming tokens shape: {tokens.shape}, dtype: {tokens.dtype}, device: {tokens.device}")
|
160 |
+
print(f"Model device: {semanticodec.device}")
|
161 |
|
162 |
# Decode the audio in chunks
|
163 |
chunk_size = sample_rate // 2 # Adjust chunk size to account for the new shape
|