Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import lz4.frame
|
|
9 |
import os
|
10 |
from typing import Generator
|
11 |
import asyncio # Import asyncio for cancellation
|
|
|
12 |
|
13 |
# Attempt to use GPU, fallback to CPU
|
14 |
try:
|
@@ -106,13 +107,8 @@ def decode_audio(encoded_file_path):
|
|
106 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
107 |
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
|
108 |
|
109 |
-
#
|
110 |
-
|
111 |
-
tokens_numpy = tokens_numpy.reshape(1, -1) # Reshape to [1, token_length]
|
112 |
-
elif tokens_numpy.ndim == 2:
|
113 |
-
tokens_numpy = tokens_numpy.reshape(1, tokens_numpy.shape[1]) # Ensure 2D tensor
|
114 |
-
else:
|
115 |
-
raise ValueError("Tokens array must be 1D or 2D")
|
116 |
|
117 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
118 |
|
@@ -130,6 +126,7 @@ def decode_audio(encoded_file_path):
|
|
130 |
|
131 |
except Exception as e:
|
132 |
print(f"Decoding error: {e}")
|
|
|
133 |
return str(e) # Return error message as string
|
134 |
|
135 |
finally:
|
@@ -146,16 +143,19 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
|
|
146 |
compressed_data = temp_file.read()
|
147 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
148 |
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
|
|
|
149 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
150 |
|
|
|
|
|
151 |
# Decode the audio in chunks
|
152 |
-
chunk_size = sample_rate #
|
153 |
with torch.no_grad():
|
154 |
for i in range(0, tokens.shape[1], chunk_size):
|
155 |
if cancel_stream:
|
156 |
break # Exit the loop if cancellation is requested
|
157 |
|
158 |
-
tokens_chunk = tokens[:, i:i+chunk_size]
|
159 |
audio_chunk = semanticodec.decode(tokens_chunk)
|
160 |
# Convert to numpy array and transpose
|
161 |
audio_data = audio_chunk.squeeze(0).cpu().numpy().T
|
@@ -164,6 +164,7 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
|
|
164 |
|
165 |
except Exception as e:
|
166 |
print(f"Streaming decoding error: {e}")
|
|
|
167 |
yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32)) # Return silence
|
168 |
|
169 |
finally:
|
|
|
9 |
import os
|
10 |
from typing import Generator
|
11 |
import asyncio # Import asyncio for cancellation
|
12 |
+
import traceback # Import traceback for error handling
|
13 |
|
14 |
# Attempt to use GPU, fallback to CPU
|
15 |
try:
|
|
|
107 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
108 |
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
|
109 |
|
110 |
+
# Reshape tokens to match the original shape
|
111 |
+
tokens_numpy = tokens_numpy.reshape(1, -1, 2)
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
114 |
|
|
|
126 |
|
127 |
except Exception as e:
|
128 |
print(f"Decoding error: {e}")
|
129 |
+
print(f"Traceback: {traceback.format_exc()}")
|
130 |
return str(e) # Return error message as string
|
131 |
|
132 |
finally:
|
|
|
143 |
compressed_data = temp_file.read()
|
144 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
145 |
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
|
146 |
+
tokens_numpy = tokens_numpy.reshape(1, -1, 2)
|
147 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
148 |
|
149 |
+
print(f"Streaming tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
|
150 |
+
|
151 |
# Decode the audio in chunks
|
152 |
+
chunk_size = sample_rate // 2 # Adjust chunk size to account for the new shape
|
153 |
with torch.no_grad():
|
154 |
for i in range(0, tokens.shape[1], chunk_size):
|
155 |
if cancel_stream:
|
156 |
break # Exit the loop if cancellation is requested
|
157 |
|
158 |
+
tokens_chunk = tokens[:, i:i+chunk_size, :]
|
159 |
audio_chunk = semanticodec.decode(tokens_chunk)
|
160 |
# Convert to numpy array and transpose
|
161 |
audio_data = audio_chunk.squeeze(0).cpu().numpy().T
|
|
|
164 |
|
165 |
except Exception as e:
|
166 |
print(f"Streaming decoding error: {e}")
|
167 |
+
print(f"Traceback: {traceback.format_exc()}")
|
168 |
yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32)) # Return silence
|
169 |
|
170 |
finally:
|