owiedotch committed on
Commit
c707064
1 Parent(s): 7b5adeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -9,6 +9,7 @@ import lz4.frame
9
  import os
10
  from typing import Generator
11
  import asyncio # Import asyncio for cancellation
 
12
 
13
  # Attempt to use GPU, fallback to CPU
14
  try:
@@ -106,13 +107,8 @@ def decode_audio(encoded_file_path):
106
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
107
  tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
108
 
109
- # Check and reshape tokens
110
- if tokens_numpy.ndim == 1:
111
- tokens_numpy = tokens_numpy.reshape(1, -1) # Reshape to [1, token_length]
112
- elif tokens_numpy.ndim == 2:
113
- tokens_numpy = tokens_numpy.reshape(1, tokens_numpy.shape[1]) # Ensure 2D tensor
114
- else:
115
- raise ValueError("Tokens array must be 1D or 2D")
116
 
117
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
118
 
@@ -130,6 +126,7 @@ def decode_audio(encoded_file_path):
130
 
131
  except Exception as e:
132
  print(f"Decoding error: {e}")
 
133
  return str(e) # Return error message as string
134
 
135
  finally:
@@ -146,16 +143,19 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
146
  compressed_data = temp_file.read()
147
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
148
  tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
 
149
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
150
 
 
 
151
  # Decode the audio in chunks
152
- chunk_size = sample_rate # Use the stored sample rate as chunk size
153
  with torch.no_grad():
154
  for i in range(0, tokens.shape[1], chunk_size):
155
  if cancel_stream:
156
  break # Exit the loop if cancellation is requested
157
 
158
- tokens_chunk = tokens[:, i:i+chunk_size]
159
  audio_chunk = semanticodec.decode(tokens_chunk)
160
  # Convert to numpy array and transpose
161
  audio_data = audio_chunk.squeeze(0).cpu().numpy().T
@@ -164,6 +164,7 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
164
 
165
  except Exception as e:
166
  print(f"Streaming decoding error: {e}")
 
167
  yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32)) # Return silence
168
 
169
  finally:
 
9
  import os
10
  from typing import Generator
11
  import asyncio # Import asyncio for cancellation
12
+ import traceback # Import traceback for error handling
13
 
14
  # Attempt to use GPU, fallback to CPU
15
  try:
 
107
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
108
  tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
109
 
110
+ # Reshape tokens to match the original shape
111
+ tokens_numpy = tokens_numpy.reshape(1, -1, 2)
 
 
 
 
 
112
 
113
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
114
 
 
126
 
127
  except Exception as e:
128
  print(f"Decoding error: {e}")
129
+ print(f"Traceback: {traceback.format_exc()}")
130
  return str(e) # Return error message as string
131
 
132
  finally:
 
143
  compressed_data = temp_file.read()
144
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
145
  tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
146
+ tokens_numpy = tokens_numpy.reshape(1, -1, 2)
147
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
148
 
149
+ print(f"Streaming tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
150
+
151
  # Decode the audio in chunks
152
+ chunk_size = sample_rate // 2 # Adjust chunk size to account for the new shape
153
  with torch.no_grad():
154
  for i in range(0, tokens.shape[1], chunk_size):
155
  if cancel_stream:
156
  break # Exit the loop if cancellation is requested
157
 
158
+ tokens_chunk = tokens[:, i:i+chunk_size, :]
159
  audio_chunk = semanticodec.decode(tokens_chunk)
160
  # Convert to numpy array and transpose
161
  audio_data = audio_chunk.squeeze(0).cpu().numpy().T
 
164
 
165
  except Exception as e:
166
  print(f"Streaming decoding error: {e}")
167
+ print(f"Traceback: {traceback.format_exc()}")
168
  yield (sample_rate, np.zeros((chunk_size, 1), dtype=np.float32)) # Return silence
169
 
170
  finally: