owiedotch committed
Commit fea8244
1 Parent(s): cefd33c

Update app.py

Files changed (1)
  1. app.py +13 -5
app.py CHANGED
@@ -62,6 +62,10 @@ def encode_audio(audio_file_path):
     with open(temp_file_path, 'wb') as temp_file:
         # Write sample rate
         temp_file.write(sample_rate.to_bytes(4, byteorder='little'))
+        # Write shape information
+        temp_file.write(len(tokens_numpy.shape).to_bytes(4, byteorder='little'))
+        for dim in tokens_numpy.shape:
+            temp_file.write(dim.to_bytes(4, byteorder='little'))
         # Compress and write the tokens data
         compressed_data = lz4.frame.compress(tokens_numpy.tobytes())
         temp_file.write(compressed_data)
@@ -91,9 +95,11 @@ def decode_audio(encoded_file_path):
     # Load encoded data and sample rate
     with open(encoded_file_path, 'rb') as temp_file:
         sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
+        ndim = int.from_bytes(temp_file.read(4), byteorder='little')
+        shape = tuple(int.from_bytes(temp_file.read(4), byteorder='little') for _ in range(ndim))
         compressed_data = temp_file.read()
         tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
-        tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(-1)
+        tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(shape)
 
     # Move the tensor to the same device as the model
     tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
@@ -126,9 +132,11 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
     # Load encoded data and sample rate from the .owie file
     with open(encoded_file_path, 'rb') as temp_file:
         sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
+        ndim = int.from_bytes(temp_file.read(4), byteorder='little')
+        shape = tuple(int.from_bytes(temp_file.read(4), byteorder='little') for _ in range(ndim))
         compressed_data = temp_file.read()
         tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
-        tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(-1)
+        tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(shape)
 
     # Move the tensor to the same device as the model
     tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
@@ -137,13 +145,13 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
     print(f"Model device: {semanticodec.device}")
 
     # Decode the audio in chunks
-    chunk_size = sample_rate * 2  # Adjust chunk size as needed
+    chunk_size = sample_rate // 2  # Adjust chunk size to account for the new shape
     with torch.no_grad():
-        for i in range(0, tokens.shape[0], chunk_size):
+        for i in range(0, tokens.shape[1], chunk_size):
             if cancel_stream:
                 break  # Exit the loop if cancellation is requested
 
-            tokens_chunk = tokens[i:i+chunk_size]
+            tokens_chunk = tokens[:, i:i+chunk_size, :]
             audio_chunk = semanticodec.decode(tokens_chunk)
             # Convert to numpy array and transpose
             audio_data = audio_chunk.squeeze(0).cpu().numpy().T
 
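What this commit changes: encode_audio previously wrote only the sample rate before the LZ4-compressed token bytes, so the decoders had to flatten the tokens with reshape(-1). The header now also records the array's rank and each dimension, letting both decoders restore the original multi-dimensional layout. A minimal sketch of the resulting .owie layout as a round trip (the helper names and the BytesIO harness are mine, not part of app.py):

import io

import lz4.frame
import numpy as np

def write_owie(buf, sample_rate: int, tokens: np.ndarray) -> None:
    # Header: sample rate, ndim, then each dimension, all little-endian uint32.
    buf.write(sample_rate.to_bytes(4, byteorder='little'))
    buf.write(len(tokens.shape).to_bytes(4, byteorder='little'))
    for dim in tokens.shape:
        buf.write(dim.to_bytes(4, byteorder='little'))
    # Payload: LZ4-compressed int64 token bytes.
    buf.write(lz4.frame.compress(tokens.tobytes()))

def read_owie(buf):
    sample_rate = int.from_bytes(buf.read(4), byteorder='little')
    ndim = int.from_bytes(buf.read(4), byteorder='little')
    shape = tuple(int.from_bytes(buf.read(4), byteorder='little') for _ in range(ndim))
    raw = lz4.frame.decompress(buf.read())
    return sample_rate, np.frombuffer(raw, dtype=np.int64).reshape(shape)

# Round trip: the shape survives instead of being flattened to 1-D.
buf = io.BytesIO()
write_owie(buf, 16000, np.zeros((1, 512, 2), dtype=np.int64))
buf.seek(0)
sr, tokens = read_owie(buf)
assert sr == 16000 and tokens.shape == (1, 512, 2)

Note the format change is not backward compatible: files written before this commit carry no shape fields, so the new readers would misread the first bytes of the compressed payload as a dimension count.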
 
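One caveat in both decode paths as written: np.frombuffer returns a read-only view of the bytes, and torch.from_numpy shares that memory, so PyTorch emits a UserWarning about non-writable arrays here. If the tensor is only ever read, the warning is harmless; otherwise a copy makes the buffer writable. A sketch of that variant (the .copy() is my addition, not in the commit):

tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(shape).copy()
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)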
 
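The streaming loop is updated to match: with tokens kept in their original layout (apparently batch x time x codebook, judging by the slicing), iteration moves to the second axis (tokens.shape[1]) and each chunk keeps all batch and codebook entries. A standalone sketch of that slicing pattern (the shape and chunk size are illustrative assumptions; semanticodec.decode is the app's real call, stubbed out here):

import torch

def iter_time_chunks(tokens: torch.Tensor, chunk_size: int):
    # tokens assumed (batch, time, codebook); yield slices along the time axis.
    for i in range(0, tokens.shape[1], chunk_size):
        yield tokens[:, i:i + chunk_size, :]

tokens = torch.zeros(1, 4096, 2, dtype=torch.long)
chunks = list(iter_time_chunks(tokens, 1024))
assert len(chunks) == 4 and chunks[0].shape == (1, 1024, 2)
# In app.py, each chunk would feed semanticodec.decode(tokens_chunk).

The diff does not explain the move from sample_rate * 2 to sample_rate // 2; since chunk_size now counts positions along the token time axis rather than flat elements, the constant was presumably retuned to keep each decoded chunk a manageable length.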