Update app.py
Browse files
app.py
CHANGED
@@ -62,6 +62,10 @@ def encode_audio(audio_file_path):
|
|
62 |
with open(temp_file_path, 'wb') as temp_file:
|
63 |
# Write sample rate
|
64 |
temp_file.write(sample_rate.to_bytes(4, byteorder='little'))
|
|
|
|
|
|
|
|
|
65 |
# Compress and write the tokens data
|
66 |
compressed_data = lz4.frame.compress(tokens_numpy.tobytes())
|
67 |
temp_file.write(compressed_data)
|
@@ -91,9 +95,11 @@ def decode_audio(encoded_file_path):
|
|
91 |
# Load encoded data and sample rate
|
92 |
with open(encoded_file_path, 'rb') as temp_file:
|
93 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
|
|
|
|
94 |
compressed_data = temp_file.read()
|
95 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
96 |
-
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(…)  # [note: original reshape argument truncated in extraction — the replacement line below uses .reshape(shape)]
|
97 |
|
98 |
# Move the tensor to the same device as the model
|
99 |
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
|
@@ -126,9 +132,11 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
|
|
126 |
# Load encoded data and sample rate from the .owie file
|
127 |
with open(encoded_file_path, 'rb') as temp_file:
|
128 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
|
|
|
|
129 |
compressed_data = temp_file.read()
|
130 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
131 |
-
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(…)  # [note: original reshape argument truncated in extraction — the replacement line below uses .reshape(shape)]
|
132 |
|
133 |
# Move the tensor to the same device as the model
|
134 |
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
|
@@ -137,13 +145,13 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
|
|
137 |
print(f"Model device: {semanticodec.device}")
|
138 |
|
139 |
# Decode the audio in chunks
|
140 |
-
chunk_size = sample_rate
|
141 |
with torch.no_grad():
|
142 |
-
for i in range(0, tokens.shape[0], chunk_size):  # [reconstructed: line truncated in extraction; dim-0 bound implied by the old chunking `tokens[i:i+chunk_size]`]
|
143 |
if cancel_stream:
|
144 |
break # Exit the loop if cancellation is requested
|
145 |
|
146 |
-
tokens_chunk = tokens[i:i+chunk_size]
|
147 |
audio_chunk = semanticodec.decode(tokens_chunk)
|
148 |
# Convert to numpy array and transpose
|
149 |
audio_data = audio_chunk.squeeze(0).cpu().numpy().T
|
|
|
62 |
with open(temp_file_path, 'wb') as temp_file:
|
63 |
# Write sample rate
|
64 |
temp_file.write(sample_rate.to_bytes(4, byteorder='little'))
|
65 |
+
# Write shape information
|
66 |
+
temp_file.write(len(tokens_numpy.shape).to_bytes(4, byteorder='little'))
|
67 |
+
for dim in tokens_numpy.shape:
|
68 |
+
temp_file.write(dim.to_bytes(4, byteorder='little'))
|
69 |
# Compress and write the tokens data
|
70 |
compressed_data = lz4.frame.compress(tokens_numpy.tobytes())
|
71 |
temp_file.write(compressed_data)
|
|
|
95 |
# Load encoded data and sample rate
|
96 |
with open(encoded_file_path, 'rb') as temp_file:
|
97 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
98 |
+
ndim = int.from_bytes(temp_file.read(4), byteorder='little')
|
99 |
+
shape = tuple(int.from_bytes(temp_file.read(4), byteorder='little') for _ in range(ndim))
|
100 |
compressed_data = temp_file.read()
|
101 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
102 |
+
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(shape)
|
103 |
|
104 |
# Move the tensor to the same device as the model
|
105 |
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
|
|
|
132 |
# Load encoded data and sample rate from the .owie file
|
133 |
with open(encoded_file_path, 'rb') as temp_file:
|
134 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
135 |
+
ndim = int.from_bytes(temp_file.read(4), byteorder='little')
|
136 |
+
shape = tuple(int.from_bytes(temp_file.read(4), byteorder='little') for _ in range(ndim))
|
137 |
compressed_data = temp_file.read()
|
138 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
139 |
+
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64).reshape(shape)
|
140 |
|
141 |
# Move the tensor to the same device as the model
|
142 |
tokens = torch.from_numpy(tokens_numpy).to(device=semanticodec.device)
|
|
|
145 |
print(f"Model device: {semanticodec.device}")
|
146 |
|
147 |
# Decode the audio in chunks
|
148 |
+
chunk_size = sample_rate // 2 # Adjust chunk size to account for the new shape
|
149 |
with torch.no_grad():
|
150 |
+
for i in range(0, tokens.shape[1], chunk_size):
|
151 |
if cancel_stream:
|
152 |
break # Exit the loop if cancellation is requested
|
153 |
|
154 |
+
tokens_chunk = tokens[:, i:i+chunk_size, :]
|
155 |
audio_chunk = semanticodec.decode(tokens_chunk)
|
156 |
# Convert to numpy array and transpose
|
157 |
audio_data = audio_chunk.squeeze(0).cpu().numpy().T
|