Update app.py
Browse files
app.py
CHANGED
@@ -33,14 +33,15 @@ def encode_audio(audio_file_path):
|
|
33 |
# Load the audio file
|
34 |
waveform, sample_rate = torchaudio.load(audio_file_path)
|
35 |
|
|
|
|
|
|
|
|
|
36 |
# Encode the audio
|
37 |
-
audio = waveform.
|
38 |
with torch.no_grad():
|
39 |
tokens = semanticodec.encode(audio)
|
40 |
|
41 |
-
# Debugging print statement
|
42 |
-
print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
|
43 |
-
|
44 |
# Convert to NumPy and save to a temporary .owie file
|
45 |
tokens_numpy = tokens.detach().cpu().numpy()
|
46 |
temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
|
@@ -70,9 +71,12 @@ def decode_audio(encoded_file_path):
|
|
70 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
71 |
compressed_data = temp_file.read()
|
72 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
73 |
-
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.
|
74 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
75 |
|
|
|
|
|
|
|
76 |
# Decode the audio
|
77 |
with torch.no_grad():
|
78 |
waveform = semanticodec.decode(tokens)
|
@@ -98,9 +102,12 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
|
|
98 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
99 |
compressed_data = temp_file.read()
|
100 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
101 |
-
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.
|
102 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
103 |
|
|
|
|
|
|
|
104 |
# Decode the audio in chunks
|
105 |
chunk_size = sample_rate # Use the stored sample rate as chunk size
|
106 |
with torch.no_grad():
|
|
|
33 |
# Load the audio file
|
34 |
waveform, sample_rate = torchaudio.load(audio_file_path)
|
35 |
|
36 |
+
# Ensure waveform has the right dimensions: add a leading axis when it is 1-D
|
37 |
+
if waveform.ndim == 1:
|
38 |
+
waveform = waveform.unsqueeze(0)
|
39 |
+
|
40 |
# Encode the audio
|
41 |
+
audio = waveform.to(torch_device)
|
42 |
with torch.no_grad():
|
43 |
tokens = semanticodec.encode(audio)
|
44 |
|
|
|
|
|
|
|
45 |
# Convert to NumPy and save to a temporary .owie file
|
46 |
tokens_numpy = tokens.detach().cpu().numpy()
|
47 |
temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
|
|
|
71 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
72 |
compressed_data = temp_file.read()
|
73 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
74 |
+
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.float32) # Updated to np.float32; NOTE(review): must match the dtype of the bytes written by the encoder - confirm
|
75 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
76 |
|
77 |
+
# Ensure tokens has the right dimensions: add a leading axis when it is 1-D
|
78 |
+
tokens = tokens.unsqueeze(0) if tokens.ndimension() == 1 else tokens
|
79 |
+
|
80 |
# Decode the audio
|
81 |
with torch.no_grad():
|
82 |
waveform = semanticodec.decode(tokens)
|
|
|
102 |
sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
|
103 |
compressed_data = temp_file.read()
|
104 |
tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
|
105 |
+
tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.float32) # Updated to np.float32; NOTE(review): must match the dtype of the bytes written by the encoder - confirm
|
106 |
tokens = torch.from_numpy(tokens_numpy).to(torch_device)
|
107 |
|
108 |
+
# Ensure tokens has the right dimensions: add a leading axis when it is 1-D
|
109 |
+
tokens = tokens.unsqueeze(0) if tokens.ndimension() == 1 else tokens
|
110 |
+
|
111 |
# Decode the audio in chunks
|
112 |
chunk_size = sample_rate # Use the stored sample rate as chunk size
|
113 |
with torch.no_grad():
|