owiedotch committed on
Commit
47c28f2
1 Parent(s): 44da512

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -33,14 +33,15 @@ def encode_audio(audio_file_path):
33
  # Load the audio file
34
  waveform, sample_rate = torchaudio.load(audio_file_path)
35
 
 
 
 
 
36
  # Encode the audio
37
- audio = waveform.unsqueeze(0).to(torch_device)
38
  with torch.no_grad():
39
  tokens = semanticodec.encode(audio)
40
 
41
- # Debugging print statement
42
- print(f"Tokens shape: {tokens.shape}, dtype: {tokens.dtype}")
43
-
44
  # Convert to NumPy and save to a temporary .owie file
45
  tokens_numpy = tokens.detach().cpu().numpy()
46
  temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
@@ -70,9 +71,12 @@ def decode_audio(encoded_file_path):
70
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
71
  compressed_data = temp_file.read()
72
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
73
- tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
74
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
75
 
 
 
 
76
  # Decode the audio
77
  with torch.no_grad():
78
  waveform = semanticodec.decode(tokens)
@@ -98,9 +102,12 @@ async def stream_decode_audio(encoded_file_path) -> Generator[tuple, None, None]
98
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
99
  compressed_data = temp_file.read()
100
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
101
- tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.int64)
102
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
103
 
 
 
 
104
  # Decode the audio in chunks
105
  chunk_size = sample_rate # Use the stored sample rate as chunk size
106
  with torch.no_grad():
 
33
  # Load the audio file
34
  waveform, sample_rate = torchaudio.load(audio_file_path)
35
 
36
+ # Ensure waveform has the right dimensions
37
+ if waveform.ndim == 1:
38
+ waveform = waveform.unsqueeze(0)
39
+
40
  # Encode the audio
41
+ audio = waveform.to(torch_device)
42
  with torch.no_grad():
43
  tokens = semanticodec.encode(audio)
44
 
 
 
 
45
  # Convert to NumPy and save to a temporary .owie file
46
  tokens_numpy = tokens.detach().cpu().numpy()
47
  temp_fd, temp_file_path = tempfile.mkstemp(suffix=".owie")
 
71
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
72
  compressed_data = temp_file.read()
73
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
74
+ tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.float32) # Updated to np.float32
75
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
76
 
77
+ # Ensure tokens has the right dimensions
78
+ tokens = tokens.unsqueeze(0) if tokens.ndimension() == 1 else tokens
79
+
80
  # Decode the audio
81
  with torch.no_grad():
82
  waveform = semanticodec.decode(tokens)
 
102
  sample_rate = int.from_bytes(temp_file.read(4), byteorder='little')
103
  compressed_data = temp_file.read()
104
  tokens_numpy_bytes = lz4.frame.decompress(compressed_data)
105
+ tokens_numpy = np.frombuffer(tokens_numpy_bytes, dtype=np.float32) # Updated to np.float32
106
  tokens = torch.from_numpy(tokens_numpy).to(torch_device)
107
 
108
+ # Ensure tokens has the right dimensions
109
+ tokens = tokens.unsqueeze(0) if tokens.ndimension() == 1 else tokens
110
+
111
  # Decode the audio in chunks
112
  chunk_size = sample_rate # Use the stored sample rate as chunk size
113
  with torch.no_grad():