owiedotch committed on
Commit
dfdd7ad
1 Parent(s): 593bdeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -1,29 +1,30 @@
1
  import gradio as gr
2
- import spaces
3
  import torch
4
  import torchaudio
5
  from encodec import EncodecModel
6
  from encodec.utils import convert_audio
7
- from encodec.compress import compress, decompress
8
  import io
9
 
10
  # Load the Encodec model
11
- model = EncodecModel.encodec_model_48khz() # Use the encodec version of the model
12
- model.set_target_bandwidth(6.0) # Set the desired bandwidth
13
 
14
  @spaces.GPU
15
  def encode(audio_file_path):
16
  try:
17
  # Load and pre-process the audio waveform
18
  wav, sr = torchaudio.load(audio_file_path)
 
 
 
 
 
19
  wav = convert_audio(wav, sr, model.sample_rate, model.channels)
20
- wav = wav.unsqueeze(0)
21
 
22
  # Compress to ecdc file in memory
23
- compressed_audio = compress(model, wav)
24
-
25
- # Save compressed audio to BytesIO
26
- output = io.BytesIO(compressed_audio)
27
  output.seek(0)
28
 
29
  return output
@@ -35,11 +36,8 @@ def encode(audio_file_path):
35
  @spaces.GPU
36
  def decode(compressed_audio_file):
37
  try:
38
- # Load compressed audio
39
- compressed_audio = compressed_audio_file.read()
40
-
41
  # Decompress audio
42
- wav, sr = decompress(compressed_audio)
43
 
44
  # Convert the decoded audio to a numpy array for Gradio output
45
  decoded_audio = wav.cpu().numpy()
 
1
  import gradio as gr
 
2
  import torch
3
  import torchaudio
4
  from encodec import EncodecModel
5
  from encodec.utils import convert_audio
6
+ from encodec.compress import compress_to_file, decompress_from_file
7
  import io
8
 
9
  # Load the Encodec model
10
+ model = EncodecModel.encodec_model_48khz()
11
+ model.set_target_bandwidth(6.0)
12
 
13
  @spaces.GPU
14
  def encode(audio_file_path):
15
  try:
16
  # Load and pre-process the audio waveform
17
  wav, sr = torchaudio.load(audio_file_path)
18
+
19
+ # Convert to mono if necessary
20
+ if wav.shape[0] > 1:
21
+ wav = torch.mean(wav, dim=0, keepdim=True)
22
+
23
  wav = convert_audio(wav, sr, model.sample_rate, model.channels)
 
24
 
25
  # Compress to ecdc file in memory
26
+ output = io.BytesIO()
27
+ compress_to_file(model, wav, output)
 
 
28
  output.seek(0)
29
 
30
  return output
 
36
  @spaces.GPU
37
  def decode(compressed_audio_file):
38
  try:
 
 
 
39
  # Decompress audio
40
+ wav, sr = decompress_from_file(compressed_audio_file)
41
 
42
  # Convert the decoded audio to a numpy array for Gradio output
43
  decoded_audio = wav.cpu().numpy()