owiedotch committed on
Commit
c6e9cf1
1 Parent(s): 70a04be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -67
app.py CHANGED
@@ -2,11 +2,12 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  import dac
5
- import numpy as np
6
- from pydub import AudioSegment
7
- from audiotools import AudioSignal
8
  import io
9
- import soundfile as sf
 
 
 
 
10
 
11
  class DACApi:
12
  def __init__(self, model_type="44khz", model_bitrate="16kbps"):
@@ -15,106 +16,72 @@ class DACApi:
15
  self.model_path = dac.utils.download(model_type, model_bitrate)
16
  print("Loading DAC model...")
17
  self.model = dac.DAC.load(self.model_path)
18
- self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
19
- self.model.to(self.device)
20
 
21
- @spaces.GPU
22
- def encode_audio(self, input_file):
23
- # Convert various audio formats to WAV if necessary
24
- if not input_file.name.lower().endswith('.wav'):
25
- print(f"Converting {input_file.name} to WAV...")
26
- audio = AudioSegment.from_file(input_file.name)
27
- input_wav = io.BytesIO()
28
- audio.export(input_wav, format="wav")
29
- input_wav.seek(0)
30
- else:
31
- input_wav = input_file
32
 
33
  # Load audio signal
34
- signal = AudioSignal(input_wav)
35
 
36
  # Compress audio
37
  print("Compressing audio...")
38
  compressed = self.model.compress(signal)
39
 
 
40
  output = io.BytesIO()
41
  compressed.save(output)
42
  output.seek(0)
 
43
  return output
44
 
45
- @spaces.GPU
46
- def decode_audio(self, input_file):
47
  # Load compressed audio
48
- print("Loading compressed audio...")
49
- compressed = dac.DACFile.load(input_file.name)
50
 
51
  # Decompress audio
52
  print("Decompressing audio...")
53
  decompressed = self.model.decompress(compressed)
54
-
 
55
  output = io.BytesIO()
56
  decompressed.write(output, format='wav')
57
  output.seek(0)
 
58
  return output
59
 
60
- @spaces.GPU
61
- def stream_audio(self, input_file):
62
- # Load compressed audio
63
- print("Loading compressed audio...")
64
- compressed = dac.DACFile.load(input_file.name)
65
-
66
- # Decompress audio
67
- print("Decompressing audio...")
68
- decompressed = self.model.decompress(compressed)
69
-
70
- audio_data = decompressed.audio_data.cpu().detach().numpy().squeeze().T
71
- sample_rate = decompressed.sample_rate
72
-
73
- return (sample_rate, audio_data)
74
-
75
  dac_api = DACApi()
76
 
 
77
  def encode(audio):
78
- if isinstance(audio, torch.Tensor):
79
- audio = audio.detach()
80
  compressed = dac_api.encode_audio(audio)
81
  return compressed
82
 
83
- def decode(audio):
84
- if isinstance(audio, torch.Tensor):
85
- audio = audio.detach()
86
- decompressed = dac_api.decode_audio(audio)
87
  return decompressed
88
 
89
- def stream(audio):
90
- if isinstance(audio, torch.Tensor):
91
- audio = audio.detach()
92
- sample_rate, audio_data = dac_api.stream_audio(audio)
93
- return (sample_rate, audio_data)
94
-
95
  # Gradio interface
96
  with gr.Blocks() as demo:
 
97
 
98
  with gr.Tab("Encode"):
99
- with gr.Row():
100
- input_audio = gr.Audio(type="filepath", label="Input Audio")
101
- output_file = gr.File(label="Compressed DAC File")
102
  encode_button = gr.Button("Encode")
103
- encode_button.click(encode, inputs=[input_audio], outputs=[output_file])
 
 
104
 
105
  with gr.Tab("Decode"):
106
- with gr.Row():
107
- input_file = gr.File(label="Compressed DAC File")
108
- output_audio = gr.Audio(label="Decompressed Audio")
109
  decode_button = gr.Button("Decode")
110
- decode_button.click(decode, inputs=[input_file], outputs=[output_audio])
111
-
112
- with gr.Tab("Stream"):
113
- with gr.Row():
114
- stream_input = gr.File(label="Compressed DAC File")
115
- stream_output = gr.Audio(label="Streamed Audio")
116
- stream_button = gr.Button("Stream")
117
- stream_button.click(stream, inputs=[stream_input], outputs=[stream_output])
118
 
119
- if __name__ == "__main__":
120
- demo.launch()
 
2
  import spaces
3
  import torch
4
  import dac
 
 
 
5
  import io
6
+ from audiotools import AudioSignal
7
+ from pydub import AudioSegment
8
+
9
+ # Ensure we're using CPU even if GPU is available
10
+ torch.set_default_tensor_type(torch.FloatTensor)
11
 
12
  class DACApi:
13
  def __init__(self, model_type="44khz", model_bitrate="16kbps"):
 
16
  self.model_path = dac.utils.download(model_type, model_bitrate)
17
  print("Loading DAC model...")
18
  self.model = dac.DAC.load(self.model_path)
19
+ self.model.to('cpu')
 
20
 
21
+ def encode_audio(self, audio):
22
+ # Convert audio to WAV
23
+ audio = AudioSegment.from_file(audio.name)
24
+ wav_io = io.BytesIO()
25
+ audio.export(wav_io, format="wav")
26
+ wav_io.seek(0)
 
 
 
 
 
27
 
28
  # Load audio signal
29
+ signal = AudioSignal(wav_io)
30
 
31
  # Compress audio
32
  print("Compressing audio...")
33
  compressed = self.model.compress(signal)
34
 
35
+ # Save compressed audio to BytesIO
36
  output = io.BytesIO()
37
  compressed.save(output)
38
  output.seek(0)
39
+
40
  return output
41
 
42
+ def decode_audio(self, compressed_file):
 
43
  # Load compressed audio
44
+ compressed = dac.DACFile.load(compressed_file)
 
45
 
46
  # Decompress audio
47
  print("Decompressing audio...")
48
  decompressed = self.model.decompress(compressed)
49
+
50
+ # Save decompressed audio to BytesIO
51
  output = io.BytesIO()
52
  decompressed.write(output, format='wav')
53
  output.seek(0)
54
+
55
  return output
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  dac_api = DACApi()
58
 
59
+ @spaces.CPU
60
  def encode(audio):
 
 
61
  compressed = dac_api.encode_audio(audio)
62
  return compressed
63
 
64
+ @spaces.CPU
65
+ def decode(compressed_file):
66
+ decompressed = dac_api.decode_audio(compressed_file)
 
67
  return decompressed
68
 
 
 
 
 
 
 
69
  # Gradio interface
70
  with gr.Blocks() as demo:
71
+ gr.Markdown("# Audio Compression with DAC")
72
 
73
  with gr.Tab("Encode"):
74
+ audio_input = gr.Audio(type="filepath", label="Input Audio")
 
 
75
  encode_button = gr.Button("Encode")
76
+ encoded_output = gr.File(label="Compressed Audio")
77
+
78
+ encode_button.click(encode, inputs=audio_input, outputs=encoded_output)
79
 
80
  with gr.Tab("Decode"):
81
+ compressed_input = gr.File(label="Compressed Audio")
 
 
82
  decode_button = gr.Button("Decode")
83
+ decoded_output = gr.Audio(label="Decompressed Audio")
84
+
85
+ decode_button.click(decode, inputs=compressed_input, outputs=decoded_output)
 
 
 
 
 
86
 
87
+ demo.launch()