ernestchu
committed on
Commit
·
ded337a
1
Parent(s):
c55d8da
add speech enhancement
Browse files
- app.py +21 -7
- requirements.txt +1 -0
app.py
CHANGED
@@ -3,9 +3,11 @@ import time
|
|
3 |
from tsmnet import Stretcher
|
4 |
import gradio as gr
|
5 |
from gradio import processing_utils
|
6 |
-
|
|
|
7 |
import torchaudio
|
8 |
import yt_dlp
|
|
|
9 |
|
10 |
model_root = './weights'
|
11 |
yt_dl_dir = 'yt-audio'
|
@@ -58,7 +60,7 @@ def prepare_audio_file(rec, audio_file, yt_url):
|
|
58 |
raise gr.Error('No audio found!')
|
59 |
|
60 |
|
61 |
-
def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
|
62 |
audio_file = prepare_audio_file(rec, audio_file, yt_url)
|
63 |
|
64 |
x, sr = torchaudio.load(audio_file)
|
@@ -67,8 +69,18 @@ def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
|
|
67 |
|
68 |
x = x[:, int(start_time * sr):int(end_time * sr)]
|
69 |
|
70 |
-
if speed
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
torchaudio.save(audio_file, x, sr)
|
74 |
return processing_utils.audio_from_file(audio_file)
|
@@ -86,16 +98,17 @@ with gr.Blocks() as demo:
|
|
86 |
with gr.Column():
|
87 |
with gr.Tab('From microphone'):
|
88 |
rec_box = gr.Audio(label='Recording', source='microphone', type='filepath')
|
89 |
-
with gr.Tab('From file'):
|
90 |
-
audio_file_box = gr.Audio(label='Audio sample', type='filepath')
|
91 |
with gr.Tab('From YouTube'):
|
92 |
yt_url_box = gr.Textbox(label='YouTube URL', placeholder='https://youtu.be/q6EoRBvdVPQ')
|
|
|
|
|
|
|
93 |
|
94 |
rec_box.change(lambda: [None] * 2, outputs=[audio_file_box, yt_url_box])
|
95 |
audio_file_box.change(lambda: [None] * 2, outputs=[rec_box, yt_url_box])
|
96 |
yt_url_box.input(lambda: [None] * 2, outputs=[rec_box, audio_file_box])
|
97 |
|
98 |
-
speed_box = gr.Slider(label='Playback speed', minimum=0, maximum=2, value=1)
|
99 |
with gr.Accordion('Fine-grained settings', open=False):
|
100 |
with gr.Tab('Trim audio sample (sec)'):
|
101 |
# gr.Markdown('### Trim audio sample (sec)')
|
@@ -117,6 +130,7 @@ with gr.Blocks() as demo:
|
|
117 |
rec_box,
|
118 |
audio_file_box,
|
119 |
yt_url_box,
|
|
|
120 |
speed_box,
|
121 |
model_box,
|
122 |
start_time_box,
|
|
|
3 |
from tsmnet import Stretcher
|
4 |
import gradio as gr
|
5 |
from gradio import processing_utils
|
6 |
+
import torch
|
7 |
+
import numpy as np
|
8 |
import torchaudio
|
9 |
import yt_dlp
|
10 |
+
import noisereduce as nr
|
11 |
|
12 |
model_root = './weights'
|
13 |
yt_dl_dir = 'yt-audio'
|
|
|
60 |
raise gr.Error('No audio found!')
|
61 |
|
62 |
|
63 |
+
def run(rec, audio_file, yt_url, denoise, speed, model, start_time, end_time):
|
64 |
audio_file = prepare_audio_file(rec, audio_file, yt_url)
|
65 |
|
66 |
x, sr = torchaudio.load(audio_file)
|
|
|
69 |
|
70 |
x = x[:, int(start_time * sr):int(end_time * sr)]
|
71 |
|
72 |
+
if speed == 1:
|
73 |
+
torchaudio.save(audio_file, x, sr)
|
74 |
+
return processing_utils.audio_from_file(audio_file)
|
75 |
+
|
76 |
+
x = models[model](x, speed).cpu()
|
77 |
+
|
78 |
+
if denoise:
|
79 |
+
if len(x.shape) == 1: # mono
|
80 |
+
x = x[None]
|
81 |
+
x = x.numpy()
|
82 |
+
# perform noise reduction
|
83 |
+
x = torch.from_numpy(np.stack([nr.reduce_noise(y=y, sr=sr) for y in x]))
|
84 |
|
85 |
torchaudio.save(audio_file, x, sr)
|
86 |
return processing_utils.audio_from_file(audio_file)
|
|
|
98 |
with gr.Column():
|
99 |
with gr.Tab('From microphone'):
|
100 |
rec_box = gr.Audio(label='Recording', source='microphone', type='filepath')
|
|
|
|
|
101 |
with gr.Tab('From YouTube'):
|
102 |
yt_url_box = gr.Textbox(label='YouTube URL', placeholder='https://youtu.be/q6EoRBvdVPQ')
|
103 |
+
with gr.Tab('From file'):
|
104 |
+
audio_file_box = gr.Audio(label='Audio sample', type='filepath')
|
105 |
+
denoise_box = gr.Checkbox(label='Speech enhancement (should be off for music)', value=True)
|
106 |
|
107 |
rec_box.change(lambda: [None] * 2, outputs=[audio_file_box, yt_url_box])
|
108 |
audio_file_box.change(lambda: [None] * 2, outputs=[rec_box, yt_url_box])
|
109 |
yt_url_box.input(lambda: [None] * 2, outputs=[rec_box, audio_file_box])
|
110 |
|
111 |
+
speed_box = gr.Slider(label='Playback speed', minimum=0.25, maximum=2, value=1)
|
112 |
with gr.Accordion('Fine-grained settings', open=False):
|
113 |
with gr.Tab('Trim audio sample (sec)'):
|
114 |
# gr.Markdown('### Trim audio sample (sec)')
|
|
|
130 |
rec_box,
|
131 |
audio_file_box,
|
132 |
yt_url_box,
|
133 |
+
denoise_box,
|
134 |
speed_box,
|
135 |
model_box,
|
136 |
start_time_box,
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ torchvision
|
|
3 |
torchaudio
|
4 |
yt-dlp
|
5 |
wget
|
|
|
6 |
|
|
|
3 |
torchaudio
|
4 |
yt-dlp
|
5 |
wget
|
6 |
+
noisereduce
|
7 |
|