File size: 1,143 Bytes
b7af60e
930cece
 
 
b7af60e
930cece
 
 
 
 
b7af60e
930cece
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement

# Pretrained MetricGAN+ enhancer, built once when the module is imported and
# shared by every call to `enhance_speech`. The two settings are forwarded
# verbatim to SpeechBrain's `from_hparams` loader.
_METRICGAN_HPARAMS = {
    "source": "speechbrain/metricgan-plus-voicebank",
    "savedir": "tmpdir_metricgan",
}
enhance_model = SpectralMaskEnhancement.from_hparams(**_METRICGAN_HPARAMS)

def enhance_speech(audio):
    """Enhance a noisy recording and return the path of the cleaned-up file.

    Args:
        audio: Path to the uploaded audio file, as supplied by the Gradio
            ``Audio(type="filepath")`` input. ``None`` when the user submits
            without providing a file.

    Returns:
        Path to a newly written WAV file containing the enhanced audio.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    import tempfile

    # Gradio passes None (or an empty value) when nothing was uploaded; report
    # that to the user instead of failing inside the audio loader.
    if not audio:
        raise gr.Error("Please upload an audio file before submitting.")

    # Sample rate used when writing the result (same value as before).
    sample_rate = 16000

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        # Load the audio and add a fake batch dimension.
        noisy = enhance_model.load_audio(audio).unsqueeze(0)

        # Relative length of each batch item; 1.0 means "use the full signal".
        lengths = torch.tensor([1.])

        enhanced = enhance_model.enhance_batch(noisy, lengths)

    # Write each result to its own uniquely named file. A single fixed
    # "enhanced.wav" would be shared by all concurrent requests, so one user's
    # output could be overwritten by another's before it is served.
    # delete=False keeps the file on disk after the handle is closed so that
    # torchaudio can reopen it by name and Gradio can read it afterwards.
    with tempfile.NamedTemporaryFile(
        prefix="enhanced_", suffix=".wav", delete=False
    ) as tmp:
        output_path = tmp.name

    torchaudio.save(output_path, enhanced.cpu(), sample_rate)
    return output_path

# Build the web UI: one audio component for the upload and one for the result.
# Both exchange data with `enhance_speech` as file paths.
noisy_input = gr.Audio(type="filepath")
enhanced_output = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=enhance_speech,
    inputs=noisy_input,
    outputs=enhanced_output,
    title="Speech Enhancement",
    description="Upload a noisy audio file to enhance it using MetricGAN.",
)

# Start serving the app.
iface.launch()