# Spaces: Sleeping (page-status residue from the hosting site; not part of the program)
import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement
# Instantiate the MetricGAN speech-enhancement model once at import time so
# every request reuses the same object.
_MODEL_SOURCE = "speechbrain/metricgan-plus-voicebank"
_MODEL_SAVEDIR = "tmpdir_metricgan"

enhance_model = SpectralMaskEnhancement.from_hparams(
    source=_MODEL_SOURCE,
    savedir=_MODEL_SAVEDIR,
)
def enhance_speech(audio):
    """Enhance a noisy recording with the MetricGAN model.

    Args:
        audio: Filesystem path of the noisy audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input. Falsy (``None``) when the
            user submitted without providing any audio.

    Returns:
        Path of a newly created WAV file (written at 16000 Hz) containing
        the enhanced signal.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if not audio:
        # Surface a readable message in the UI instead of an opaque failure
        # inside load_audio.
        raise gr.Error("Please upload an audio file before submitting.")

    # Load the audio and add a fake batch dimension for enhance_batch.
    noisy = enhance_model.load_audio(audio).unsqueeze(0)
    # Relative length of each batch item; 1.0 means the full signal is used.
    lengths = torch.tensor([1.0])
    enhanced = enhance_model.enhance_batch(noisy, lengths)

    # Write to a unique temporary file per call: a fixed output name would
    # let overlapping requests overwrite each other's result. The handle is
    # closed before saving so the path can be reopened on every platform.
    # NOTE: delete=False means the file is not removed automatically.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    torchaudio.save(output_path, enhanced.cpu(), 16000)
    return output_path
# Build the web UI: one audio component in, one audio component out, both
# exchanging data with enhance_speech as file paths.
_noisy_input = gr.Audio(type="filepath")
_enhanced_output = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=enhance_speech,
    inputs=_noisy_input,
    outputs=_enhanced_output,
    title="Speech Enhancement",
    description="Upload a noisy audio file to enhance it using MetricGAN.",
)

# Start serving the interface.
iface.launch()