speechbraintest / app.py
Shokoufehhh's picture
Update app.py
930cece verified
import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement
# Instantiate the MetricGAN speech enhancer once at import time so that every
# request reuses the same model object. `source` names the pretrained model
# repository and `savedir` the local directory handed to SpeechBrain for it.
_METRICGAN_HPARAMS = {
    "source": "speechbrain/metricgan-plus-voicebank",
    "savedir": "tmpdir_metricgan",
}
enhance_model = SpectralMaskEnhancement.from_hparams(**_METRICGAN_HPARAMS)
def enhance_speech(audio):
    """Enhance a noisy recording with the MetricGAN model.

    Args:
        audio: Path to the noisy audio file, as delivered by the
            ``gr.Audio(type="filepath")`` input component. ``None`` when the
            form is submitted without any audio.

    Returns:
        Path to a freshly created WAV file containing the enhanced audio.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Fail with a readable message instead of an opaque loader error when the
    # user submits an empty form.
    if audio is None:
        raise gr.Error("Please upload an audio file before submitting.")

    # Load the waveform and add a leading batch dimension of size one.
    noisy = enhance_model.load_audio(audio).unsqueeze(0)
    # Relative length of each batch item; 1.0 marks the single item as
    # full-length (no padding).
    lengths = torch.tensor([1.0])

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths)

    # Write to a unique file per call. A fixed "enhanced.wav" would be
    # overwritten when several requests are processed, handing one user
    # another user's result. The handle is closed before saving so the path
    # can be reopened for writing on every platform.
    # NOTE(review): delete=False leaves the file in the temp directory; add
    # cleanup if disk usage becomes a concern.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # Saved at 16 kHz, the same rate the original code wrote.
    torchaudio.save(output_path, enhanced.cpu(), 16000)
    return output_path
# Build the web UI. Both the input and the output are audio components that
# exchange data with enhance_speech as file paths.
_noisy_input = gr.Audio(type="filepath")
_enhanced_output = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=enhance_speech,
    inputs=_noisy_input,
    outputs=_enhanced_output,
    title="Speech Enhancement",
    description="Upload a noisy audio file to enhance it using MetricGAN.",
)

# Start serving the app.
iface.launch()