"""Gradio demo that denoises speech with SpeechBrain's MetricGAN+ model."""

import tempfile
from typing import Optional

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement

# Sample rate (Hz) used when writing the enhanced audio. This is the value the
# original script hard-coded; it is presumably the rate the
# "metricgan-plus-voicebank" model operates at -- confirm against the model card.
OUTPUT_SAMPLE_RATE = 16000

# Load the MetricGAN model once at import time so every request reuses it.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    savedir="tmpdir_metricgan",
)


def enhance_speech(audio: Optional[str]) -> str:
    """Enhance a noisy speech recording and return the path of the result.

    Args:
        audio: Filesystem path of the uploaded audio file, as supplied by the
            ``gr.Audio(type="filepath")`` input. ``None`` when the user
            submitted the form without providing audio.

    Returns:
        Path of a newly created WAV file containing the enhanced audio.

    Raises:
        gr.Error: If no audio was provided.
    """
    if not audio:
        # Surface a readable message in the UI instead of an opaque failure
        # from inside the model's audio loader.
        raise gr.Error("Please upload an audio file to enhance.")

    # Load the audio and add a fake batch dimension.
    noisy = enhance_model.load_audio(audio).unsqueeze(0)

    # Relative length of each batch item; 1.0 means "use the full signal".
    lengths = torch.tensor([1.0])

    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths)

    # Write to a unique temporary file per request. A fixed name such as
    # "enhanced.wav" would let concurrent requests overwrite each other's
    # output. The file is intentionally not deleted here because Gradio reads
    # it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    torchaudio.save(output_path, enhanced.detach().cpu(), OUTPUT_SAMPLE_RATE)

    return output_path


# Set up the Gradio interface.
iface = gr.Interface(
    fn=enhance_speech,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
    title="Speech Enhancement",
    description="Upload a noisy audio file to enhance it using MetricGAN.",
)

# Launch the Gradio interface.
iface.launch()