import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement

# Instantiate the pretrained MetricGAN+ enhancer once at module load so that
# every call to the request handler reuses the same model object.
# `savedir` is the local directory handed to SpeechBrain for the model files.
enhance_model = SpectralMaskEnhancement.from_hparams(
    savedir="tmpdir_metricgan",
    source="speechbrain/metricgan-plus-voicebank",
)

def enhance_speech(audio):
    """Enhance a noisy recording and return the path of the cleaned WAV file.

    Args:
        audio: Path to the input audio file. The Gradio interface in this
            file passes it from an ``Audio(type="filepath")`` component, so
            it is ``None`` when the user submits without uploading anything.

    Returns:
        Path to a freshly created temporary ``.wav`` file containing the
        enhanced audio, saved at 16 kHz.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Fail with a readable message instead of an opaque loader error.
    if not audio:
        raise gr.Error("Please upload an audio file before submitting.")

    # Load the audio and add a fake batch dimension.
    noisy = enhance_model.load_audio(audio).unsqueeze(0)

    # Relative length of each batch item; 1.0 means "use the full signal".
    lengths = torch.tensor([1.0])

    # Pure inference: no autograd bookkeeping is needed.
    with torch.no_grad():
        enhanced = enhance_model.enhance_batch(noisy, lengths)

    # Write to a unique file per call so concurrent requests cannot overwrite
    # each other's output (a fixed "enhanced.wav" would be shared by all).
    # The handle is closed before saving so the path can be reopened for
    # writing on every platform. The file is intentionally not deleted here
    # because the caller still needs to read it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name

    # NOTE(review): 16000 is assumed to match the sample rate that
    # load_audio normalizes to for this model — confirm against the hparams.
    torchaudio.save(output_path, enhanced.cpu(), 16000)
    return output_path

# Web UI: a single audio input wired to enhance_speech, with a single audio
# output. Both components are configured with type="filepath", matching the
# handler, which takes a file path and returns a file path.
iface = gr.Interface(
    title="Speech Enhancement",
    description="Upload a noisy audio file to enhance it using MetricGAN.",
    fn=enhance_speech,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Audio(type="filepath"),
)

# Start serving the interface.
iface.launch()