"""Gradio demo that transcribes speech with a Whisper ASR pipeline.

Two tabs are exposed: one records from the microphone, the other accepts an
uploaded audio file. Both tabs call the same transcription function.
"""

import gradio as gr
from transformers import pipeline

# Load the ASR model once at start-up so every request reuses the same pipeline.
model_id = "riteshkr/quantized-whisper-large-v3"  # Update with your model path or ID
pipe = pipeline("automatic-speech-recognition", model=model_id)


def transcribe_speech(filepath):
    """Transcribe the audio file at *filepath* and return the recognized text.

    Args:
        filepath: Path to the audio file supplied by the Gradio audio
            component (both components use ``type="filepath"``). It is
            empty/``None`` when the user submits without recording or
            uploading anything.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque pipeline failure.
    """
    if not filepath:
        raise gr.Error(
            "No audio provided. Please record or upload audio before submitting."
        )

    output = pipe(
        filepath,
        generate_kwargs={
            "task": "transcribe",
            # Update the language as per your model's fine-tuning.
            "language": "english",
            # Passed through generate_kwargs rather than as a top-level
            # pipeline argument; it reaches generate() either way.
            "max_new_tokens": 256,
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


# Gradio interface for microphone input.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.Textbox(),
)

# Gradio interface for file upload input.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.Textbox(),
)

# Tabbed layout combining both interfaces inside a Blocks container.
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch the app with debugging enabled.
# NOTE: share=True asks Gradio to create a publicly reachable share link.
if __name__ == "__main__":
    demo.launch(debug=True, share=True)