"""Gradio demo: transcribe Urdu speech with Whisper and ask a chat Space for the command."""

import os
import tempfile

import gradio as gr
import soundfile as sf
from gradio_client import Client
from transformers import pipeline

ASR_MODEL_ID = "Abdullah17/whisper-small-urdu"
CHAT_SPACE_URL = "https://ysharma-explore-llamav2-with-tgi.hf.space/"

# Loaded once at import time so every request reuses the same pipeline object.
asr_pipe = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)


def transcribe_the_command(audio):
    """Transcribe a recording and return the chat endpoint's reply to the transcript.

    Args:
        audio: A ``(sample_rate, audio_data)`` pair as delivered by the Gradio
            audio input, or ``None`` when nothing was recorded.

    Returns:
        Whatever the remote ``/chat`` endpoint returns for the transcript, or
        an empty string when ``audio`` is ``None``.
    """
    # Submitting without a recording yields None; unpacking it would raise.
    if audio is None:
        return ""

    sample_rate, audio_data = audio

    # Write the clip to a per-call temporary directory rather than a fixed
    # path in the working directory, so concurrent requests cannot overwrite
    # each other's recording and no file is left behind afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        file_name = os.path.join(tmp_dir, "recorded_audio.wav")
        # The array is written exactly as received; no channel mixing is
        # performed in this function.
        sf.write(file_name, audio_data, sample_rate)
        print(file_name)
        transcript = asr_pipe(file_name)["text"]

    # A new client is created for every call.
    # NOTE(review): presumably this keeps each request's chat session
    # independent of earlier ones - confirm before sharing a client.
    client = Client(CHAT_SPACE_URL)
    result = client.predict(transcript, api_name="/chat")
    return result


iface = gr.Interface(
    fn=transcribe_the_command,
    # NOTE(review): `gr.inputs` is Gradio's legacy component namespace;
    # confirm the pinned Gradio version still provides it before upgrading.
    inputs=gr.inputs.Audio(label="Recorded Audio", source="microphone"),
    outputs="text",
    title="Whisper Small Urdu Command",
    description="Realtime demo for Urdu speech recognition using a fine-tuned Whisper small model and outputting the estimated command on the basis of speech transcript.",
)

iface.launch()