from transformers import pipeline, Conversation
import gradio as gr

# Speech-to-text: Whisper (base checkpoint) via the automatic-speech-recognition pipeline.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
)

# Chatbot: BlenderBot served through the "conversational" pipeline.
# Note: Conversation and the "conversational" task have been removed from recent
# transformers releases, so this assumes an older version that still ships them.
model = pipeline("conversational", model="facebook/blenderbot-400M-distill")
# Transcribe an audio file to English text with the Whisper pipeline.
def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",
        },
        chunk_length_s=30,  # split long recordings into 30-second chunks
        batch_size=8,
    )
    return output["text"]
# Handle one audio clip end to end: transcribe it, then ask the chatbot for a reply.
def handle_audio_input(audio_file):
    try:
        transcribed_text = transcribe_speech(audio_file)
        print(f"Transcribed text: {transcribed_text}")

        # Wrap the transcription in a Conversation and let BlenderBot respond.
        conversation = Conversation(transcribed_text)
        response = model(conversation)
        chatbot_response = response.generated_responses[-1]
        print(f"Chatbot response: {chatbot_response}")

        return transcribed_text, chatbot_response
    except Exception as e:
        print(f"Error: {e}")
        return "Error in processing audio", str(e)
with gr.Blocks() as demo:
    gr.Markdown("## Customer query audio-to-text chatbot")
with gr.Tab("Microphone"): |
|
mic_transcribe = gr.Interface( |
|
fn=handle_audio_input, |
|
inputs=gr.Audio(sources="microphone", type="filepath"), |
|
outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Chatbot Response")], |
|
) |
|
mic_transcribe.render() |
|
|
|
with gr.Tab("File Upload"): |
|
file_transcribe = gr.Interface( |
|
fn=handle_audio_input, |
|
inputs=gr.Audio(sources="upload", type="filepath"), |
|
outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Chatbot Response")], |
|
) |
|
file_transcribe.render() |
# share=True serves the app locally and also exposes it on a temporary public Gradio link.
demo.launch(share=True)