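"""Gradio app for multimodal question answering.

Takes a text context plus a question supplied either as text or as audio;
audio questions are transcribed with a wav2vec2 speech-recognition pipeline,
the answer is extracted with a question-answering pipeline, and the answer is
also returned as speech generated with gTTS.
"""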
import tempfile

import gradio as gr
from gtts import gTTS
from transformers import pipeline

# Initialize the speech-to-text transcriber
transcriber = pipeline(
    "automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
)

# Initialize the question-answering model
qa_model = pipeline("question-answering", model="AVISHKAARAM/avishkaarak-ekta-hindi")
def answer_question(context, question=None, audio=None):
    try:
        # If audio is provided, transcribe it; otherwise use the text question
        if audio:
            question_text = transcriber(audio)["text"]
        else:
            question_text = question

        # Generate an answer to the question
        qa_result = qa_model(question=question_text, context=context)
        answer = qa_result["answer"]

        # Convert the answer to speech and save it to a temporary MP3 file
        tts = gTTS(text=answer, lang="en")
        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        tmp.close()
        tts.save(tmp.name)

        return answer, tmp.name
    except Exception as e:
        return str(e), None
# Define the Gradio interface
context_input = gr.Textbox(label="Context", lines=3)
question_input = gr.Textbox(label="Question")
audio_input = gr.Audio(type="filepath", label="Question (Audio Input)")
output_text = gr.Textbox(label="Answer")
output_audio = gr.Audio(label="Answer (Audio Output)")

interface = gr.Interface(
    fn=answer_question,
    inputs=[context_input, question_input, audio_input],
    outputs=[output_text, output_audio],
    title="Multimodal Question Answering",
    description="Provide a context and either a text question or an audio question to get an answer.",
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?", None],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?", None],
    ],
)
# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()
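
# A minimal sketch of calling the handler directly (outside the Gradio UI),
# e.g. for quick local testing; the strings below are illustrative only and
# reuse the first example from the interface definition above:
#
#     answer, audio_path = answer_question(
#         context="The capital of France is Paris.",
#         question="What is the capital of France?",
#     )
#     print(answer)       # expected to be "Paris"
#     print(audio_path)   # path to the generated MP3 with the spoken answer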