Spaces:
Sleeping
Sleeping
File size: 1,985 Bytes
f2c3fe7 36fdb50 f2c3fe7 97549eb f2c3fe7 97549eb f2c3fe7 97549eb f2c3fe7 97549eb f2c3fe7 97549eb f2c3fe7 97549eb ebc7318 97549eb f2c3fe7 ebc7318 97549eb f2c3fe7 97549eb f2c3fe7 97549eb f2c3fe7 8899039 97549eb f2c3fe7 ebc7318 97549eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import gradio as gr
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
# Speech-to-text pipeline; answer_question uses it to transcribe audio questions.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
)
# Question-answering pipeline; called with a question and a context passage.
qa_model = pipeline(
    task="question-answering",
    model="AVISHKAARAM/avishkaarak-ekta-hindi",
)
def answer_question(context, question=None, audio=None):
    """Answer a question about ``context`` and synthesize the answer as speech.

    When ``audio`` is supplied it is transcribed and the transcription is used
    as the question; otherwise the typed ``question`` is used.

    Args:
        context: Passage of text the answer is looked up in.
        question: Typed question; ignored when ``audio`` is given.
        audio: Audio input holding the spoken question (passed straight to
            the transcriber), or ``None``.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the answer and
        ``audio_path`` is the path of an MP3 file with the spoken answer
        (``None`` when the answer is blank and there is nothing to speak).
        When input is missing or any step fails, ``text`` is a message
        describing the problem and ``audio_path`` is ``None``.
    """
    try:
        # Reject missing input up front so the user gets a clear message
        # instead of an opaque error raised deep inside a model pipeline.
        if not context or not context.strip():
            return "Please provide a context passage.", None

        # A spoken question takes precedence over the typed one.
        if audio:
            question_text = transcriber(audio)["text"]
        else:
            question_text = question
        if not question_text or not question_text.strip():
            return "Please provide a question as text or audio.", None

        answer = qa_model(question=question_text, context=context)["answer"]

        # Nothing to synthesize: return the (blank) answer without audio
        # rather than letting the text-to-speech step fail on empty text.
        if not answer or not answer.strip():
            return answer, None

        tts = gTTS(text=answer, lang="en")
        # Close the handle right away: only the path is needed, and leaving
        # it open leaks a file descriptor (and blocks re-opening on Windows).
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name
        try:
            tts.save(audio_path)
        except Exception:
            # Do not leave an empty orphaned temp file behind on failure.
            os.remove(audio_path)
            raise
        return answer, audio_path
    except Exception as exc:
        # UI boundary: surface the failure as text instead of crashing the
        # app. Fall back to the exception type when the message is empty.
        return str(exc) or type(exc).__name__, None
# Input widgets: the context passage plus a typed or a spoken question.
context_input = gr.Textbox(lines=3, label="Context")
question_input = gr.Textbox(label="Question")
audio_input = gr.Audio(label="Question (Audio Input)", type="filepath")

# Output widgets: the answer as text and as audio.
output_text = gr.Textbox(label="Answer")
output_audio = gr.Audio(label="Answer (Audio Output)")

# Wire the widgets to answer_question; the examples leave the audio slot empty.
interface = gr.Interface(
    title="Multimodal Question Answering",
    description="Provide a context and either a text question or an audio question to get an answer.",
    fn=answer_question,
    inputs=[context_input, question_input, audio_input],
    outputs=[output_text, output_audio],
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?", None],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?", None],
    ],
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()
|