import gradio as gr
from transformers import pipeline
from gtts import gTTS
import tempfile

# Initialize the speech-to-text transcriber
transcriber = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")

# Initialize the question-answering model
qa_model = pipeline("question-answering", model="AVISHKAARAM/avishkaarak-ekta-hindi")


def answer_question(context, question=None, audio=None):
    try:
        # If an audio question is provided, transcribe it; otherwise use the text question
        if audio:
            question_text = transcriber(audio)["text"]
        else:
            question_text = question

        # Guard against the case where neither a text nor an audio question was supplied
        if not question_text:
            return "Please provide a question as text or audio.", None

        # Generate an answer to the question
        qa_result = qa_model(question=question_text, context=context)
        answer = qa_result["answer"]

        # Convert the answer to speech and save it to a temporary MP3 file
        tts = gTTS(text=answer, lang="en")
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name
        tts.save(audio_path)

        return answer, audio_path

    except Exception as e:
        return str(e), None


# Define the Gradio interface
context_input = gr.Textbox(label="Context", lines=3)
question_input = gr.Textbox(label="Question")
audio_input = gr.Audio(type="filepath", label="Question (Audio Input)")

output_text = gr.Textbox(label="Answer")
output_audio = gr.Audio(label="Answer (Audio Output)")

interface = gr.Interface(
    fn=answer_question,
    inputs=[context_input, question_input, audio_input],
    outputs=[output_text, output_audio],
    title="Multimodal Question Answering",
    description="Provide a context and either a text question or an audio question to get an answer.",
    examples=[
        ["The capital of France is Paris.", "What is the capital of France?", None],
        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?", None],
    ],
)
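
# Optional: a quick, hypothetical sanity check that calls answer_question()
# directly, bypassing the web UI. It reuses the first example above and assumes
# the models have already downloaded; uncomment to try it.
#
# answer, audio_path = answer_question(
#     "The capital of France is Paris.",
#     question="What is the capital of France?",
# )
# print(answer, audio_path)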

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()