Spaces:

AVISHKAARAM
/

avishkarak-ekta-audio

Sleeping

App Files Community

techysanoj committed on Dec 6, 2024

Commit

8899039

verified ·

1 Parent(s): 47c9700

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -14

app.py CHANGED Viewed

@@ -61,9 +61,8 @@
 import torchaudio
 import gradio as gr
 from transformers import pipeline
-from gtts import gTTS
 import tempfile
-import pygame
 import time
 # Initialize the speech-to-text transcriber
@@ -73,29 +72,28 @@ transcriber = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav
 model_name = "AVISHKAARAM/avishkaarak-ekta-hindi"
 qa_model = pipeline("question-answering", model=model_name)
 def answer_question(context, question=None, audio=None):
     if audio is not None:
         text = transcriber(audio)["text"]
         question_text = text
     else:
         question_text = question
     qa_result = qa_model(question=question_text, context=context)
     answer = qa_result["answer"]
-    tts = gTTS(text=answer, lang='en')
-    audio_path = tempfile.NamedTemporaryFile(suffix=".mp3").name
-    tts.save(audio_path)
     return answer, audio_path
-def play_audio(audio_path):
-    pygame.mixer.init()
-    pygame.mixer.music.load(audio_path)
-    pygame.mixer.music.play()
-    while pygame.mixer.music.get_busy():
-        time.sleep(0.1)
 # Define the Gradio interface
 context_input = gr.Textbox(label="Context")
 question_input = gr.Textbox(label="Question")
@@ -111,10 +109,11 @@ inter = gr.Interface(
     title="Question Answering",
     description="Enter a context and a question to get an answer. You can also record audio with the question.",
     examples=[
-        ["The capital of France is Paris.", "What is the capital of France?"],
-        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?"],
     ]
 )
 # Launch the Gradio interface
 inter.launch()

 import torchaudio
 import gradio as gr
 from transformers import pipeline
+import pyttsx3
 import tempfile
 import time
 # Initialize the speech-to-text transcriber
 model_name = "AVISHKAARAM/avishkaarak-ekta-hindi"
 qa_model = pipeline("question-answering", model=model_name)
+# Initialize pyttsx3 TTS
+engine = pyttsx3.init()
 def answer_question(context, question=None, audio=None):
     """Answer a question about ``context`` and save the answer as speech.

     The question text is taken from ``audio`` (transcribed with the
     module-level ``transcriber``) when audio is supplied, otherwise from
     ``question``.

     Args:
         context: Passage handed to the question-answering pipeline.
         question: Typed question; ignored when ``audio`` is provided.
         audio: Audio input passed to ``transcriber``; takes precedence over
             ``question``.

     Returns:
         A ``(answer, audio_path)`` tuple: the answer text and the path of
         the temporary file the spoken answer was written to.

     Raises:
         ValueError: If no question text is available (neither input was
             given, or the transcription came back empty).
     """
     # Audio input wins over the typed question.
     if audio is not None:
         question_text = transcriber(audio)["text"]
     else:
         question_text = question
     # Fail fast with a clear message instead of passing None/"" downstream.
     if not question_text:
         raise ValueError("Provide a question as text or as recorded audio.")
     # Generate the answer.
     qa_result = qa_model(question=question_text, context=context)
     answer = qa_result["answer"]
     # Reserve a temp path and close the handle right away so the TTS engine
     # can write to it. delete=False keeps the file on disk after closing;
     # nothing in this function removes it afterwards.
     # NOTE(review): the audio container pyttsx3 writes may depend on the
     # platform driver -- confirm the output really is MP3.
     with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
         audio_path = tmp_file.name
     # save_to_file only queues the job; runAndWait performs the synthesis.
     engine.save_to_file(answer, audio_path)
     engine.runAndWait()
     return answer, audio_path
 # Define the Gradio interface
 context_input = gr.Textbox(label="Context")
 question_input = gr.Textbox(label="Question")
     title="Question Answering",
     description="Enter a context and a question to get an answer. You can also record audio with the question.",
     examples=[
+        ["The capital of France is Paris.", "What is the capital of France?", None],
+        ["OpenAI is famous for developing GPT-3.", "What is OpenAI known for?", None],
     ]
 )
 # Launch the Gradio interface
 inter.launch()