Update app.py
Browse files
app.py
CHANGED
@@ -238,26 +238,30 @@ def predict_category(que, input_image):
|
|
238 |
return preds[0]
|
239 |
|
240 |
|
241 |
-
def combine(audio, input_image):
|
242 |
-
|
243 |
-
|
|
|
|
|
244 |
|
245 |
image = Image.fromarray(input_image).convert('RGB')
|
246 |
category = predict_category(que, image)
|
247 |
-
|
248 |
answer = predict_answer(0, que, image)
|
249 |
|
250 |
-
# print(category)
|
251 |
-
|
252 |
tts = gTTS(answer)
|
253 |
tts.save('answer.mp3')
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
261 |
|
262 |
# Launch the Gradio interface
|
263 |
model_interface.launch(debug=True)
|
|
|
238 |
return preds[0]
|
239 |
|
240 |
|
241 |
+
def combine(audio, input_image, text_question=""):
    """Answer a question about an image and synthesise the answer as speech.

    The question comes from the recorded audio when one is supplied (it is
    passed through ``transcribe_audio``); otherwise the typed
    ``text_question`` is used as-is.

    Args:
        audio: Recording from the microphone input. Any falsy value is
            treated as "no recording" and ``text_question`` is used instead.
        input_image: Image data accepted by ``Image.fromarray``
            (presumably the array produced by ``gr.Image`` -- TODO confirm).
        text_question: Typed question used when no audio was recorded.

    Returns:
        A 4-tuple ``(question, answer, audio_path, category)``.
        ``audio_path`` is ``'answer.mp3'``, or ``None`` when the answer is
        empty and there is nothing to speak.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail early with a message the UI can show, instead of the opaque
    # error Image.fromarray raises when handed None.
    if input_image is None:
        raise gr.Error("Please upload an image before asking a question.")

    if audio:
        que = transcribe_audio(audio)
    else:
        # Guard against an explicit None so downstream code always gets a str.
        que = text_question or ""

    image = Image.fromarray(input_image).convert('RGB')
    category = predict_category(que, image)
    # NOTE(review): the meaning of the leading 0 is defined by predict_answer,
    # which is outside this view -- confirm before changing it.
    answer = predict_answer(0, que, image)

    # gTTS rejects empty text, so only synthesise when there is an answer.
    if not answer:
        return que, answer, None, category

    # NOTE(review): the output path is fixed, so overlapping requests would
    # overwrite each other's audio -- confirm whether that matters here.
    tts = gTTS(answer)
    tts.save('answer.mp3')

    return que, answer, 'answer.mp3', category
|
255 |
+
|
256 |
+
# Gradio UI: the user may speak or type a question about an uploaded image.
# The order of the input components matches the positional parameters of
# combine (audio, input_image, text_question).
_input_components = [
    gr.Microphone(label="Ask your question"),
    gr.Image(label="Upload the image"),
    gr.Textbox(label="Text Question"),
]

# The order of the output components matches the tuple returned by combine
# (question, answer, audio file, category).
_output_components = [
    gr.Text(label="Transcribed Question"),
    gr.Text(label="Answer"),
    gr.Audio(label="Audio Answer"),
    gr.Text(label="Category"),
]

model_interface = gr.Interface(
    fn=combine,
    inputs=_input_components,
    outputs=_output_components,
)

# Start serving the interface.
model_interface.launch(debug=True)
|