Abdullah committed on
Commit
cd3c843
·
verified ·
1 Parent(s): b09ddde

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import gradio as gr
4
+ from groq import Groq
5
+ from gtts import gTTS
6
+ import tempfile
7
+ import whisper
8
+
9
+
10
+
11
# Initialize Groq client.
# The API key is read from the environment so that the secret never lives in
# source control. Any key that has ever been committed to a repository must be
# treated as compromised: revoke it and issue a new one.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast at start-up with an actionable message instead of failing on
    # the first chat request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before launching the app."
    )
client = Groq(api_key=GROQ_API_KEY)


# Load the Whisper model
whisper_model = whisper.load_model("base")  # You can use "small", "medium", or "large" depending on your preference
18
+
19
+ # Function to convert audio to text using OpenAI Whisper
20
def audio_to_text(audio_file):
    """Transcribe an audio file to text with the module-level Whisper model.

    The high-level ``transcribe`` API is used instead of a single
    ``pad_or_trim`` + ``decode`` pass so that recordings longer than one
    30-second window are transcribed in full rather than silently cut off,
    and so the spectrogram settings always match the loaded checkpoint.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The transcribed text with leading/trailing whitespace removed.

    Raises:
        ValueError: If no audio file path was supplied.
    """
    if not audio_file:
        raise ValueError("No audio file was provided for transcription.")

    # fp16=False keeps decoding in 32-bit floats, matching the previous
    # DecodingOptions(fp16=False) behaviour.
    result = whisper_model.transcribe(audio_file, fp16=False)
    return result["text"].strip()
27
+
28
+ # Function to interact with Groq API and generate a response
29
def interact_with_groq(user_input):
    """Ask the Groq chat-completions API for a reply to ``user_input``.

    The input is sent as a single user message to the "llama3-8b-8192" model
    without streaming.

    Args:
        user_input: Text to send as the user's message.

    Returns:
        The content of the first returned choice, or a string beginning with
        "Error interacting with Groq API:" if the request raised an exception.
    """
    conversation = [{"role": "user", "content": user_input}]
    try:
        completion = client.chat.completions.create(
            messages=conversation,
            model="llama3-8b-8192",  # Use the appropriate model
            stream=False,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        # Errors are reported as text rather than propagated to the caller.
        return f"Error interacting with Groq API: {e}"
39
+
40
+ # Function to convert text to speech using gTTS
41
def text_to_audio(response_text):
    """Synthesise ``response_text`` to speech with gTTS and save it as an MP3.

    Args:
        response_text: Text to convert to speech.

    Returns:
        Path of the newly created ``.mp3`` file. The file is created with
        ``delete=False`` so it persists after this function returns; removing
        it is left to the caller.
    """
    tts = gTTS(response_text)
    # Reserve a unique .mp3 path and close the handle straight away: leaving it
    # open leaks a file descriptor on every request and can prevent gTTS from
    # writing to the path on platforms that lock open temporary files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        output_path = tmp_file.name
    tts.save(output_path)
    return output_path
46
+
47
+ # Main function for the chatbot
48
def voice_to_voice(audio_file):
    """Run the full voice pipeline: transcribe, query the LLM, synthesise speech.

    Args:
        audio_file: Path to the recorded or uploaded audio; may be empty when
            nothing was supplied.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the
        transcription and ``audio_path`` is the path of the spoken reply. On
        failure ``text`` is a message starting with
        "Error processing request:" and ``audio_path`` is ``None``.
    """
    # Nothing to transcribe: report it directly rather than surfacing a
    # low-level loader error.
    if not audio_file:
        return "Error processing request: no audio input was provided.", None

    try:
        # Step 1: Convert voice input to text
        print("Transcribing audio...")
        transcribed_text = audio_to_text(audio_file)
        print(f"Transcribed Text: {transcribed_text}")

        # An empty transcription (e.g. silence) would send an empty prompt to
        # the LLM and then try to speak whatever came back; stop here instead.
        if not transcribed_text or not transcribed_text.strip():
            return "Error processing request: no speech was detected in the audio.", None

        # Step 2: Interact with LLM via Groq API
        print("Getting LLM response...")
        response_text = interact_with_groq(transcribed_text)
        print(f"LLM Response: {response_text}")

        # Step 3: Convert LLM response to audio
        print("Generating audio response...")
        audio_response = text_to_audio(response_text)
        return transcribed_text, audio_response
    except Exception as e:
        # Top-level UI boundary: convert any failure into a displayable message.
        return f"Error processing request: {e}", None
66
+
67
+ # Gradio Interface
68
# Components are built first, in the same order as before: the audio input
# (handed to the callback as a file path), then the transcript box and the
# reply player.
_audio_input = gr.Audio(type="filepath")
_output_components = [
    gr.Textbox(label="Transcribed Text"),
    gr.Audio(label="Response Audio"),
]

interface = gr.Interface(
    voice_to_voice,
    _audio_input,
    _output_components,
    title="Real-Time Voice-to-Voice Chatbot",
    description="A real-time voice-to-voice chatbot using Whisper for transcription, Groq API for LLM, and gTTS for audio response.",
)

# Launch the interface only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()