Shanulhaq committed on
Commit
f539be4
·
verified ·
1 Parent(s): a34db48

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import whisper
4
+ from gtts import gTTS
5
+ import tempfile
6
+ import logging
7
+ import numpy as np
8
+ import requests
9
+
10
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Read the Eleven Labs API key from the environment.
# os.getenv() takes the *name* of the variable, never the secret value itself;
# passing the key as the name always yields None and leaks the key in source.
# SECURITY: any key that has ever been committed to the repository should be
# treated as compromised and rotated.
eleven_labs_api_key = os.getenv("ELEVEN_LABS_API_KEY")

if not eleven_labs_api_key:
    raise ValueError("ELEVEN_LABS_API_KEY is not set.")

logger.info("Eleven Labs API key is set.")

try:
    # Load the Whisper "base" model once at import time so every request
    # handled by process_audio reuses the same instance.
    whisper_model = whisper.load_model("base")
    logger.info("Whisper model loaded successfully.")
except Exception as e:
    # Log for the operator, then re-raise: the app cannot run without the model.
    logger.error(f"Failed to load Whisper model: {e}")
    raise
29
+
30
def process_audio(audio_file):
    """Transcribe an audio file, fetch a chat reply, and synthesize it as speech.

    The pipeline has three stages, each guarded separately so the caller gets
    a stage-specific error message:

    1. Transcribe ``audio_file`` with the module-level Whisper model.
    2. POST the transcription to the chat-completions endpoint and read the
       first choice's message content.
    3. Convert the reply to an MP3 with gTTS, written to a temporary file.

    Args:
        audio_file: Audio input handed to ``whisper_model.transcribe`` (the
            Gradio input in this file is configured with ``type="filepath"``).

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the reply and
        ``audio_path`` is the path of the generated MP3 (the file is not
        deleted automatically). On failure ``text`` is an error message and
        ``audio_path`` is ``None``.
    """
    try:
        # Transcribe audio using Whisper
        result = whisper_model.transcribe(audio_file)
        user_text = result['text']
        logger.info(f"Transcription successful: {user_text}")
    except Exception as e:
        logger.exception("Error in transcribing audio: %s", e)
        return "Error in transcribing audio.", None

    try:
        # Generate response using Eleven Labs API
        # NOTE(review): "llama3-8b-8192" and the /v1/chat/completions path look
        # like an OpenAI-compatible chat API rather than a text-to-speech
        # service — confirm the endpoint, model name and auth header scheme
        # against the provider's documentation.
        headers = {
            "Authorization": f"Bearer {eleven_labs_api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": "llama3-8b-8192",
            "messages": [
                {
                    "role": "user",
                    "content": user_text,
                },
            ],
        }

        # requests has no default timeout; without one a stalled server would
        # block this handler forever.
        response = requests.post(
            "https://api.elevenlabs.io/v1/chat/completions",
            json=payload,
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()

        response_data = response.json()
        response_text = response_data['choices'][0]['message']['content']
        logger.info(f"Received response from Eleven Labs API: {response_text}")
    except Exception as e:
        logger.exception("Error in generating response with Eleven Labs API: %s", e)
        return "Error in generating response with Eleven Labs API.", None

    output_path = None
    try:
        # Convert response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        # Reserve a unique path, then close the handle before gTTS writes to
        # it: keeping it open leaks a file descriptor and can prevent the
        # second open on platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp:
            output_path = tmp.name
        tts.save(output_path)
        logger.info("Text-to-speech conversion successful.")
    except Exception as e:
        logger.exception("Error in text-to-speech conversion: %s", e)
        if output_path is not None:
            # Best-effort removal of the orphaned temp file.
            try:
                os.remove(output_path)
            except OSError:
                pass
        return "Error in text-to-speech conversion.", None

    return response_text, output_path
77
+
78
# Assemble the Gradio UI: one audio input (delivered to the handler as a file
# path) and two outputs — the reply text and the synthesized reply audio.
audio_input = gr.Audio(type="filepath")
response_outputs = [
    gr.Textbox(label="Response"),
    gr.Audio(label="Response Audio"),
]

iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=response_outputs,
    title="AI-Powered Converter",
    live=True,
)

# Start the web server; any startup failure is logged and then propagated.
# NOTE(review): launch() presumably blocks when run as a plain script, in
# which case the success message is only emitted once it returns — confirm.
try:
    iface.launch()
    logger.info("Gradio interface launched successfully.")
except Exception as launch_error:
    logger.error(f"Failed to launch Gradio interface: {launch_error}")
    raise