yaoyugua committed on
Commit
1141e6b
·
1 Parent(s): aa480e2
Files changed (1) hide show
  1. app.py +42 -17
app.py CHANGED
@@ -1,40 +1,66 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import whisper
 
 
 
4
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
9
 
 
 
10
 
11
- def respond(
12
- audio, # This will receive the audio input
13
  history: list[tuple[str, str]],
14
  system_message,
15
  max_tokens,
16
  temperature,
17
  top_p,
18
  ):
19
- # Load whisper model (you might want to move this outside the function)
20
- model = whisper.load_model("base")
21
 
22
- # Transcribe the audio file
23
- if audio is not None:
24
- result = model.transcribe(audio)
25
- transcribed_text = result["text"]
26
- yield f"Transcribed: {transcribed_text}\nResponse: Buang NB"
27
- else:
28
- yield "No audio detected. Please try again."
 
 
 
 
 
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  """
32
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
33
  """
34
  demo = gr.ChatInterface(
35
- respond,
36
- chatbot=gr.Chatbot(),
37
- textbox=gr.Audio(type="filepath"), # Removed 'source' parameter as it's not supported
38
  additional_inputs=[
39
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
40
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -49,6 +75,5 @@ demo = gr.ChatInterface(
49
  ],
50
  )
51
 
52
-
53
  if __name__ == "__main__":
54
  demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import openai
4
+ from decouple import config
5
+ import win32com.client
6
+ import pythoncom
7
 
8
  """
9
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
10
  """
 
11
 
12
+ # Hugging Face inference client used for Zephyr chat completions (speech-to-text goes through openai separately)
13
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
14
 
15
def process_audio_and_respond(
    audio,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Transcribe an audio clip, get a chat reply for it, and speak the reply.

    Args:
        audio: Path of the audio file to transcribe, or None when no audio
            was provided.
        history: Earlier conversation turns as (user, assistant) pairs;
            empty entries in a pair are skipped.
        system_message: Content of the leading "system" chat message.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Returns:
        The string "Please provide an audio input." when ``audio`` is None;
        otherwise the tuple ``(user_message, response)`` holding the
        transcription and the full generated reply.

    NOTE(review): the two return paths have different shapes (str vs. tuple).
    This is kept as-is for compatibility; confirm which shape the UI that
    calls this function actually expects.
    """
    if audio is None:
        return "Please provide an audio input."

    # Convert speech to text using Whisper. The context manager closes the
    # file handle even if the transcription call raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    user_message = transcript["text"]

    # Prepare messages for Zephyr: system prompt, prior turns, new user turn.
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": user_message})

    # Get response from Zephyr, collecting the streamed pieces.
    pieces = []
    for message in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content
        # A streamed chunk may carry no text (content is None); skipping it
        # avoids a TypeError from concatenating None onto a string.
        if token:
            pieces.append(token)
    response = "".join(pieces)

    # Convert response to speech. COM is initialised for this call and
    # released afterwards so every CoInitialize is balanced.
    pythoncom.CoInitialize()
    try:
        speaker = win32com.client.Dispatch("SAPI.SpVoice")
        speaker.Speak(response)
    finally:
        pythoncom.CoUninitialize()

    return user_message, response
58
 
59
  """
60
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
61
  """
62
  demo = gr.ChatInterface(
63
+ process_audio_and_respond,
 
 
64
  additional_inputs=[
65
  gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
66
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 
75
  ],
76
  )
77
 
 
78
  if __name__ == "__main__":
79
  demo.launch()