Spaces:

yaoyugua
/

gradio_wave2wave

Sleeping

App Files Files Community

yaoyugua committed 21 days ago

Commit

1141e6b

1 Parent(s): aa480e2

try

Browse files

Files changed (1) hide show

app.py +42 -17

app.py CHANGED Viewed

@@ -1,40 +1,66 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import whisper
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# Lazily populated by _get_whisper_model(); shared by all calls to respond().
_WHISPER_MODEL = None


def _get_whisper_model():
    """Return the shared Whisper "base" model, loading it on first use."""
    global _WHISPER_MODEL
    if _WHISPER_MODEL is None:
        _WHISPER_MODEL = whisper.load_model("base")
    return _WHISPER_MODEL


def respond(
    audio,  # This will receive the audio input
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Transcribe the submitted audio and yield a single chat message.

    Args:
        audio: Value handed to ``model.transcribe``; ``None`` when nothing
            was recorded or uploaded.
        history: Previous (user, assistant) turns. Unused here.
        system_message: Unused here.
        max_tokens: Unused here.
        temperature: Unused here.
        top_p: Unused here.

    Yields:
        One string: the transcription followed by a fixed response, or a
        prompt to try again when ``audio`` is ``None``.
    """
    # Guard first so a missing recording never triggers a model load.
    if audio is None:
        yield "No audio detected. Please try again."
        return

    # The model is loaded once and reused instead of being reloaded per call.
    result = _get_whisper_model().transcribe(audio)
    transcribed_text = result["text"]
    yield f"Transcribed: {transcribed_text}\nResponse: Buang NB"
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
-    respond,
-    chatbot=gr.Chatbot(),
-    textbox=gr.Audio(type="filepath"),  # Removed 'source' parameter as it's not supported
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
@@ -49,6 +75,5 @@ demo = gr.ChatInterface(
     ],
 )
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 from huggingface_hub import InferenceClient
+import openai
+from decouple import config
+import win32com.client
+import pythoncom
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
+# Hugging Face Inference API client for the Zephyr chat model; speech-to-text goes through the `openai` package inside the handler
+client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def process_audio_and_respond(
    audio,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Transcribe a spoken prompt, ask the chat model for a reply, and speak it.

    Args:
        audio: Path of the recorded audio file (it is passed to ``open``),
            or ``None`` when no audio was supplied.
        history: Previous (user, assistant) turns; empty entries are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Returns:
        The string ``"Please provide an audio input."`` when ``audio`` is
        ``None``; otherwise the tuple ``(user_message, response)``.
    """
    if audio is None:
        return "Please provide an audio input."

    # Convert speech to text using Whisper. The context manager closes the
    # file handle even if the transcription request raises.
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    user_message = transcript["text"]

    # Prepare messages for Zephyr
    messages = [{"role": "system", "content": system_message}]
    for user, assistant in history:
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": user_message})

    # Get response from Zephyr. A streamed delta may carry no content
    # (None or empty); skip those instead of failing on str + None.
    pieces = []
    for message in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content
        if token:
            pieces.append(token)
    response = "".join(pieces)

    # Convert response to speech
    # NOTE(review): pythoncom / SAPI.SpVoice come from pywin32 and are
    # presumably Windows-only — confirm the deployment target supports them.
    pythoncom.CoInitialize()
    speaker = win32com.client.Dispatch("SAPI.SpVoice")
    speaker.Speak(response)

    # NOTE(review): the early return above is a plain string while this is a
    # tuple — confirm which shape the gr.ChatInterface caller expects.
    return user_message, response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 demo = gr.ChatInterface(
+    process_audio_and_respond,
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
     ],
 )
 if __name__ == "__main__":
     demo.launch()