NeoPy committed on
Commit
fe5270d
·
verified ·
1 Parent(s): 218b3e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -35
app.py CHANGED
@@ -3,7 +3,8 @@ import json
3
  import re
4
  import tempfile
5
  from importlib.resources import files
6
-
 
7
  import click
8
  import gradio as gr
9
  import numpy as np
@@ -60,25 +61,46 @@ chat_model_state = None
60
  chat_tokenizer_state = None
61
 
62
 
 
 
 
 
 
 
63
  @gpu_decorator
64
- def generate_response(messages, model, tokenizer):
65
- """Generate response using Qwen"""
66
- text = tokenizer.apply_chat_template(
67
- messages,
68
- tokenize=False,
69
- add_generation_prompt=True,
 
 
 
 
 
70
  )
71
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
72
- generated_ids = model.generate(
73
- **model_inputs,
74
- max_new_tokens=512,
75
- temperature=0.7,
76
- top_p=0.95,
77
- )
78
- generated_ids = [
79
- output_ids[len(input_ids) :] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
80
- ]
81
- return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
 
 
 
 
 
 
 
 
 
82
 
83
 
84
  @gpu_decorator
@@ -210,24 +232,9 @@ Have a conversation with an AI using your reference voice!
210
  }
211
  ]
212
  )
 
 
213
 
214
@gpu_decorator
def process_audio_input(audio_path, text, history, conv_state):
    """Turn one user turn (recorded audio and/or typed text) into chat state.

    When audio is supplied, its transcription replaces the typed text.
    The user message and the model's reply are appended to both
    `history` (Gradio (user, bot) tuples) and `conv_state` (role dicts).
    Returns the updated history, conversation state, and "" to clear the
    input box; empty input is a no-op.
    """
    # Guard: nothing was provided at all.
    if not (audio_path or text.strip()):
        return history, conv_state, ""

    if audio_path:
        # preprocess_ref_audio_text returns (audio, transcript); keep the transcript.
        text = preprocess_ref_audio_text(audio_path, text)[1]

    # Guard: transcription (or input) came back blank.
    if not text.strip():
        return history, conv_state, ""

    conv_state.append({"role": "user", "content": text})
    history.append((text, None))

    bot_reply = generate_response(conv_state, chat_model_state, chat_tokenizer_state)

    conv_state.append({"role": "assistant", "content": bot_reply})
    history[-1] = (text, bot_reply)
    return history, conv_state, ""
231
 
232
  @gpu_decorator
233
  def generate_audio_response(history, ref_audio, ref_text, remove_silence):
 
3
  import re
4
  import tempfile
5
  from importlib.resources import files
6
+ from groq import Groq
7
+ import os
8
  import click
9
  import gradio as gr
10
  import numpy as np
 
61
  chat_tokenizer_state = None
62
 
63
 
64
+
65
# Module-level Groq client shared by generate_response().
# NOTE(review): the env var name "Groq_TOKEN" is unconventional — the Groq
# SDK's own default is GROQ_API_KEY; confirm the deployment actually sets
# "Groq_TOKEN" before renaming anything.
groq_token = os.getenv("Groq_TOKEN", None)
client = Groq(
    api_key=groq_token,
)
69
+
70
@gpu_decorator
def generate_response(messages):
    """Generate a chat reply via the Groq chat-completions API.

    Parameters
    ----------
    messages : str | list[dict]
        Either a bare prompt string (promoted to a single user turn) or a
        full OpenAI-style list of ``{"role": ..., "content": ...}`` dicts.

    Returns
    -------
    str
        The assistant's reply text.
    """
    # Hoist the string-promotion out of the call so the request payload
    # is explicit instead of an inline conditional inside a kwarg.
    if isinstance(messages, str):
        messages = [{"role": "user", "content": messages}]

    chat_completion = client.chat.completions.create(
        messages=messages,
        model="llama-3.3-70b-versatile",
        stream=False,
    )
    # With stream=False the SDK returns a ChatCompletion object; the full
    # reply lives at choices[0].message.content (per Groq API docs).
    return chat_completion.choices[0].message.content
84
+
85
+
86
@gpu_decorator
def process_audio_input(audio_path, text, history, conv_state):
    """Convert one user turn (audio and/or typed text) into chat history.

    A supplied audio clip is transcribed and its transcript replaces the
    typed text. The user message and the generated assistant reply are
    appended to both `history` (Gradio (user, bot) tuples) and
    `conv_state` (role dicts). Returns the updated history, conversation
    state, and "" to clear the text box; empty input is returned unchanged.
    """
    if not (audio_path or text.strip()):
        return history, conv_state, ""

    user_text = text
    if audio_path:
        # preprocess_ref_audio_text returns (audio, transcript); keep the transcript.
        user_text = preprocess_ref_audio_text(audio_path, text)[1]

    if not user_text.strip():
        return history, conv_state, ""

    conv_state.append({"role": "user", "content": user_text})
    history.append((user_text, None))
    reply = generate_response(conv_state)
    conv_state.append({"role": "assistant", "content": reply})
    history[-1] = (user_text, reply)
    return history, conv_state, ""
103
+
104
 
105
 
106
  @gpu_decorator
 
232
  }
233
  ]
234
  )
235
+
236
+
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  @gpu_decorator
240
  def generate_audio_response(history, ref_audio, ref_text, remove_silence):