Spaces:

akjedidtz
/

AIBOT

Runtime error

App Files Files Community

akjedidtz committed on Nov 10, 2024

Commit

3651420

verified ·

1 Parent(s): a3b5666

Create app.py

Browse files

Files changed (1) hide show

app.py +143 -0

app.py ADDED Viewed

	@@ -0,0 +1,143 @@

+import speech_recognition as sr
+from gtts import gTTS
+from pydub import AudioSegment
+from IPython.display import Audio
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+import soundfile as sf
+# Setup device and dtype
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+import os
+from groq import Groq
+# Initialize the Groq client with the API key
+client = Groq(
+    api_key="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",
+)
+#@@##
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+# Load model and processor
+model_id = "openai/whisper-medium"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    model_id,
+    torch_dtype=torch_dtype,
+    low_cpu_mem_usage=True,
+    use_safetensors=True
+)
+model.to(device)
+processor = AutoProcessor.from_pretrained(model_id)
+from transformers import pipeline
+from gtts import gTTS
+import gradio as gr
+import torch
+# Load ASR pipeline
+asr_pipe =pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=torch_dtype,
+    device=device,
+)
+# Initialize Groq client
+client = Groq(
+    api_key="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"
+)
+# Text-to-Speech function
+def text_to_speech(text):
+    try:
+        # Convert text to speech using gTTS
+        tts = gTTS(text, lang='hi')
+        tts.save("response.mp3")
+        return "response.mp3"  # Return the MP3 file path for playback in Gradio
+    except Exception as e:
+        print(f"Text-to-speech error: {e}")
+        return None
+# Function to process audio, get model response, and return TTS output
+def process_audio(audio):
+    # Convert audio to text
+    print("Converting audio to text...")
+    result = asr_pipe(audio, generate_kwargs={"language": "urdu"})
+    # Check if audio-to-text conversion was successful
+    if "text" in result and result["text"].strip():
+        user_ques = result["text"]
+        print("Audio-to-text conversion successful. User Question:", user_ques)
+        # Prepare messages for model input
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant named SSk BOT that stands for (sehar bot) who mostly answers in Roman Urdu. Be professional. No emojis; just Urdu written in English letters, and if you receive a prompt in Urdu font, answer only in English (Roman Urdu).",
+            },
+            {
+                "role": "user",
+                "content": user_ques,
+            }
+        ]
+        # Get response from Groq model
+        print("Getting response from the model...")
+        response = client.chat.completions.create(
+            messages=messages,
+            model="gemma2-9b-it",
+        )
+        # Extract model's response
+        model_response = response['choices'][0]['message']['content']
+        print("Model:", model_response)
+        # Convert model's response to speech
+        audio_path = text_to_speech(model_response)
+        return model_response, audio_path
+    else:
+        print("Audio-to-text conversion failed or produced no text.")
+        return "Audio-to-text conversion failed or no text was detected.", None
+# Gradio interface
+interface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(type="filepath"),
+    outputs=[gr.Textbox(label="Model Response"), gr.Audio(label="Response Audio")],
+    title="Real-time ASR to Language Model Response",
+    description="Upload an audio file in Urdu, get a text response from the model, and hear the response in English."
+)
+# Launch the Gradio Interface
+interface.launch()