"""Voice-driven service lookup demo.

Pipeline: recorded Arabic speech -> text (Google Web Speech via
``speech_recognition``) -> English (``googletrans``) -> embedding similarity
search over a pre-embedded user-guide CSV -> Arabic steps of the best match,
served through a Gradio interface.
"""
import os
import subprocess
import time
from typing import Optional

import gradio as gr
import numpy as np
import openai
import pandas as pd
import speech_recognition as sr
import tiktoken
from googletrans import Translator
from openai.embeddings_utils import get_embedding, cosine_similarity
# import translators as ts

openai.api_key = os.environ.get('OPENAI_API_KEY')

df = pd.read_csv("absher_user_guide_with_embeddings_1k.csv")
# NOTE(review): `eval` executes whatever text is stored in the CSV cells.
# That is only acceptable while the CSV is a trusted, locally generated
# artifact; if it can ever come from outside, switch to a safe parser
# (e.g. ast.literal_eval / json.loads) after confirming the stored format.
df["embedding"] = df.embedding.apply(eval).apply(np.array)

# Shown to the user when there is no recording or it cannot be transcribed.
_NO_SPEECH_MESSAGE = "عذراً، لم أتمكن من فهم التسجيل الصوتي. حاول مرة أخرى."


def search_services(df: pd.DataFrame, user_input: str, n: int = 3, pprint: bool = False):
    """Return the ``Steps_Arabic`` value of the row most similar to *user_input*.

    The query is embedded with ``text-embedding-ada-002`` and compared by
    cosine similarity against every row's ``embedding``.

    Args:
        df: Frame with an ``embedding`` column (array per row) and a
            ``Steps_Arabic`` column. A ``similarity`` column is written onto
            it in place on every call.
        user_input: Query text to embed.
        n: Number of top matches considered; only affects what is previewed
            when *pprint* is true, since just the best match is returned.
        pprint: When true, print the first 200 characters of each of the
            top *n* matches, each followed by a blank line.

    Returns:
        The ``Steps_Arabic`` cell of the highest-similarity row.

    Raises:
        IndexError: If no rows are selected (empty frame or ``n == 0``).
    """
    query_embedding = get_embedding(
        user_input,
        engine="text-embedding-ada-002"
    )
    df["similarity"] = df.embedding.apply(
        lambda row_embedding: cosine_similarity(row_embedding, query_embedding)
    )

    top_steps = (
        df.sort_values("similarity", ascending=False)
        .head(n)
        .Steps_Arabic
    ).values

    if pprint:
        # Preview every top match; iterating over the single returned cell
        # would walk it character by character.
        for steps in top_steps:
            print(str(steps)[:200])
            print()

    return top_steps[0]


def transcribe(audio_filepath: str) -> Optional[str]:
    """Transcribe an audio file to text with Google's recognizer (``ar-AE``).

    Args:
        audio_filepath: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition service request failed (a diagnostic
        is printed in both cases).
    """
    print(audio_filepath)
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_filepath) as source:
        audio = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio, language="ar-AE", show_all=False)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Speech Recognition service; {e}")
        return None

    print("You said: " + text)
    return text


def run(audio_filepath):
    """Gradio handler: recorded audio in, best-matching service steps out.

    Args:
        audio_filepath: Path of the recording supplied by the Gradio audio
            input; may be empty/``None`` when nothing was recorded.

    Returns:
        The steps text from :func:`search_services`, or a short Arabic
        apology message when there is no audio or it cannot be transcribed.
    """
    if not audio_filepath:
        return _NO_SPEECH_MESSAGE

    text = transcribe(audio_filepath)
    if not text:
        # transcribe() signals failure with None; translating None would raise.
        return _NO_SPEECH_MESSAGE

    translator = Translator()
    english_query = translator.translate(text, dest='en').text
    return search_services(df, english_query, n=1)


title = "حاضر أبشر"
description = """

قول الخدمة اللي تبيها من منصة أبشر، وحاضر، أبشر! حنقلك كيف توصلها من المنصة

"""

demo = gr.Interface(
    fn=run,
    title=title,
    description=description,
    inputs=gr.Audio(source="microphone", type="filepath"),
    examples=[["https://huggingface.co./spaces/hassanalsawadi/hala-absher-demo/resolve/main/example1.wav"], ["https://huggingface.co./spaces/hassanalsawadi/hala-absher-demo/resolve/main/example2.wav"]],
    outputs="text")

if __name__ == "__main__":
    demo.launch()

# prompt = listen()
# results = search_services(df, ts.translate_text(prompt), n=3)
# print(results)