"""Voice-driven semantic search over a pre-embedded user guide.

Spoken Arabic is transcribed with the Google Web Speech API, translated with
``translators``, embedded with the OpenAI embeddings API and matched by cosine
similarity against the rows of ``absher_user_guide_with_embeddings_1k.csv``.
The ``Steps_Arabic`` value of the best-matching row is shown in a Gradio UI.
"""
import os
import subprocess
import time

import gradio as gr
import numpy as np
import openai
import pandas as pd
import speech_recognition as sr
import tiktoken
import translators as ts
from openai.embeddings_utils import cosine_similarity, get_embedding

EMBEDDING_ENGINE = "text-embedding-ada-002"
SPEECH_LANGUAGE = "ar-AE"
GUIDE_CSV = "absher_user_guide_with_embeddings_1k.csv"

# SECURITY: the API key must never be committed to source control. It is read
# from the environment instead; any key that was previously hard-coded in this
# file has been exposed and must be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")

df = pd.read_csv(GUIDE_CSV)
# NOTE(review): eval() executes whatever expression is stored in the CSV cell.
# This is only acceptable while the CSV is a trusted, locally generated file;
# consider ast.literal_eval if the stored format is a plain list literal.
df["embedding"] = df.embedding.apply(eval).apply(np.array)


def search_services(df: pd.DataFrame, user_input: str, n: int = 3, pprint: bool = False):
    """Return the ``Steps_Arabic`` value of the row most similar to *user_input*.

    Args:
        df: Frame with an ``embedding`` column of vectors and a
            ``Steps_Arabic`` column. A ``similarity`` column is written to it
            in place on every call.
        user_input: Query text to embed and compare against each row.
        n: Number of top-ranked rows to consider (and to print when *pprint*
            is set). Only the best match is returned.
        pprint: When true, print the first 200 characters of each of the
            top-*n* results, each followed by a blank line.

    Returns:
        The ``Steps_Arabic`` value of the highest-similarity row.

    Raises:
        IndexError: If no row is selected (empty frame or ``n <= 0``).
    """
    query_embedding = get_embedding(user_input, engine=EMBEDDING_ENGINE)
    df["similarity"] = df.embedding.apply(
        lambda x: cosine_similarity(x, query_embedding)
    )
    top_steps = (
        df.sort_values("similarity", ascending=False).head(n).Steps_Arabic.values
    )

    if pprint:
        for steps in top_steps:
            # str() guards against non-string cells (e.g. NaN for empty values).
            print(str(steps)[:200])
            print()

    return top_steps[0]


def _recognize(recognizer, audio) -> str:
    """Transcribe *audio* with Google speech recognition; return "" on failure."""
    try:
        text = recognizer.recognize_google(
            audio, language=SPEECH_LANGUAGE, show_all=False
        )
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        print(f"Could not request results from Speech Recognition service; {e}")
        return ""
    print("You said: " + text)
    return text


def listen() -> str:
    """Record one utterance from the default microphone and transcribe it.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("May I help you?")
        audio = recognizer.listen(source)
    return _recognize(recognizer, audio)


def transcribe(audio_filepath) -> str:
    """Transcribe the audio file at *audio_filepath*.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_filepath) as source:
        audio = recognizer.record(source)
    return _recognize(recognizer, audio)


def run(audio_filepath):
    """Gradio handler: transcribe the recording and return the best match.

    Returns an empty string when no recording was supplied or nothing could be
    transcribed, so that an empty query is never translated or embedded.
    """
    if not audio_filepath:
        return ""
    text = transcribe(audio_filepath)
    if not text:
        return ""
    return search_services(df, ts.translate_text(text), n=1)


# Build the interface only; launching here would start the server on import
# and would bind `demo` to the return value of launch() instead of the
# Interface itself.
demo = gr.Interface(
    fn=run,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()
    # Command-line alternative using the local microphone:
    # prompt = listen()
    # results = search_services(df, ts.translate_text(prompt), n=3)
    # print(results)