# Page header captured with this file (not part of the program):
#   hassanalsawadi's picture
#   Update app.py
#   d9b09c5
import ast
import os
import subprocess
import time

import gradio as gr
import numpy as np
import openai
import pandas as pd
import speech_recognition as sr
import tiktoken
from googletrans import Translator
from openai.embeddings_utils import get_embedding, cosine_similarity
# import translators as ts
# The OpenAI key comes from the environment; it is None when OPENAI_API_KEY
# is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Table searched by search_services(): it needs an "embedding" column and a
# "Steps_Arabic" column.
df = pd.read_csv("absher_user_guide_with_embeddings_1k.csv")

# Each embedding cell is read back as a string and must be parsed into a
# vector. ast.literal_eval only accepts Python literals, so unlike eval() it
# cannot execute code if the CSV is ever tampered with.
# NOTE(review): assumes the cells are plain list literals such as
# "[0.1, -0.2, ...]" — confirm against the CSV.
df["embedding"] = df.embedding.apply(ast.literal_eval).apply(np.array)
def search_services(df, user_input, n=3, pprint=False):
    """Return the Arabic steps of the row most similar to ``user_input``.

    The query is embedded with the ``text-embedding-ada-002`` engine and
    compared by cosine similarity against every vector in ``df.embedding``.

    Args:
        df: DataFrame with an ``embedding`` column (one vector per row) and
            a ``Steps_Arabic`` column. It is not modified.
        user_input: Query text to embed.
        n: How many of the best-ranked rows to keep. Only the best one is
            returned; the others are only shown when ``pprint`` is true.
        pprint: When true, print the first 200 characters of each of the
            ``n`` best matches, each followed by a blank line.

    Returns:
        The ``Steps_Arabic`` value of the highest-scoring row.

    Raises:
        IndexError: If no row is selected (``df`` is empty or ``n`` is 0).
    """
    query_embedding = get_embedding(
        user_input,
        engine="text-embedding-ada-002",
    )
    similarity = df.embedding.apply(
        lambda vec: cosine_similarity(vec, query_embedding)
    )
    # Score a copy rather than writing a "similarity" column into the caller's
    # frame: the frame is shared at module level, so in-place writes from
    # overlapping requests could overwrite each other.
    ranked = df.assign(similarity=similarity).sort_values(
        "similarity", ascending=False
    )
    top_steps = ranked.head(n).Steps_Arabic.values

    if pprint:
        # Iterate over the matches, not over the characters of the best one.
        for steps in top_steps:
            print(steps[:200])
            print()
    return top_steps[0]
def transcribe(audio_filepath):
    """Transcribe a recorded audio file to text.

    The whole file is read and passed to ``Recognizer.recognize_google``
    with ``language="ar-AE"``.

    Args:
        audio_filepath: Path of the audio file to transcribe. May be ``None``
            or empty when no recording was supplied.

    Returns:
        The recognized text, or ``None`` when there is no recording, the
        speech could not be understood, or the recognition request failed.
    """
    print(audio_filepath)
    if not audio_filepath:
        # Nothing to open: report it the same way as the other failure paths
        # instead of letting sr.AudioFile raise on a missing path.
        print("No audio file was provided")
        return None

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_filepath) as source:
        audio = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio, language="ar-AE", show_all=False)
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio")
        return None
    except sr.RequestError as e:
        print("Could not request results from Speech Recognition service; {0}".format(e))
        return None

    print("You said: " + text)
    return text
def run(audio_filepath):
    """Handle one request: speech -> text -> English -> best-matching steps.

    Args:
        audio_filepath: Path of the recorded audio file.

    Returns:
        The result of ``search_services`` for the English translation of the
        transcribed speech, or a short Arabic message asking the user to try
        again when nothing could be transcribed.
    """
    text = transcribe(audio_filepath)
    if not text:
        # transcribe() returns None when recognition fails; passing that to
        # the translator would raise and the user would only see an error.
        # Message: "Could not understand the recording, please try again."
        return "تعذّر فهم التسجيل الصوتي، يرجى المحاولة مرة أخرى."

    translator = Translator()
    english_query = translator.translate(text, dest='en').text
    return search_services(df, english_query, n=1)
# Heading and HTML blurb passed to the Gradio interface below.
title = "حاضر أبشر"
description = """
<p style="text-align:center;">قول الخدمة اللي تبيها من منصة أبشر، وحاضر، أبشر! حنقلك كيف توصلها من المنصة</p>
<center><img src="https://huggingface.co./spaces/hassanalsawadi/hala-absher-demo/resolve/main/logo.png" width=60px></center>
"""

# Microphone recording in (handed to run() as a file path), plain text out.
demo = gr.Interface(
    fn=run,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title=title,
    description=description,
    examples=[
        ["https://huggingface.co./spaces/hassanalsawadi/hala-absher-demo/resolve/main/example1.wav"],
        ["https://huggingface.co./spaces/hassanalsawadi/hala-absher-demo/resolve/main/example2.wav"],
    ],
)

# Start the web UI only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()
# prompt = listen()
# results = search_services(df, ts.translate_text(prompt), n=3)
# print(results)