hatim00101
committed on
Commit
•
2953169
1
Parent(s):
edde32f
Update app.py
Browse files
app.py
CHANGED
@@ -1,63 +1,93 @@
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
)
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
"""
|
43 |
-
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
44 |
-
"""
|
45 |
-
demo = gr.ChatInterface(
|
46 |
-
respond,
|
47 |
-
additional_inputs=[
|
48 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
49 |
-
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
|
50 |
-
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
|
51 |
-
gr.Slider(
|
52 |
-
minimum=0.1,
|
53 |
-
maximum=1.0,
|
54 |
-
value=0.95,
|
55 |
-
step=0.05,
|
56 |
-
label="Top-p (nucleus sampling)",
|
57 |
-
),
|
58 |
-
],
|
59 |
-
)
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import openai
|
5 |
import gradio as gr
|
6 |
+
import faiss
|
7 |
+
import os
|
8 |
+
|
9 |
+
# Load the pre-extracted document texts and the pre-built FAISS index.
# `df` must contain a 'pdf_content' column; rows are looked up by position
# using the indices FAISS returns.
df = pd.read_csv('extracted_texts_df.csv')
faiss_index_path = "faiss_index.index"
index = faiss.read_index(faiss_index_path)

# Sentence embedder used for queries; NOTE(review): this must be the same
# model that produced the vectors stored in the FAISS index — confirm.
embedder = SentenceTransformer('all-mpnet-base-v2')
|
15 |
+
|
16 |
+
# Map FAISS result indices back to document text.
def get_content_from_indices(indices):
    """Return the concatenated 'pdf_content' for each retrieved row index.

    Parameters
    ----------
    indices : 2-D array-like
        Index matrix as returned by ``faiss.Index.search`` (shape
        ``(n_queries, top_k)``); only the first query's row is used.

    Returns
    -------
    str
        The matching document texts joined by blank lines, with a
        placeholder string for any index that has no corresponding row.
    """
    contents = []
    for idx in indices[0]:
        # FAISS pads missing neighbours with -1; the previous `idx < len(df)`
        # check let -1 through, silently returning the *last* row via
        # negative indexing.  Guard both bounds explicitly.
        if 0 <= idx < len(df):
            contents.append(df.iloc[idx]['pdf_content'])
        else:
            contents.append("Content not found.")
    return "\n\n".join(contents)
|
25 |
+
|
26 |
+
# Retrieve the most relevant document text(s) for a free-text query.
def search(query_text, top_k=1):
    """Embed *query_text*, run a FAISS nearest-neighbour lookup, and
    return the matching document content as a single string.

    ``top_k`` controls how many neighbours are retrieved (default 1).
    """
    # Encode on whatever device the model uses, then move to a NumPy array.
    embedded = embedder.encode(query_text, convert_to_tensor=True).cpu().numpy()

    # L2-normalise the vector and reshape to the (1, dim) matrix FAISS
    # expects for a single query.
    normalized = (embedded / np.linalg.norm(embedded)).reshape(1, -1)

    # FAISS returns (distances, indices); only the indices are needed here.
    _distances, neighbour_ids = index.search(normalized, top_k)

    return get_content_from_indices(neighbour_ids)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
+
# Retrieve the API key from the environment (Hugging Face Spaces secret)
# and configure the OpenAI client with it.  The original code read the key
# into `api_key` but never assigned it to `openai.api_key`, so every API
# call would fail authentication.
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key
|
48 |
+
|
49 |
+
# Generate the answer using the OpenAI chat API, grounded in retrieved docs.
def generate_answer(query):
    """Answer *query* using content retrieved from the FAISS index.

    The retrieved context is interpolated into the prompt and the model is
    instructed to answer only from that context.

    Returns the model's reply as a plain string.
    """
    # NOTE(review): the original prompt contained a stray Arabic character
    # ("formatو") and unbalanced quotes; the wording below is the cleaned-up
    # equivalent of the same instructions.
    prompt = f"""
    Answer the following query based on the provided content from pharmaceutical documents.
    Provide a detailed and accurate response in a readable format so the user can read comfortably. Give a very summarized answer at the end for those who want the answer in a short time. If you do not know the answer, say "I don't have any idea" and do not write anything more.
    Query:
    "{query}"
    Context:
    "{search(query)}"
    """

    messages = [
        {"role": "system", "content": "You are a pharmacy assistant providing detailed answers based on document content."},
        {"role": "user", "content": prompt},
    ]

    # Low temperature keeps the answer close to the retrieved context.
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,
        messages=messages,
    )

    return response.choices[0].message['content'].strip()
|
77 |
+
|
78 |
+
|
79 |
+
# Thin adapter between the Gradio UI and the answer generator.
def pharmacy_assistant(query):
    """Gradio callback: forward the user's question and return the answer."""
    return generate_answer(query)
|
83 |
+
|
84 |
+
# Build the Gradio UI: a single text box in, plain text out.
interface = gr.Interface(
    fn=pharmacy_assistant,
    title="Assistant",
    description="Ask questions about pharmaceutical products, and get detailed answers based on document content.",
    inputs=gr.Textbox(lines=2, placeholder="Ask your pharmacy-related question here..."),
    outputs="text",
)

# Launch with default settings (no debug mode).
interface.launch()
|