|
from sentence_transformers import SentenceTransformer |
|
import pandas as pd |
|
import numpy as np |
|
import openai |
|
import gradio as gr |
|
import faiss |
|
import os |
|
|
|
|
|
# Table of extracted document texts. Rows are addressed by position and the
# `pdf_content` column is what gets returned to the caller.
df = pd.read_csv("extracted_texts_df.csv")

# Prebuilt FAISS index loaded from disk.
# NOTE(review): presumably its vector ids line up with the row positions of
# `df` -- confirm against the script that built the index.
faiss_index_path = "faiss_index.index"
index = faiss.read_index(faiss_index_path)

# Sentence-embedding model used to encode incoming queries.
embedder = SentenceTransformer("all-mpnet-base-v2")
|
|
|
|
|
def get_content_from_indices(indices, *, frame=None):
    """Join the document texts for the first row of a FAISS search result.

    Args:
        indices: 2-D array-like of row positions, as returned by
            ``index.search``. Only the first row (``indices[0]``) is read.
        frame: Optional DataFrame with a ``pdf_content`` column to look the
            positions up in. Defaults to the module-level ``df``.

    Returns:
        The matched ``pdf_content`` values joined by a blank line. Every
        position outside ``0 <= idx < len(frame)`` contributes the
        placeholder ``"Content not found."`` instead.
    """
    table = df if frame is None else frame
    row_count = len(table)

    contents = []
    for idx in indices[0]:
        # FAISS pads missing hits with -1. Without the lower bound, -1 would
        # pass the check and iloc[-1] would silently return the LAST row.
        if 0 <= idx < row_count:
            contents.append(table.iloc[idx]["pdf_content"])
        else:
            contents.append("Content not found.")
    return "\n\n".join(contents)
|
|
|
|
|
def search(query_text, top_k=1):
    """Look up the stored document text for the best index hits of a query.

    Args:
        query_text: Text to embed and search for.
        top_k: Number of hits to request from the FAISS index.

    Returns:
        The text produced by ``get_content_from_indices`` for the hits the
        index returned.
    """
    # Encode as a tensor, then move it to host memory as a NumPy array.
    vector = embedder.encode(query_text, convert_to_tensor=True).cpu().numpy()

    # Scale to unit length and shape as a single-row matrix for the lookup.
    unit_row = (vector / np.linalg.norm(vector)).reshape(1, -1)

    # Only the hit positions are needed; the distances are discarded.
    _, hit_positions = index.search(unit_row, top_k)
    return get_content_from_indices(hit_positions)
|
|
|
|
|
# Read the OpenAI credential from the environment and hand it to the client
# module explicitly; previously the value was read but never used.
api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = api_key
|
|
|
|
|
def _build_prompt(query, context):
    """Assemble the user message for *query* and its retrieved *context*."""
    return (
        "Answer the following query based on the provided content from "
        "pharmaceutical documents.\n"
        "Provide a detailed and accurate response in a readable format so the "
        "user can read it comfortably, and end with a very short summary for "
        "readers who want the answer quickly. If you do not know the answer, "
        "say \"I don't have any idea\" and do not write anything more.\n\n"
        f'Query:\n"{query}"\n\n'
        f'Context:\n"{context}"\n'
    )


def generate_answer(query):
    """Answer *query* with the chat model, grounded in retrieved document text.

    The query is first passed to ``search`` to fetch context, then sent with
    that context to ``openai.ChatCompletion.create``.

    Args:
        query: The user's question.

    Returns:
        The model's reply with surrounding whitespace stripped, or a short
        prompt to enter a question when *query* is empty or whitespace-only
        (in which case neither retrieval nor the API is called).
    """
    # Nothing to embed or answer: skip retrieval and the API call.
    if not query or not query.strip():
        return "Please enter a question."

    # Retrieve context up front instead of calling search() inside the f-string.
    context = search(query)

    messages = [
        {
            "role": "system",
            "content": "You are a pharmacy assistant providing detailed answers based on document content.",
        },
        {"role": "user", "content": _build_prompt(query, context)},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,
        messages=messages,
    )

    # Treat a missing reply body as empty text rather than failing on .strip().
    reply = response.choices[0].message["content"]
    return (reply or "").strip()
|
|
|
|
|
|
|
def pharmacy_assistant(query):
    """Gradio callback: return the generated answer for *query*."""
    return generate_answer(query)
|
|
|
# Two-line free-text input for the user's question.
_question_box = gr.Textbox(
    lines=2,
    placeholder="Ask your pharmacy-related question here...",
)

# Web UI that routes the question to pharmacy_assistant and shows the reply as text.
interface = gr.Interface(
    fn=pharmacy_assistant,
    inputs=_question_box,
    outputs="text",
    title="Assistant",
    description="Ask questions about pharmaceutical products, and get detailed answers based on document content.",
)

# Start the app as soon as the module runs.
interface.launch()