"""Gradio pharmacy assistant: FAISS retrieval over extracted PDF text + OpenAI chat."""

import os

import faiss
import gradio as gr
import numpy as np
import openai
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load the CSV file and FAISS index
df = pd.read_csv('extracted_texts_df.csv')
faiss_index_path = "faiss_index.index"
index = faiss.read_index(faiss_index_path)
embedder = SentenceTransformer('all-mpnet-base-v2')


def get_content_from_indices(indices):
    """Return the ``pdf_content`` of the rows referenced by a FAISS result.

    Args:
        indices: The index array returned by ``index.search``; only its first
            row (``indices[0]``) is read, i.e. the hits for a single query.

    Returns:
        The matching ``pdf_content`` values joined by blank lines. Any index
        that does not address a row of ``df`` contributes the placeholder
        ``"Content not found."`` instead.
    """
    row_count = len(df)
    contents = [
        # FAISS pads missing hits with -1; without the lower bound,
        # ``df.iloc[-1]`` would silently return the last document.
        df.iloc[idx]['pdf_content'] if 0 <= idx < row_count else "Content not found."
        for idx in indices[0]
    ]
    return "\n\n".join(contents)


def search(query_text, top_k=1):
    """Embed ``query_text`` and return the text of its ``top_k`` nearest rows.

    Args:
        query_text: The user's question.
        top_k: Number of nearest neighbours to request from the FAISS index.

    Returns:
        The retrieved document contents as produced by
        ``get_content_from_indices``.
    """
    # Embed the query
    query_embedding = embedder.encode(query_text, convert_to_tensor=True)
    query_embedding = query_embedding.cpu().numpy()

    # Normalize the query embedding; skip the division for a zero vector so
    # the search never receives NaNs.
    norm = np.linalg.norm(query_embedding)
    if norm > 0:
        query_embedding = query_embedding / norm

    # FAISS expects a C-contiguous float32 matrix of shape (n_queries, dim).
    query_embedding_normalized = np.ascontiguousarray(
        query_embedding.reshape(1, -1), dtype=np.float32
    )

    # Perform the search; only the indices are needed here.
    _distances, indices = index.search(query_embedding_normalized, top_k)

    # Get the content based on indices
    return get_content_from_indices(indices)


# Retrieve the API key from Hugging Face Secrets
api_key = os.getenv("OPENAI_API_KEY")
if api_key:
    # Hand the key to the client explicitly so this variable is actually used.
    openai.api_key = api_key


def generate_answer(query):
    """Answer ``query`` with the OpenAI chat API, grounded in retrieved text.

    The context is obtained by calling ``search(query)`` and embedded in the
    user prompt together with the query.

    Args:
        query: The user's question.

    Returns:
        The stripped text of the first completion choice.
    """
    context = search(query)
    prompt = f"""
Answer the following query based on the provided content from pharmaceutical documents. Provide a detailed and accurate response in readable format and make the user read comfortably, get a very summarized answer at the end for who want take the answer in short time, and if you do not know the answer say ('I don't have any idea') and do not write anything more.
Query: "{query}"
Context: "{context}"
"""
    messages = [
        {"role": "system", "content": "You are a pharmacy assistant providing detailed answers based on document content."},
        {"role": "user", "content": prompt},
    ]
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",
        max_tokens=1500,
        n=1,
        stop=None,
        temperature=0.2,
        messages=messages,
    )
    generated_text = response.choices[0].message['content'].strip()
    return generated_text


def pharmacy_assistant(query):
    """Gradio callback: return the generated answer for ``query``."""
    response = generate_answer(query)
    return response


# Gradio interface
interface = gr.Interface(
    fn=pharmacy_assistant,
    inputs=gr.Textbox(lines=2, placeholder="Ask your pharmacy-related question here..."),
    outputs="text",
    title="Assistant",
    description="Ask questions about pharmaceutical products, and get detailed answers based on document content.",
)

# Try launching without debug mode
interface.launch()