Rahatara committed on
Commit
bad3068
1 Parent(s): bf340e7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import google.generativeai as genai
4
+ from typing import List, Tuple
5
+ import fitz # PyMuPDF
6
+ from sentence_transformers import SentenceTransformer
7
+ import numpy as np
8
+ import faiss
9
+
10
# Initialize Google API Key.
# Read the Gemini API key from the environment and register it with the SDK.
# The value may be None at this point; generate_response() raises a clear
# ValueError later if the key was never set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)
13
+
14
# Placeholder for the app's state
class MyApp:
    """Loads a PDF, indexes its pages with FAISS, and answers queries via Gemini.

    On construction it extracts text from the bundled "THEDIA1.pdf",
    embeds each page with a SentenceTransformer model, and builds an
    L2 FAISS index for nearest-neighbour search.
    """

    def __init__(self) -> None:
        self.documents = []     # list of {"page": int, "content": str}, one per PDF page
        self.embeddings = None  # float32 matrix, one row per page (set by build_vector_db)
        self.index = None       # FAISS IndexFlatL2 over self.embeddings
        # Load the embedding model ONCE. The original re-created a
        # SentenceTransformer inside every search() call, which reloads the
        # model weights per query and is very slow.
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.load_pdf("THEDIA1.pdf")
        self.build_vector_db()

    def load_pdf(self, file_path: str) -> None:
        """Extracts text from a PDF file and stores it in the app's documents.

        Each page becomes one document dict: {"page": 1-based page number,
        "content": extracted text}.
        """
        doc = fitz.open(file_path)
        try:
            self.documents = [
                {"page": page_num + 1, "content": doc[page_num].get_text()}
                for page_num in range(len(doc))
            ]
        finally:
            # Release the underlying file handle even if extraction fails;
            # the original never closed the document.
            doc.close()
        print("PDF processed successfully!")

    def build_vector_db(self) -> None:
        """Builds a vector database using FAISS and SentenceTransformer embeddings."""
        embeddings = self.model.encode([doc["content"] for doc in self.documents])
        self.embeddings = np.array(embeddings, dtype="float32")
        self.index = faiss.IndexFlatL2(self.embeddings.shape[1])
        self.index.add(self.embeddings)
        print("Vector database built successfully!")

    def search(self, query: str, top_k: int = 5) -> List[Tuple[int, str]]:
        """Searches for the most similar documents based on the query.

        Returns up to ``top_k`` (page_number, page_text) tuples, nearest first.
        """
        query_embedding = self.model.encode([query])
        _, indices = self.index.search(
            np.array(query_embedding, dtype="float32"), top_k
        )
        # FAISS pads the result with -1 when the index holds fewer than
        # top_k vectors; the original would then index documents[-1].
        return [
            (self.documents[idx]["page"], self.documents[idx]["content"])
            for idx in indices[0]
            if idx != -1
        ]

    def generate_response(self, query: str) -> str:
        """Generates a response using the Gemini model based on the query.

        Raises:
            ValueError: if GOOGLE_API_KEY was not set in the environment.
        """
        if not GOOGLE_API_KEY:
            raise ValueError("GOOGLE_API_KEY is not set. Please set it up.")

        generation_config = genai.types.GenerationConfig(
            temperature=0.7,
            max_output_tokens=512,
        )

        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        response = model.generate_content([query], generation_config=generation_config)

        # GenerateContentResponse is not indexable: the original
        # ``response[0].text`` raises TypeError at runtime. ``response.text``
        # is the documented accessor for the first candidate's text.
        return response.text if response.candidates else "No response generated."
63
+
64
# Gradio UI setup for interaction
def main():
    """Builds the MyApp state and launches the Gradio interface."""
    app = MyApp()

    def handle_query(query):
        """Return (search results, generated response) for the two output widgets."""
        search_results = app.search(query)
        response = app.generate_response(query)
        # gr.Interface declares TWO output components below, so the handler
        # must return one value per component, in order. The original
        # returned a single dict, which Gradio cannot map onto
        # [gr.JSON, gr.Textbox] and renders incorrectly.
        return search_results, response

    gr.Interface(
        fn=handle_query,
        inputs=gr.Textbox(placeholder="Enter your query here"),
        outputs=[
            gr.JSON(label="Search Results"),
            gr.Textbox(label="Generated Response"),
        ],
        title="Dialectical Behavioral Exercise with Gemini",
        description="This app uses Google Gemini to generate responses based on document content.",
    ).launch()


if __name__ == "__main__":
    main()