lekkalar committed
Commit bead70d · 1 Parent(s): 928a91a

Update app.py

Files changed (1)
  1. app.py +51 -48
app.py CHANGED
@@ -14,6 +14,55 @@ from langchain.chat_models import ChatOpenAI # the LLM model we'll use (ChatGPT)
 from langchain import PromptTemplate
 
 
+def load_pdf_and_generate_embeddings(pdf_doc, open_ai_key, relevant_pages):
+    if open_ai_key is not None:
+        os.environ['OPENAI_API_KEY'] = open_ai_key
+        #OCR Conversion - skips conversion of pages that already contain text
+        pdf_doc = ocr_converter(pdf_doc)
+        #Load the pdf file
+        loader = OnlinePDFLoader(pdf_doc)
+        pages = loader.load_and_split()
+        print('pages loaded:', len(pages))
+
+        #Create an instance of OpenAIEmbeddings, which is responsible for generating embeddings for text
+        embeddings = OpenAIEmbeddings()
+
+        pages_to_be_loaded =[]
+
+        if relevant_pages:
+            page_numbers = relevant_pages.split(",")
+            if len(page_numbers) != 0:
+                for page_number in page_numbers:
+                    if page_number.isdigit():
+                        pageIndex = int(page_number)-1
+                        if pageIndex >=0 and pageIndex <len(pages):
+                            pages_to_be_loaded.append(pages[pageIndex])
+
+        #In the scenario where none of the page numbers supplied exist in the PDF, we will revert to using the entire PDF.
+        if len(pages_to_be_loaded) ==0:
+            pages_to_be_loaded = pages.copy()
+
+
+        #To create a vector store, we use the Chroma class, which takes the documents (pages in our case) and the embeddings instance
+        vectordb = Chroma.from_documents(pages_to_be_loaded, embedding=embeddings)
+
+        #Finally, we create the bot using the RetrievalQA class
+        global pdf_qa
+
+        prompt_template = """Use the following pieces of context to answer the question at the end. If you do not know the answer, just return N/A. If you encounter a date, return it in mm/dd/yyyy format.
+
+        {context}
+
+        Question: {question}
+        Return just the answer. Provide the answer in the JSON format and extract the key from the question :"""
+        PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+        chain_type_kwargs = {"prompt": PROMPT}
+        pdf_qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(temperature=0, model_name="gpt-4"),chain_type="stuff", retriever=vectordb.as_retriever(search_kwargs={"k": 5}), chain_type_kwargs=chain_type_kwargs, return_source_documents=False)
+
+        return "Ready"
+    else:
+        return "Please provide an OpenAI gpt-4 API key"
+
 def create_db_connection():
     DB_FILE = "./questionset.db"
     connection = sqlite3.connect(DB_FILE,check_same_thread=False)
@@ -162,53 +211,6 @@ def add_questionset(data, document_type, tag_for_questionset):
     connection.commit()
     connection.close()
 
-
-def load_pdf_and_generate_embeddings(pdf_doc, relevant_pages):
-    os.environ['OPENAI_API_KEY'] = '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'
-    #OCR Conversion - skips conversion of pages that already contain text
-    pdf_doc = ocr_converter(pdf_doc)
-    #Load the pdf file
-    loader = OnlinePDFLoader(pdf_doc)
-    pages = loader.load_and_split()
-    print('pages loaded:', len(pages))
-
-    #Create an instance of OpenAIEmbeddings, which is responsible for generating embeddings for text
-    embeddings = OpenAIEmbeddings()
-
-    pages_to_be_loaded =[]
-
-    if relevant_pages:
-        page_numbers = relevant_pages.split(",")
-        if len(page_numbers) != 0:
-            for page_number in page_numbers:
-                if page_number.isdigit():
-                    pageIndex = int(page_number)-1
-                    if pageIndex >=0 and pageIndex <len(pages):
-                        pages_to_be_loaded.append(pages[pageIndex])
-
-    #In the scenario where none of the page numbers supplied exist in the PDF, we will revert to using the entire PDF.
-    if len(pages_to_be_loaded) ==0:
-        pages_to_be_loaded = pages.copy()
-
-
-    #To create a vector store, we use the Chroma class, which takes the documents (pages in our case) and the embeddings instance
-    vectordb = Chroma.from_documents(pages_to_be_loaded, embedding=embeddings)
-
-    #Finally, we create the bot using the RetrievalQA class
-    global pdf_qa
-
-    prompt_template = """Use the following pieces of context to answer the question at the end. If you do not know the answer, just return N/A. If you encounter a date, return it in mm/dd/yyyy format.
-
-    {context}
-
-    Question: {question}
-    Return just the answer. Provide the answer in the JSON format and extract the key from the question :"""
-    PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-    chain_type_kwargs = {"prompt": PROMPT}
-    pdf_qa = RetrievalQA.from_chain_type(llm=ChatOpenAI(temperature=0, model_name="gpt-4"),chain_type="stuff", retriever=vectordb.as_retriever(search_kwargs={"k": 5}), chain_type_kwargs=chain_type_kwargs, return_source_documents=False)
-
-    return "Ready"
-
 def load_csv_and_store_questionset_into_sqlite(csv_file, document_type, tag_for_questionset):
     print('document type is:',document_type)
     print('tag_for_questionset is:',tag_for_questionset)
@@ -270,7 +272,7 @@ title = """
 <h1>Chatbot for PDFs - GPT-4</h1>
 <p style="text-align: center;">Upload a .PDF, click the "Upload PDF and generate embeddings" button, <br />
 Wait for the Status to show Ready. You can chose to get answers to the pre-defined question set OR ask your own question <br />
-The app is built on GPT-4 and leverages PromptTemplate</p>
+The app is built on GPT-4 and leverages the magic of PromptTemplate</p>
 </div>
 """
 
@@ -280,6 +282,7 @@ with gr.Blocks(css=css,theme=gr.themes.Monochrome()) as demo:
 
     with gr.Tab("Chatbot"):
         with gr.Column():
+            openai_key = gr.Textbox(label="Your GPT-4 OpenAI API key", type="password")
             pdf_doc = gr.File(label="Load a pdf",file_types=['.pdf'],type='file')
             relevant_pages = gr.Textbox(label="*Optional - List comma separated page numbers to load or leave this field blank to use the entire PDF")
 
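Editor's note: once the function returns "Ready", the global pdf_qa built in the added hunk is presumably what the rest of app.py queries to answer questions; that code is outside this diff. A minimal sketch of how a RetrievalQA chain configured this way is typically invoked with the legacy LangChain API; the helper name ask_question is hypothetical:

    # hypothetical helper (not part of this commit) showing a typical RetrievalQA call
    def ask_question(question: str) -> str:
        # with return_source_documents=False, run() returns only the answer text
        return pdf_qa.run(question)

    # e.g. ask_question("What is the effective date of the agreement?")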