Mishab committed on
Commit
0f6e5cd
1 Parent(s): 7e8f7e5

Updated code

Browse files

Updated Chunk split size, Updated the logic to load and split the data, Added MultiQueryRetriever, Added thumbs up and thumbs down emoji in UI.

Database/PDF_HTML_CHROMA_DB/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbc93c81c0b90157248b40367d521888edbc2ddc4a5c989b7b27cc7e8884103a
3
- size 224436224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e31d552a7a4981d60910ac3e293b5d53d0ba9503a95933ca21ab3a20b64ebc8
3
+ size 330657792
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/data_level0.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:311e71d48e2a495b503b772c206901c356178f3fcba063393d82b9befcb487aa
3
- size 43576000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dbd22f72c4c63507f549d3fe1d8350c50ba0bc9d64cc20f1d136119fb9a892e
3
+ size 85476000
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/header.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74789aa8d0d0a7b1d2b34adfb468c6a8c3c33257b650f7bc3071ba8960f2477c
3
  size 100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f2080adbc1c9cd3e086e238928ed1f139b21a0ebad87348b410770e6a45b37e
3
  size 100
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/index_metadata.pickle RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad3698c433298994d9ff4d9ff2e3a18b7a912d345e8d2eca149822335fd45aac
3
- size 1506322
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a37d72452ff59cb80ed779d0ff9ed91f9d6fe7c12adf909845168311e578c06b
3
+ size 2956679
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/length.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4a6372ec6043b713ff14a34d8290130b954aef6980807d08be18134073cac25
3
- size 104000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a50b6a6ebd0528902d0cbaa4d5d1c60af3a3fdc95a0738162eef134668c4d735
3
+ size 204000
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/link_lists.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:056d8a348024885261ef2b62f90bd5676434d76b5c10d3cc2ec96c0ce745b668
3
- size 219464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b00b245cd31e8691cd94191f3afefc59417c252bdabc5ec443aa58cf84328d4
3
+ size 426496
Database/text_chunks_html_pdf.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb9577bdbf5534b25597d36851f6b06f37eee2dfe6b09e2a51057bbbe9da301f
3
- size 19476054
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8248c4c27db5e0950be6c4bf560164990dbd348cddb2b419d6f6764011a5a605
3
+ size 22550517
OPM_Files/OPM_Retirement_backup-20230902T130906Z-001.zip DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:51bf0b1f9298ed989624d7f19d7f59e12fcb89e2ba087a2a0ae91204728523b4
3
- size 168746379
 
 
 
 
app.py CHANGED
@@ -238,6 +238,7 @@ if st.session_state["vector_db"] and st.session_state["llm"]:
238
  st.write("---") # Add a separator between entries
239
  message = {"role": "assistant", "content": full_response, "Source":merged_source_doc}
240
  st.session_state.messages.append(message)
 
241
  # else:
242
  # with st.expander("source"):
243
  # message = {"role": "assistant", "content": full_response, "Source":""}
 
238
  st.write("---") # Add a separator between entries
239
  message = {"role": "assistant", "content": full_response, "Source":merged_source_doc}
240
  st.session_state.messages.append(message)
241
+ st.markdown("👍 👎 Create Ticket")
242
  # else:
243
  # with st.expander("source"):
244
  # message = {"role": "assistant", "content": full_response, "Source":""}
utils.py CHANGED
@@ -16,7 +16,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
16
  from langchain.document_loaders import PyPDFLoader
17
  from langchain.text_splitter import RecursiveCharacterTextSplitter
18
  from langchain.document_loaders import PyPDFLoader, DirectoryLoader
19
- from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
20
  from langchain.chains import ConversationalRetrievalChain
21
  from langchain.prompts.prompt import PromptTemplate
22
  from langchain.vectorstores import Chroma
@@ -33,6 +33,8 @@ from langchain.agents import load_tools
33
  from langchain.chat_models import ChatOpenAI
34
  from langchain.retrievers.multi_query import MultiQueryRetriever
35
  from langchain.chains import RetrievalQA
 
 
36
 
37
  load_dotenv()
38
 
@@ -254,11 +256,14 @@ def load_text_chunks(text_chunks_pkl_dir):
254
  def load_ensemble_retriver(text_chunks, embeddings, chroma_vectorstore):
255
  """Load ensemble retiriever with BM25 and Chroma as individual retrievers"""
256
  bm25_retriever = BM25Retriever.from_documents(text_chunks)
257
- bm25_retriever.k = 1
258
- chroma_retriever = chroma_vectorstore.as_retriever(search_kwargs={"k": 1})
259
  ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.3, 0.7])
260
- # retriever_from_llm = MultiQueryRetriever.from_llm(retriever=ensemble_retriever, llm=ChatOpenAI())
261
- return ensemble_retriever
 
 
 
262
 
263
 
264
  def load_conversational_retrievel_chain(retriever, llm):
@@ -310,7 +315,7 @@ def load_conversational_retrievel_chain(retriever, llm):
310
  Helpful Answer:"""
311
 
312
  prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
313
- memory = ConversationBufferWindowMemory(input_key="question", memory_key="history", k=1)
314
 
315
  qa = RetrievalQA.from_chain_type(
316
  llm=llm,
 
16
  from langchain.document_loaders import PyPDFLoader
17
  from langchain.text_splitter import RecursiveCharacterTextSplitter
18
  from langchain.document_loaders import PyPDFLoader, DirectoryLoader
19
+ from langchain.memory import ConversationBufferMemory
20
  from langchain.chains import ConversationalRetrievalChain
21
  from langchain.prompts.prompt import PromptTemplate
22
  from langchain.vectorstores import Chroma
 
33
  from langchain.chat_models import ChatOpenAI
34
  from langchain.retrievers.multi_query import MultiQueryRetriever
35
  from langchain.chains import RetrievalQA
36
+ import logging
37
+
38
 
39
  load_dotenv()
40
 
 
256
  def load_ensemble_retriver(text_chunks, embeddings, chroma_vectorstore):
257
  """Load ensemble retiriever with BM25 and Chroma as individual retrievers"""
258
  bm25_retriever = BM25Retriever.from_documents(text_chunks)
259
+ bm25_retriever.k = 2
260
+ chroma_retriever = chroma_vectorstore.as_retriever(search_kwargs={"k": 3})
261
  ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.3, 0.7])
262
+ logging.basicConfig()
263
+ logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
264
+ retriever_from_llm = MultiQueryRetriever.from_llm(retriever=ensemble_retriever,
265
+ llm=ChatOpenAI(temperature=0))
266
+ return retriever_from_llm
267
 
268
 
269
  def load_conversational_retrievel_chain(retriever, llm):
 
315
  Helpful Answer:"""
316
 
317
  prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
318
+ memory = ConversationBufferMemory(input_key="question", memory_key="history")
319
 
320
  qa = RetrievalQA.from_chain_type(
321
  llm=llm,