Updated code
Browse files
Updated the chunk split size, updated the logic to load and split the data, added MultiQueryRetriever, and added thumbs-up and thumbs-down emoji in the UI.
- Database/PDF_HTML_CHROMA_DB/chroma.sqlite3 +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/data_level0.bin +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/header.bin +1 -1
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/index_metadata.pickle +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/length.bin +2 -2
- Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/link_lists.bin +2 -2
- Database/text_chunks_html_pdf.pkl +2 -2
- OPM_Files/OPM_Retirement_backup-20230902T130906Z-001.zip +0 -3
- app.py +1 -0
- utils.py +11 -6
Database/PDF_HTML_CHROMA_DB/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e31d552a7a4981d60910ac3e293b5d53d0ba9503a95933ca21ab3a20b64ebc8
|
3 |
+
size 330657792
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/data_level0.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dbd22f72c4c63507f549d3fe1d8350c50ba0bc9d64cc20f1d136119fb9a892e
|
3 |
+
size 85476000
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/header.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f2080adbc1c9cd3e086e238928ed1f139b21a0ebad87348b410770e6a45b37e
|
3 |
size 100
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/index_metadata.pickle
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a37d72452ff59cb80ed779d0ff9ed91f9d6fe7c12adf909845168311e578c06b
|
3 |
+
size 2956679
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/length.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a50b6a6ebd0528902d0cbaa4d5d1c60af3a3fdc95a0738162eef134668c4d735
|
3 |
+
size 204000
|
Database/PDF_HTML_CHROMA_DB/{f216dc0a-1fd0-4ec6-9374-c6b509f4406e → e9e678e2-95d4-4b02-8a1e-7c3ca59754d7}/link_lists.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b00b245cd31e8691cd94191f3afefc59417c252bdabc5ec443aa58cf84328d4
|
3 |
+
size 426496
|
Database/text_chunks_html_pdf.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8248c4c27db5e0950be6c4bf560164990dbd348cddb2b419d6f6764011a5a605
|
3 |
+
size 22550517
|
OPM_Files/OPM_Retirement_backup-20230902T130906Z-001.zip
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:51bf0b1f9298ed989624d7f19d7f59e12fcb89e2ba087a2a0ae91204728523b4
|
3 |
-
size 168746379
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -238,6 +238,7 @@ if st.session_state["vector_db"] and st.session_state["llm"]:
|
|
238 |
st.write("---") # Add a separator between entries
|
239 |
message = {"role": "assistant", "content": full_response, "Source":merged_source_doc}
|
240 |
st.session_state.messages.append(message)
|
|
|
241 |
# else:
|
242 |
# with st.expander("source"):
|
243 |
# message = {"role": "assistant", "content": full_response, "Source":""}
|
|
|
238 |
st.write("---") # Add a separator between entries
|
239 |
message = {"role": "assistant", "content": full_response, "Source":merged_source_doc}
|
240 |
st.session_state.messages.append(message)
|
241 |
+
st.markdown("👍 👎 Create Ticket")
|
242 |
# else:
|
243 |
# with st.expander("source"):
|
244 |
# message = {"role": "assistant", "content": full_response, "Source":""}
|
utils.py
CHANGED
@@ -16,7 +16,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
|
|
16 |
from langchain.document_loaders import PyPDFLoader
|
17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
|
19 |
-
from langchain.memory import ConversationBufferMemory
|
20 |
from langchain.chains import ConversationalRetrievalChain
|
21 |
from langchain.prompts.prompt import PromptTemplate
|
22 |
from langchain.vectorstores import Chroma
|
@@ -33,6 +33,8 @@ from langchain.agents import load_tools
|
|
33 |
from langchain.chat_models import ChatOpenAI
|
34 |
from langchain.retrievers.multi_query import MultiQueryRetriever
|
35 |
from langchain.chains import RetrievalQA
|
|
|
|
|
36 |
|
37 |
load_dotenv()
|
38 |
|
@@ -254,11 +256,14 @@ def load_text_chunks(text_chunks_pkl_dir):
|
|
254 |
def load_ensemble_retriver(text_chunks, embeddings, chroma_vectorstore):
|
255 |
"""Load ensemble retiriever with BM25 and Chroma as individual retrievers"""
|
256 |
bm25_retriever = BM25Retriever.from_documents(text_chunks)
|
257 |
-
bm25_retriever.k =
|
258 |
-
chroma_retriever = chroma_vectorstore.as_retriever(search_kwargs={"k":
|
259 |
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.3, 0.7])
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
262 |
|
263 |
|
264 |
def load_conversational_retrievel_chain(retriever, llm):
|
@@ -310,7 +315,7 @@ def load_conversational_retrievel_chain(retriever, llm):
|
|
310 |
Helpful Answer:"""
|
311 |
|
312 |
prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
|
313 |
-
memory =
|
314 |
|
315 |
qa = RetrievalQA.from_chain_type(
|
316 |
llm=llm,
|
|
|
16 |
from langchain.document_loaders import PyPDFLoader
|
17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
|
19 |
+
from langchain.memory import ConversationBufferMemory
|
20 |
from langchain.chains import ConversationalRetrievalChain
|
21 |
from langchain.prompts.prompt import PromptTemplate
|
22 |
from langchain.vectorstores import Chroma
|
|
|
33 |
from langchain.chat_models import ChatOpenAI
|
34 |
from langchain.retrievers.multi_query import MultiQueryRetriever
|
35 |
from langchain.chains import RetrievalQA
|
36 |
+
import logging
|
37 |
+
|
38 |
|
39 |
load_dotenv()
|
40 |
|
|
|
256 |
def load_ensemble_retriver(text_chunks, embeddings, chroma_vectorstore):
|
257 |
"""Load ensemble retiriever with BM25 and Chroma as individual retrievers"""
|
258 |
bm25_retriever = BM25Retriever.from_documents(text_chunks)
|
259 |
+
bm25_retriever.k = 2
|
260 |
+
chroma_retriever = chroma_vectorstore.as_retriever(search_kwargs={"k": 3})
|
261 |
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, chroma_retriever], weights=[0.3, 0.7])
|
262 |
+
logging.basicConfig()
|
263 |
+
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
|
264 |
+
retriever_from_llm = MultiQueryRetriever.from_llm(retriever=ensemble_retriever,
|
265 |
+
llm=ChatOpenAI(temperature=0))
|
266 |
+
return retriever_from_llm
|
267 |
|
268 |
|
269 |
def load_conversational_retrievel_chain(retriever, llm):
|
|
|
315 |
Helpful Answer:"""
|
316 |
|
317 |
prompt = PromptTemplate(input_variables=["history", "context", "question"], template=template)
|
318 |
+
memory = ConversationBufferMemory(input_key="question", memory_key="history")
|
319 |
|
320 |
qa = RetrievalQA.from_chain_type(
|
321 |
llm=llm,
|