Spaces:
Sleeping
Sleeping
Siddartha10
committed on
Upload 2 files
Browse files- app.py +116 -0
- policy_data.pdf +0 -0
app.py
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from openai import OpenAI
|
2 |
+
import streamlit as st
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
import shelve
|
6 |
+
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
7 |
+
from langchain_core.prompts import PromptTemplate
|
8 |
+
from langchain.chains import RetrievalQA
|
9 |
+
from ragatouille import RAGPretrainedModel
|
10 |
+
from langchain.document_loaders import PyPDFLoader
|
11 |
+
|
12 |
+
# Build (or reload) the ColBERT index over the policy PDF and expose it as a
# LangChain retriever.
load_dotenv()

file_path = "policy_data.pdf"
index_dir = ".ragatouille/colbert/indexes/policy_data/"
# The presence of this file is what the script treats as "index already built".
ivf_path = os.path.join(index_dir, "ivf.pid.pt")
index_exists = os.path.exists(ivf_path)

if index_exists:
    # Reuse the on-disk index; neither the PDF nor the base checkpoint is
    # needed on this path, so they are not loaded.
    st.write("Loading existing index...")
    RAG = RAGPretrainedModel.from_index(index_dir)
else:
    st.write("Indexing document...")
    RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

    # Concatenate every page's text into one document; RAG.index is asked to
    # split it (split_documents=True, max_document_length=512).
    pages = PyPDFLoader(file_path).load()
    full_document = "".join(page.page_content for page in pages)

    # NOTE(review): presumably index_name="policy_data" makes RAGatouille
    # write under index_dir above -- confirm against the installed version.
    RAG.index(
        collection=[full_document],
        index_name="policy_data",
        max_document_length=512,
        split_documents=True,
    )
    st.write("Indexing complete!")

# Both paths end with the same retriever configuration (top 3 passages).
retriever = RAG.as_langchain_retriever(k=3)
|
43 |
+
|
44 |
+
# Prompt handed to the "stuff" QA chain: it is formatted with a {context}
# slot and the user's {question}.
template = """Use the context below to answer the question.
Keep the answer concise and to the point.
If you are unsure about the answer, just say i do not know the answer to the question do not create your own answer and make sure the answer is concise and to the point.
Summarize the information such that main points are covered and if you think that there needs to be some more information added to the answer then you can add that information as well.
{context}

Question: {question}

Helpful Answer:"""
prompt = PromptTemplate.from_template(template)
chain_type_kwargs = dict(prompt=prompt)

st.title("Streamlit Chatbot Interface")

# Avatars shown next to each chat bubble.
USER_AVATAR = "👤"
BOT_AVATAR = "🤖"

# Retrieval-augmented QA chain: retriever output + prompt -> chat model.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.4, max_tokens=500)
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
)

# Ensure openai_model is initialized in session state
if "openai_model" not in st.session_state:
    st.session_state["openai_model"] = "gpt-3.5-turbo"
|
73 |
+
|
74 |
+
|
75 |
+
# Load chat history from shelve file
def load_chat_history():
    """Return the message list stored under "messages" in the shelve file.

    Opens the shelf named "chat_history" (relative to the current working
    directory) and returns an empty list when no "messages" entry exists.
    """
    with shelve.open("chat_history") as history_store:
        if "messages" in history_store:
            return history_store["messages"]
        return []
|
79 |
+
|
80 |
+
|
81 |
+
# Save chat history to shelve file
def save_chat_history(messages):
    """Store ``messages`` under the "messages" key of the shelve file.

    Opens the shelf named "chat_history" (relative to the current working
    directory) and replaces any previously stored value.
    """
    with shelve.open("chat_history") as history_store:
        history_store["messages"] = messages
|
85 |
+
|
86 |
+
|
87 |
+
# Initialize or load chat history: only when the session has no "messages"
# entry yet is the transcript read from the shelve file.
if "messages" not in st.session_state:
    st.session_state.messages = load_chat_history()

# Sidebar with a button to delete chat history (clears both the in-session
# list and the stored copy).
with st.sidebar:
    clear_requested = st.button("Delete Chat History")
    if clear_requested:
        st.session_state.messages = []
        save_chat_history([])

# Display chat messages, choosing the avatar from the message's role.
for entry in st.session_state.messages:
    role = entry["role"]
    bubble_avatar = USER_AVATAR if role == "user" else BOT_AVATAR
    with st.chat_message(role, avatar=bubble_avatar):
        st.markdown(entry["content"])
|
102 |
+
|
103 |
+
# Main chat interface: take one user question, answer it through the QA
# chain, and persist the updated transcript.
# The input is bound to its own name so the module-level PromptTemplate called
# `prompt` is not overwritten with the user's text.
if user_question := st.chat_input("How can I help?"):
    st.session_state.messages.append({"role": "user", "content": user_question})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(user_question)

    with st.chat_message("assistant", avatar=BOT_AVATAR):
        message_placeholder = st.empty()
        # The chain's output is read from its "result" key.
        response = chain.invoke(user_question)
        full_response = response["result"]
        message_placeholder.markdown(full_response)

    st.session_state.messages.append({"role": "assistant", "content": full_response})
    save_chat_history(st.session_state.messages)
|
policy_data.pdf
ADDED
Binary file (850 kB). View file
|
|