Rohan Kataria committed
Commit · 085c24c
Parent(s): 1205205
adding app
Browse files:
- .gitignore +5 -0
- app.py +115 -0
- requirements.txt +3 -0
- src/main.py +113 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
+.history/
+.vscode/
+notebooks/
+.*ipynb
+__pycache__/
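One note on the patterns above: .*ipynb only matches dot-prefixed names such as .foo.ipynb; if the intent was to ignore every notebook outside notebooks/, the conventional pattern would be *.ipynb.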
app.py
ADDED
@@ -0,0 +1,115 @@
+# Streamlit App to perform the conversational retrieval using ConversationalResponse class
+# 1. Main Title of App
+# 2. PDF File Loader
+# 3. Streaming Chat Window to ask questions and get answers from ConversationalResponse
+# 4. Callback Handler to stream the output of the ConversationalResponse
+# 5. Handle the chat interaction with the ConversationalResponse
+
+import streamlit as st
+from streamlit_chat import message
+from langchain.callbacks.base import BaseCallbackHandler
+from src.main import ConversationalResponse
+import os
+
+from dotenv import load_dotenv, find_dotenv
+_ = load_dotenv(find_dotenv())
+
+# Constants
+ROLE_USER = "user"
+ROLE_ASSISTANT = "assistant"
+
+st.set_page_config(page_title="Chat with Documents", page_icon="🦜")
+st.title("Chat with PDF Documents 🤖📄")
+st.markdown("by [Rohan Kataria](https://www.linkedin.com/in/imrohan/) view more at [VEW.AI](https://vew.ai/)")
+# Streamlit message block
+st.markdown("This app allows you to chat with documents. You can upload a PDF file and ask questions about it. In the background it uses the ConversationalRetrieval chain from langchain and Streamlit for the UI.")
+
+class StreamHandler(BaseCallbackHandler):
+    """
+    StreamHandler is a callback handler that streams the output of the ConversationalResponse.
+    """
+    def __init__(self, container: st.delta_generator.DeltaGenerator, initial_text: str = ""):
+        self.container = container
+        self.text = initial_text
+
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        self.text += token
+        self.container.markdown(self.text)
+
+@st.cache_resource(ttl="1h")
+def load_agent(file_path, api_key):
+    """
+    Load the ConversationalResponse agent from the given file path.
+    """
+    with st.spinner('Loading the file...'):
+        agent = ConversationalResponse(file_path, api_key)
+    st.success("File Loaded Successfully")
+    return agent
+
+def handle_chat(agent):
+    """
+    Handle the chat interaction with the user.
+    """
+    if "messages" not in st.session_state or st.sidebar.button("Clear message history"):
+        st.session_state["messages"] = [{"role": ROLE_ASSISTANT, "content": "How can I help you?"}]
+
+    for msg in st.session_state.messages:
+        st.chat_message(msg["role"]).write(msg["content"])
+
+    user_query = st.chat_input(placeholder="Ask me anything!")
+
+    if user_query:
+        st.session_state.messages.append({"role": ROLE_USER, "content": user_query})
+        st.chat_message(ROLE_USER).write(user_query)
+
+        # Generate the response
+        with st.spinner("Generating response"):
+            response = agent(user_query)
+
+        # Display the response immediately
+        st.chat_message(ROLE_ASSISTANT).write(response)
+
+        # Add the response to the message history
+        st.session_state.messages.append({"role": ROLE_ASSISTANT, "content": response})
+
+
+def main():
+    """
+    Main function to handle file upload and chat interaction.
+    """
+
+    # API Key Loader
+    api_key = st.sidebar.text_input("Enter your OpenAI API Key", type="password")
+    if api_key:
+        os.environ["OPENAI_API_KEY"] = api_key
+    else:
+        st.sidebar.error("Please enter your OpenAI API Key.")
+        return
+
+    # PDF File Loader to upload the file in the sidebar in session state
+    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type="pdf")
+    if uploaded_file is None:
+        st.error("Please upload a file.")
+        return
+
+    file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type, "FileSize": uploaded_file.size}
+    st.write(file_details)
+
+    # Create a temp folder
+    if not os.path.exists("temp"):
+        os.mkdir("temp")
+    # Save the file in the temp folder
+    file_path = os.path.join("temp", uploaded_file.name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+
+    agent = load_agent(file_path, api_key)
+
+    handle_chat(agent)
+
+    # Delete the file from the temp folder
+    os.remove(file_path)
+
+
+if __name__ == "__main__":
+    main()
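One loose end worth flagging: app.py defines StreamHandler, and ConversationalResponse.__call__ accepts a callbacks argument, yet handle_chat calls agent(user_query) without a handler, so answers only render after generation finishes. A minimal sketch of how the two could be wired together for token-by-token streaming (hypothetical; not part of this commit):

# Inside handle_chat, replacing the spinner/response block above:
with st.chat_message(ROLE_ASSISTANT):
    stream_handler = StreamHandler(st.empty())  # stream tokens into an empty placeholder
    response = agent(user_query, callbacks=[stream_handler])
st.session_state.messages.append({"role": ROLE_ASSISTANT, "content": response})

With that wiring, on_llm_new_token updates the placeholder as tokens arrive, and the final answer is still appended to the history as before. The app itself is launched the usual way for Streamlit projects, i.e. streamlit run app.py.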
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+openai
+streamlit
+langchain
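As written, this list is likely incomplete for the code in this commit: app.py also imports streamlit_chat and dotenv, and src/main.py's PyPDFLoader, DocArrayInMemorySearch, and OpenAIEmbeddings pull in pypdf, docarray, and tiktoken respectively. A fuller requirements.txt would plausibly look like this (an assumption based on the imports, not tested on the Space):

openai
streamlit
langchain
streamlit-chat
python-dotenv
pypdf
docarray
tiktoken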
src/main.py
ADDED
@@ -0,0 +1,113 @@
+"""
+This is the main logic file for the project, responsible for the following:
+1. Read the loaded file using langchain
+2. Split the loaded data into chunks
+3. Ingest the data in vector form
+4. Run the Conversational Retrieval logic on the loaded data to create a conversational response
+5. Return the response to the user (Output)
+"""
+
+# Importing the required libraries
+import os
+import openai
+import sys
+sys.path.append('../..')  # To import the langchain package from the parent directory
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+from langchain.vectorstores import DocArrayInMemorySearch
+from langchain.document_loaders import TextLoader, PyPDFLoader
+from langchain.chains import RetrievalQA, ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+from langchain.chat_models import ChatOpenAI
+from langchain.llms import OpenAI
+import datetime
+from langchain.prompts import PromptTemplate
+
+from dotenv import load_dotenv, find_dotenv
+_ = load_dotenv(find_dotenv())
+
+# Function to load the data from the file
+def load_data(file_path):
+    loader = PyPDFLoader(file_path)
+    pages = loader.load()
+    return pages
+
+# Function to split the data into chunks
+def split_data(data):
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=150,
+    )
+    chunks = splitter.split_documents(data)
+    return chunks
+
+# # Creating the OpenAI Embeddings
+# embeddings = OpenAIEmbeddings()
+
+# Function to ingest the data in vector form in memory
+def ingest_data(chunks, embeddings):
+    vector_store = DocArrayInMemorySearch.from_documents(chunks, embeddings)
+    return vector_store
+
+# Function to create the conversational response
+def create_conversational_response(vector_store, chain_type, k):
+
+    # Creating the retriever; this could also be a contextual compression retriever
+    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k})  # search_type can be "similarity" or "mmr"
+
+    # Creating Memory
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        input_key="question",
+        output_key="answer",
+        return_messages=True)
+
+    # Creating LLM
+    current_date = datetime.datetime.now().date()
+    if current_date < datetime.date(2023, 9, 2):
+        llm_name = "gpt-3.5-turbo-0301"
+    else:
+        llm_name = "gpt-3.5-turbo"
+
+    llm = ChatOpenAI(model=llm_name, temperature=0)
+
+    # Creating Prompt template
+    template = """
+    {chat_history}
+    {context}
+    Question: {question}
+    Helpful Answer:"""
+
+    PROMPT = PromptTemplate(input_variables=["chat_history", "context", "question"], template=template)
+
+    # Creating the conversational retrieval chain
+    chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        chain_type=chain_type,  # chain_type can be refine, stuff, or map_reduce
+        retriever=retriever,
+        memory=memory,
+        return_source_documents=True,  # The output then carries the source documents alongside the answer, which is why input_key and output_key must be specified in the memory above
+        combine_docs_chain_kwargs=dict({"prompt": PROMPT})
+    )
+    return chain
+
+# ConversationalResponse class to call all the defined functions in a single call
+class ConversationalResponse:
+    def __init__(self, file, api_key):
+        self.file = file
+        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
+        self.data = load_data(self.file)
+        self.chunks = split_data(self.data)
+        self.vector_store = ingest_data(self.chunks, embeddings)
+        self.chain_type = "stuff"
+        self.k = 5
+        self.chain = create_conversational_response(self.vector_store, self.chain_type, self.k)
+
+    def __call__(self, question, callbacks=None):
+        response = self.chain(question, callbacks=callbacks)
+        return response['answer']
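For reference, the class is designed to be used as a single callable; a minimal usage sketch (sample.pdf and the key value are hypothetical placeholders):

from src.main import ConversationalResponse

agent = ConversationalResponse("sample.pdf", api_key="sk-...")  # loads, splits, embeds, and builds the chain
print(agent("What is this document about?"))   # returns the chain's 'answer' field
print(agent("Summarize that in one line."))    # ConversationBufferMemory carries chat_history between calls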