File size: 2,368 Bytes
fe5256f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
060fa1e
 
 
 
fe5256f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# import required libraries
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
#from langchain.text_splitter import NLTKTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

import streamlit as st
import sys,yaml,Utilities as ut

def get_data(query):
    """Answer *query* against the persisted PDF Chroma index.

    Reads tokens/config via ``ut.get_tokens()``, builds HuggingFace
    embeddings, opens the on-disk Chroma vector store, and runs a
    ConversationalRetrievalChain over an MMR retriever.

    Args:
        query: The user's question text.

    Returns:
        The chain's answer string (``result['answer']``).
    """
    # Per-call history: nothing persists between invocations, so each
    # question is effectively answered without prior conversation context.
    chat_history = []

    initdict = ut.get_tokens()
    hf_token = initdict["hf_token"]
    embedding_model_id = initdict["embedding_model"]
    chromadbpath = initdict["chatPDF_chroma_db"]
    llm_repo_id = initdict["llm_repoid"]

    # Embedding model must match the one used when the index was built.
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_id)

    # Load the persisted vector store from disk; MMR retrieval trades a
    # little relevance for diversity across the k=2 returned chunks.
    db = Chroma(persist_directory=chromadbpath, embedding_function=embeddings)
    retriever = db.as_retriever(search_type="mmr", search_kwargs={'k': 2})

    # Pass the API token explicitly: it was fetched from config but never
    # used, leaving auth to whatever happened to be in the environment.
    llm = HuggingFaceHub(huggingfacehub_api_token=hf_token,
                         repo_id=llm_repo_id,
                         model_kwargs={"temperature": 0.2, "max_new_tokens": 50})

    # Create the Conversational Retrieval Chain and run the query.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever, return_source_documents=True)
    result = qa_chain({'question': query, 'chat_history': chat_history})

    # The chain expects history entries as (question, answer) tuples,
    # not the whole result dict.
    chat_history.append((query, result['answer']))
    return result['answer']
    
st.title("PatentGuru Document Reader")

# Main chat form: question box, a clear-history toggle, and a submit button.
with st.form("chat_form"):
    query = st.text_input("Chat with PDF: ")
    # NOTE(review): history lives only inside get_data(), so this checkbox
    # only reports a message — there is no persistent history to clear.
    clear_history = st.checkbox('Clear Chat History')
    submit_button = st.form_submit_button("Send")

if submit_button:
    if clear_history:
        st.write("Cleared previous chat history")

    if not query.strip():
        # Guard: don't invoke the LLM chain on an empty question.
        st.write("No results")
    else:
        response = get_data(query)
        if len(response) > 0:
            # Strip a leading "Answer: " prefix if the model emitted one.
            # str.partition returns (s, '', '') when the marker is absent,
            # so taking [-1] unconditionally would blank out valid answers.
            _, sep, tail = response.partition("Answer: ")
            if sep:
                response = tail
        else:
            response = "No results"

        # write results
        st.write(response)