File size: 3,317 Bytes
9a66b4f
 
ec9e166
9a66b4f
ec9e166
cbf031d
9a66b4f
751653d
8e8ccf4
9c39b4d
9a66b4f
01481a7
9a66b4f
 
4ac050d
9a66b4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96cad08
ba6247e
 
 
 
 
 
751653d
ba6247e
ec9e166
 
33ac479
ec9e166
 
8c29218
33ac479
7e93132
9a66b4f
 
 
049d3a1
73989f9
049d3a1
9a66b4f
049d3a1
9a66b4f
 
 
446feac
 
ba6247e
751653d
9a66b4f
0c95774
652d1d7
 
 
0c95774
8c29218
9a66b4f
 
 
ec9e166
907eaa3
2a31b94
1d9366f
a7dd90a
9f78fda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import openai
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceBgeEmbeddings
import streamlit as st
###########################################################################################

def get_pdf_load():
    """Load the PDF files found under the relative path ``./``.

    Returns:
        Whatever ``PyPDFDirectoryLoader("./").load()`` produces for the
        PDFs in that directory.
    """
    return PyPDFDirectoryLoader("./").load()
########################################################################################
def get_text_split(document):
    """Split the loaded documents into overlapping chunks.

    Args:
        document: The documents returned by the PDF loader.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter``
        configured with ``chunk_size=1000`` and ``chunk_overlap=100``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=100,
        chunk_size=1000,
    )
    return splitter.split_documents(document)
#########################################################################################
def get_vectorstore(texts):
    """Embed the chunks, index them with FAISS and save the index locally.

    Args:
        texts: Document chunks to embed and index.

    Returns:
        The FAISS vector store built from ``texts``. The same store is also
        written to ``vectore_Imstudio/faiss`` via ``save_local``.
    """
    # Embeddings are computed on the CPU.
    # NOTE(review): a BGE embedding wrapper is paired with an e5 model here;
    # confirm that the wrapper's query handling suits this model.
    embedder = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},
    )
    index = FAISS.from_documents(texts, embedder)
    index.save_local('vectore_Imstudio/faiss')
    return index
############################################################################################
def get_chain(
    db,
    *,
    base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
    api_key="lm-studio",
    model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    temperature=0.1,
    k=2,
):
    """Build the conversational retrieval chain on top of a vector store.

    The connection settings used to be hard-coded in the body. They are now
    keyword-only parameters whose defaults are the original values, so
    ``get_chain(db)`` behaves as before while a different endpoint or model
    can be supplied without editing this function.

    Args:
        db: Vector store exposing ``as_retriever``.
        base_url: Base URL handed to ``ChatOpenAI``. The default looks like
            an ngrok tunnel address, which presumably changes whenever the
            tunnel is restarted -- override it when it does.
        api_key: API key handed to ``ChatOpenAI``.
        model: Model identifier handed to ``ChatOpenAI``.
        temperature: Sampling temperature handed to ``ChatOpenAI``.
        k: Value passed to the retriever as ``search_kwargs={'k': k}``.

    Returns:
        A ``ConversationalRetrievalChain`` created with
        ``return_source_documents=True``.
    """
    llm = ChatOpenAI(
        base_url=base_url,
        api_key=api_key,
        temperature=temperature,
        model=model,
    )
    retriever = db.as_retriever(search_kwargs={'k': k})
    # Build a chain
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, retriever, return_source_documents=True)
    return qa_chain
####################################################################################################################
def get_conversation(query_user, qa_chain=None):
    """Ask the retrieval chain a question and write the answer to the page.

    Args:
        query_user: The question text entered by the user.
        qa_chain: Chain to query. When omitted, the chain stored under
            ``st.session_state["qa_chain"]`` is used.

    Returns:
        The result mapping returned by the chain, or ``None`` when no chain
        is available yet or the question is empty (a warning is shown
        instead of raising).
    """
    if qa_chain is None:
        # The chain is built in main() and kept in the session state; it is
        # not a module-level name, so it has to be looked up explicitly.
        qa_chain = st.session_state.get("qa_chain")
    if qa_chain is None:
        st.warning('Please press "Build Model" before asking a question.')
        return None
    if not query_user or not query_user.strip():
        st.warning("Please enter a question first.")
        return None

    # Every question is sent without earlier turns, as before.
    chat_history = []
    # Persian prefix asking the model to explain the question in Persian.
    query_1 = "این سوال را به زبان فارسی تشریح کن:"
    query = query_1 + query_user
    result = qa_chain({'question': query, 'chat_history': chat_history})
    st.write('Answer of you question:' + result['answer'] + '\n')
    return result
####################################################################################################################
def main():
    """Render the Streamlit page: build the model and answer questions.

    Streamlit re-executes the script on every widget interaction, so the
    chain built under "Build Model" is stored in ``st.session_state`` to
    still be available on the later run triggered by "Answer".
    """
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )

    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if st.button("Build Model"):
        with st.spinner("Waiting"):
            document = get_pdf_load()
            st.write("load pdf")
            texts = get_text_split(document)
            st.write("text split")
            db = get_vectorstore(texts)
            st.write("vectore store")
            # Persist the chain across reruns; a local variable would be
            # gone by the time the "Answer" button is pressed.
            st.session_state["qa_chain"] = get_chain(db)
            st.write("compelete build model")

    if st.button("Answer"):
        with st.spinner("Answering"):
            get_conversation(
                query_user=user_question,
                qa_chain=st.session_state.get("qa_chain"),
            )
                
              
          
    #if st.button("CLEAR"):
            #with st.spinner("CLEARING"):
              #st.cache_data.clear()
         
    
    #with st.sidebar:
        #if st.button("Process build model"):
 


# Run the Streamlit page only when this file is executed as a script.
if __name__ == "__main__": 
    main()