ali121300's picture
Update app.py
01481a7 verified
raw
history blame
3.32 kB
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import openai
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceBgeEmbeddings
import streamlit as st
###########################################################################################
def get_pdf_load():
    """Load the PDF files located in the current working directory.

    Returns:
        Whatever ``PyPDFDirectoryLoader("./").load()`` yields for the PDFs
        it finds there.
    """
    return PyPDFDirectoryLoader("./").load()
########################################################################################
def get_text_split(document):
    """Split loaded documents into overlapping chunks.

    Args:
        document: The loaded documents to split.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter``
        configured with ``chunk_size=1000`` and ``chunk_overlap=100``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    return splitter.split_documents(document)
#########################################################################################
def get_vectorstore(texts):
    """Embed the chunks, build a FAISS index from them, and save it to disk.

    Args:
        texts: The document chunks to embed and index.

    Returns:
        The in-memory FAISS vector store. The same index is also written
        to ``vectore_Imstudio/faiss`` on every call.
    """
    index_dir = 'vectore_Imstudio/faiss'

    # NOTE(review): the model is an E5 distillation but is loaded through the
    # BGE embeddings wrapper — confirm the wrapper's query-instruction
    # handling is what this model expects.
    embedder = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},  # embeddings are computed on CPU
    )

    store = FAISS.from_documents(texts, embedder)
    store.save_local(index_dir)
    return store
############################################################################################
def get_chain(
    db,
    base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
    api_key="lm-studio",
    model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    temperature=0.1,
    k=2,
):
    """Build a conversational retrieval chain on top of a vector store.

    The endpoint settings used to be hard-coded. They are now keyword
    parameters whose defaults are the original values, so ``get_chain(db)``
    behaves exactly as before while callers can point at a different server
    or model without editing this function.

    Args:
        db: Vector store exposing ``as_retriever``.
        base_url: Base URL of the OpenAI-compatible chat endpoint.
        api_key: API key sent to that endpoint.
        model: Model identifier requested from the endpoint.
        temperature: Sampling temperature passed to the chat model.
        k: Value passed as ``search_kwargs={'k': k}`` to the retriever.

    Returns:
        A ``ConversationalRetrievalChain`` created with
        ``return_source_documents=True``.
    """
    llm = ChatOpenAI(
        base_url=base_url,
        api_key=api_key,
        temperature=temperature,
        model=model,
    )
    retriever = db.as_retriever(search_kwargs={'k': k})
    # Build the chain.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        return_source_documents=True,
    )
    return qa_chain
####################################################################################################################
# Key under which the built chain is kept in ``st.session_state`` so that it
# survives Streamlit's script reruns.
_QA_CHAIN_KEY = "qa_chain"


def get_conversation(query_user, qa_chain=None):
    """Ask the retrieval chain a question and display the answer.

    The user's question is prefixed with a fixed Persian instruction before
    being sent to the chain. An empty chat history is passed on every call,
    so each question is answered independently of earlier ones.

    Args:
        query_user: The question typed by the user.
        qa_chain: Chain to query. When omitted, the chain stored in
            ``st.session_state`` by ``main`` is used. Previously this
            function read an undefined global and raised ``NameError``.

    Returns:
        The chain's result mapping, or ``None`` when no chain has been built
        yet or the question is empty (a warning is shown in both cases).
    """
    if qa_chain is None:
        qa_chain = st.session_state.get(_QA_CHAIN_KEY)
    if qa_chain is None:
        st.warning('Please click "Build Model" before asking a question.')
        return None

    # Do not send a prompt that consists only of the instruction prefix.
    if not query_user or not query_user.strip():
        st.warning("Please enter a question first.")
        return None

    chat_history = []
    query_1 = "این سوال را به زبان فارسی تشریح کن:"
    query = query_1 + query_user
    result = qa_chain({'question': query, 'chat_history': chat_history})
    st.write('Answer of you question:' + result['answer'] + '\n')
    return result


####################################################################################################################
def main():
    """Render the Streamlit page and wire up the two buttons.

    "Build Model" loads the PDFs, splits them, builds the vector store and
    the chain, and stores the chain in ``st.session_state``. "Answer" sends
    the current question to that stored chain.
    """
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")

    if st.button("Build Model"):
        with st.spinner("Waiting"):
            document = get_pdf_load()
            st.write("load pdf")
            texts = get_text_split(document)
            st.write("text split")
            db = get_vectorstore(texts)
            st.write("vectore store")
            # Streamlit reruns the whole script on every interaction, so a
            # local variable would be gone by the time "Answer" is clicked.
            st.session_state[_QA_CHAIN_KEY] = get_chain(db)
            st.write("compelete build model")

    if st.button("Answer"):
        with st.spinner("Answering"):
            get_conversation(query_user=user_question)

    # TODO: a "CLEAR" button (st.cache_data.clear()) and a sidebar
    # "Process build model" button were sketched here but never enabled.


if __name__ == "__main__":
    main()