"""Streamlit chat bot that answers questions about the PDF files in ``./data``.

Pipeline: load the PDFs, split them into chunks, embed the chunks into a
FAISS index, and wire that index to a chat model through a
``ConversationalRetrievalChain``.
"""

import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import openai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Directory scanned for PDF files.
PDF_DIRECTORY = "./data"
# Where the FAISS index is written after it has been built.
DB_FAISS_PATH = 'vectore_Imstudio/faiss'
# Embedding model used to vectorise the chunks (run on CPU).
EMBEDDING_MODEL_NAME = 'Avditvs/multilingual-e5-small-distill-base-0.1'
# OpenAI-compatible endpoint serving the chat model.
LLM_BASE_URL = "https://bd4c-85-9-86-142.ngrok-free.app/v1"
LLM_API_KEY = "lm-studio"
LLM_MODEL_NAME = "lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF"


def get_pdf_load():
    """Load the PDF files found in ``./data`` and return the loaded documents."""
    loader = PyPDFDirectoryLoader(PDF_DIRECTORY)
    return loader.load()


def get_text_split(document):
    """Split the loaded documents into overlapping chunks.

    Args:
        document: Documents as returned by :func:`get_pdf_load`.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter`` configured
        with ``chunk_size=1000`` and ``chunk_overlap=100``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    return text_splitter.split_documents(document)


def get_vectorstore(texts):
    """Embed the chunks, build a FAISS index from them and save it to disk.

    Args:
        texts: Document chunks as returned by :func:`get_text_split`.

    Returns:
        The in-memory FAISS vector store (also saved under ``DB_FAISS_PATH``).
    """
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={'device': 'cpu'},
    )
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)
    return db


def get_chain(db):
    """Build the conversational retrieval chain on top of the vector store.

    Args:
        db: Vector store as returned by :func:`get_vectorstore`.

    Returns:
        A ``ConversationalRetrievalChain`` that retrieves the 2 best-matching
        chunks per question and also returns the source documents.
    """
    llm = ChatOpenAI(
        base_url=LLM_BASE_URL,
        api_key=LLM_API_KEY,
        temperature=0.1,
        model=LLM_MODEL_NAME,
    )
    return ConversationalRetrievalChain.from_llm(
        llm,
        db.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
    )


def main():
    """Render the Streamlit page and react to the three action buttons."""
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")

    # Streamlit re-runs this script from the top on every interaction, so
    # anything that must outlive a single button click (the built chain and
    # the running conversation) has to live in session state, not in locals.
    if "qa_chain" not in st.session_state:
        st.session_state["qa_chain"] = None
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []

    user_question = st.text_input("Ask a question about your documents:")

    if st.button("Build Model"):
        with st.spinner("Waiting"):
            document = get_pdf_load()
            if not document:
                # Building an index from zero documents would fail inside
                # FAISS; report the real cause instead.
                st.error(f"No PDF documents were found in {PDF_DIRECTORY}.")
            else:
                texts = get_text_split(document)
                db = get_vectorstore(texts)
                st.session_state["qa_chain"] = get_chain(db)
                # A rebuilt index starts a fresh conversation.
                st.session_state["chat_history"] = []
                st.write("compelete build model")

    if st.button("Answer"):
        qa_chain = st.session_state["qa_chain"]
        if qa_chain is None:
            st.warning('Click "Build Model" before asking a question.')
        elif not user_question.strip():
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Answering"):
                # The chain expects the new question plus the previous
                # (question, answer) pairs of the conversation.
                result = qa_chain.invoke(
                    {
                        "question": user_question,
                        "chat_history": st.session_state["chat_history"],
                    }
                )
            answer = result["answer"]
            st.write(answer)
            st.session_state["chat_history"].append((user_question, answer))

    if st.button("CLEAR"):
        with st.spinner("CLEARING"):
            st.cache_data.clear()
            # Also forget the conversation so the next question starts clean.
            st.session_state["chat_history"] = []


if __name__ == "__main__":
    main()