# Streamlit chat bot over local PDFs: LangChain + FAISS + LM Studio (OpenAI-compatible API).
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import openai
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceBgeEmbeddings
import streamlit as st
########################################################################################### | |
def get_pdf_load():
    """Load every PDF found in the current working directory.

    Returns:
        The list of page documents produced by ``PyPDFDirectoryLoader``.
    """
    pdf_loader = PyPDFDirectoryLoader("./")
    return pdf_loader.load()
######################################################################################## | |
def get_text_split(document):
    """Split loaded documents into overlapping chunks for embedding.

    Chunks are ~1000 characters with a 100-character overlap so that
    sentences straddling a boundary are not lost.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return splitter.split_documents(document)
######################################################################################### | |
def get_vectorstore(texts):
    """Embed the text chunks and persist a FAISS index to disk.

    Uses a CPU-only multilingual E5 embedding model; the index is saved
    under ``vectore_Imstudio/faiss`` and also returned for immediate use.
    """
    index_path = 'vectore_Imstudio/faiss'
    embedder = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},
    )
    store = FAISS.from_documents(texts, embedder)
    store.save_local(index_path)
    return store
############################################################################################ | |
def get_chain(
    db,
    base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
    api_key="lm-studio",
    temperature=0.1,
    model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    k=2,
):
    """Build a conversational retrieval chain over the FAISS store *db*.

    The LLM endpoint, credentials, model name and retriever fan-out were
    previously hard-coded; they are now keyword parameters whose defaults
    reproduce the original behavior, so existing callers are unaffected.

    Args:
        db: FAISS vector store to retrieve context from.
        base_url: OpenAI-compatible endpoint (an LM Studio ngrok tunnel by default).
        api_key: API key for the endpoint ("lm-studio" is LM Studio's dummy key).
        temperature: Sampling temperature for the chat model.
        model: Model identifier passed to the endpoint.
        k: Number of chunks the retriever returns per query.

    Returns:
        A ``ConversationalRetrievalChain`` that also returns source documents.
    """
    llm = ChatOpenAI(
        base_url=base_url,
        api_key=api_key,
        temperature=temperature,
        model=model,
    )
    # Build the chain: retriever feeds the top-k chunks into the LLM.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        db.as_retriever(search_kwargs={'k': k}),
        return_source_documents=True,
    )
    return qa_chain
#################################################################################################################### | |
def get_conversation(query_user, qa_chain=None):
    """Answer *query_user* with the retrieval chain and display the result.

    Bug fix: the original referenced a bare ``qa_chain`` name that was only
    ever a local variable inside ``main`` and therefore always raised
    ``NameError``. The chain is now taken from an optional parameter, falling
    back to ``st.session_state["qa_chain"]`` (where ``main`` stores it), and
    the function fails gracefully when the model has not been built yet.

    Args:
        query_user: The user's question (answered in Persian per the prefix).
        qa_chain: Optional chain to use; defaults to the one in session state.

    Returns:
        The chain's result dict (with 'answer'), or None if no chain exists.
    """
    chain = qa_chain if qa_chain is not None else st.session_state.get("qa_chain")
    if chain is None:
        st.error("Please build the model first (click 'Build Model').")
        return None
    chat_history = []
    # Persian instruction prefix: "Explain this question in Persian:"
    query_1 = "این سوال را به زبان فارسی تشریح کن:"
    query = query_1 + query_user
    result = chain({'question': query, 'chat_history': chat_history})
    st.write('Answer of your question:' + result['answer'] + '\n')
    return result
#################################################################################################################### | |
def main():
    """Streamlit entry point: build the RAG model, then answer questions.

    Bug fix: the chain built under the "Build Model" button was a plain
    local variable, which Streamlit discards on the rerun triggered by the
    "Answer" button. It is now persisted in ``st.session_state`` so the
    answer path can find it.
    """
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if st.button("Build Model"):
        with st.spinner("Waiting"):
            document = get_pdf_load()
            st.write("load pdf")
            texts = get_text_split(document)
            st.write("text split")
            db = get_vectorstore(texts)
            st.write("vector store")
            # Persist across reruns; a local would be lost before "Answer" runs.
            st.session_state["qa_chain"] = get_chain(db)
            st.write("complete build model")
    if st.button("Answer"):
        with st.spinner("Answering"):
            get_conversation(query_user=user_question)


if __name__ == "__main__":
    main()