"""Streamlit app: ask questions about an uploaded borrower CSV via a
local Llama-2 RetrievalQA (RAG) chain backed by a FAISS vector store."""

import os
import tempfile
import textwrap

import pandas as pd
import streamlit as st
import torch
import transformers
from transformers import AutoTokenizer
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Where the FAISS index is persisted between runs.
DB_FAISS_PATH = "vectorstore/db_faiss"
MODEL_NAME = "daryl149/llama-2-7b-chat-hf"


@st.cache_resource
def _load_llm():
    """Build the HuggingFace text-generation LLM once and cache it.

    Without caching, the 7B model and tokenizer were re-loaded on every
    button click, which dominated response time.

    Returns:
        HuggingFacePipeline wrapping a text-generation pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    pipeline = transformers.pipeline(
        "text-generation",  # task
        model=MODEL_NAME,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        max_length=1000,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    return HuggingFacePipeline(pipeline=pipeline, model_kwargs={"temperature": 0})


def _load_documents(uploaded_file):
    """Persist the upload to a temp file (CSVLoader only accepts a file
    path) and parse it into LangChain documents.

    The temp file is removed afterwards so repeated uploads do not leak
    disk space (the original code never deleted it).

    Args:
        uploaded_file: Streamlit UploadedFile from ``st.file_uploader``.

    Returns:
        list of Documents, one per CSV row.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_file_path = tmp_file.name
    try:
        loader = CSVLoader(
            file_path=tmp_file_path,
            encoding="utf-8",
            csv_args={"delimiter": ","},
        )
        return loader.load()
    finally:
        os.unlink(tmp_file_path)


def main():
    """Render the UI and answer a query against the uploaded CSV."""
    st.set_page_config(page_title="👨‍💻 Talk with BORROWER data")
    st.title("👨‍💻 Talk with BORROWER data")

    uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")
    query = st.text_input("Send a Message")

    if st.button("Submit Query", type="primary"):
        # Fail fast with a visible message instead of silently doing nothing.
        if uploaded_file is None:
            st.warning("Please upload a CSV file first.")
            return
        if not query.strip():
            st.warning("Please enter a question.")
            return

        data = _load_documents(uploaded_file)
        st.write(data)

        # Embed the CSV rows and persist the index for later reuse.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.from_documents(data, embeddings)
        vectorstore.save_local(DB_FAISS_PATH)

        chain = RetrievalQA.from_chain_type(
            llm=_load_llm(),
            chain_type="stuff",
            return_source_documents=True,
            retriever=vectorstore.as_retriever(),
        )
        result = chain(query)
        st.write(textwrap.fill(result["result"], width=500))


if __name__ == "__main__":
    main()