# chat_csv / app.py
# Streamlit app: upload a CSV and chat with its contents via LangChain
# RetrievalQA over a FAISS vector store, answered by a llama-2 chat model.
# (Hugging Face Space by darshan8950)
import os
import tempfile
import textwrap

import pandas as pd
import streamlit as st
import torch
import transformers
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer
# Where the FAISS index is persisted between runs.
DB_FAISS_PATH = "vectorstore/db_faiss"


def _load_csv(uploaded_file):
    """Load an uploaded CSV into LangChain documents.

    CSVLoader only accepts a filesystem path, so the uploaded bytes are
    written to a temporary file first. The temp file is removed afterwards
    (the original leaked one file per query).
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_file_path = tmp_file.name
    try:
        loader = CSVLoader(
            file_path=tmp_file_path,
            encoding="utf-8",
            csv_args={'delimiter': ','},
        )
        return loader.load()
    finally:
        os.unlink(tmp_file_path)  # fix: don't leak the temp file


@st.cache_resource
def _build_llm():
    """Build the llama-2 HuggingFacePipeline LLM once per process.

    Cached with st.cache_resource: loading a 7B model is expensive, and the
    original rebuilt tokenizer + pipeline on every button click.
    """
    model = "daryl149/llama-2-7b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model)
    # Renamed from `pipeline` to avoid shadowing transformers.pipeline.
    text_gen = transformers.pipeline(
        "text-generation",          # task
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        max_length=1000,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    return HuggingFacePipeline(pipeline=text_gen, model_kwargs={'temperature': 0})


def _build_vectorstore(data):
    """Embed the documents into a FAISS index and persist it to disk."""
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2'
    )
    vectorstore = FAISS.from_documents(data, embeddings)
    vectorstore.save_local(DB_FAISS_PATH)
    return vectorstore


def main():
    """Streamlit entry point: upload a CSV, then answer questions about it.

    On submit, the CSV is loaded, embedded into a FAISS store, and the query
    is answered by a RetrievalQA chain over the retrieved rows.
    """
    st.set_page_config(page_title="👨‍💻 Talk with BORROWER data")
    st.title("👨‍💻 Talk with BORROWER data")

    uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")
    query = st.text_input("Send a Message")

    if not st.button("Submit Query", type="primary"):
        return

    # Guard clauses: the original silently did nothing when no file was
    # uploaded, and ran the whole pipeline on an empty query.
    if uploaded_file is None:
        st.warning("Please upload a CSV file first.")
        return
    if not query:
        st.warning("Please enter a question.")
        return

    data = _load_csv(uploaded_file)
    st.write(data)

    llm = _build_llm()
    vectorstore = _build_vectorstore(data)
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        return_source_documents=True,
        retriever=vectorstore.as_retriever(),
    )
    result = chain(query)
    wrapped_text = textwrap.fill(result['result'], width=500)
    st.write(wrapped_text)
# Standard script entry guard: run the Streamlit app when executed directly.
if __name__ == '__main__':
    main()