darshan8950 committed on
Commit
39729db
1 Parent(s): 7d04a4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -29
app.py CHANGED
@@ -1,16 +1,19 @@
1
-
2
- from langchain.document_loaders.csv_loader import CSVLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.embeddings import HuggingFaceEmbeddings
5
- from langchain.vectorstores import FAISS
6
- from langchain.llms import CTransformers
7
- from langchain.chains import ConversationalRetrievalChain
8
  import streamlit as st
9
  import tempfile
 
 
 
 
 
 
 
 
 
 
10
 
11
  def main():
12
- st.set_page_config(page_title="👨‍💻 Talk with your CSV")
13
- st.title("👨‍💻 Talk with borrower data")
14
  uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")
15
 
16
  query = st.text_input("Send a Message")
@@ -18,6 +21,7 @@ def main():
18
  DB_FAISS_PATH = "vectorstore/db_faiss"
19
 
20
  if uploaded_file :
 
21
  with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
22
  tmp_file.write(uploaded_file.getvalue())
23
  tmp_file_path = tmp_file.name
@@ -26,27 +30,33 @@ def main():
26
  'delimiter': ','})
27
  data = loader.load()
28
  st.write(data)
29
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=40)
30
- text_chunks = text_splitter.split_documents(data)
31
-
32
- embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')
33
-
34
- docsearch = FAISS.from_documents(text_chunks, embeddings)
35
-
36
- docsearch = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":1})
37
 
38
- docsearch.save_local(DB_FAISS_PATH)
39
-
40
- llm = CTransformers(model="models/llama-2-7b-chat.ggmlv3.q4_0.bin",
41
- model_type="llama",
42
- max_new_tokens=512,
43
- temperature=0.1)
44
-
45
- qa = ConversationalRetrievalChain.from_llm(llm, retriever=docsearch)
46
-
47
- result = qa(query)
48
- st.write(result)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  if __name__ == '__main__':
51
  main()
52
-
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import tempfile
3
+ import pandas as pd
4
+ from langchain import HuggingFacePipeline
5
+ from transformers import AutoTokenizer
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.document_loaders.csv_loader import CSVLoader
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.chains import RetrievalQA
10
+ import transformers
11
+ import torch
12
+ import textwrap
13
 
14
  def main():
15
+ st.set_page_config(page_title="👨‍💻 Talk with BORROWER data")
16
+ st.title("👨‍💻 Talk with BORROWER data")
17
  uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")
18
 
19
  query = st.text_input("Send a Message")
 
21
  DB_FAISS_PATH = "vectorstore/db_faiss"
22
 
23
  if uploaded_file :
24
+ #use tempfile because CSVLoader only accepts a file_path
25
  with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
26
  tmp_file.write(uploaded_file.getvalue())
27
  tmp_file_path = tmp_file.name
 
30
  'delimiter': ','})
31
  data = loader.load()
32
  st.write(data)
 
 
 
 
 
 
 
 
33
 
34
+ model = "daryl149/llama-2-7b-chat-hf"
35
+ tokenizer = AutoTokenizer.from_pretrained(model)
36
+ pipeline = transformers.pipeline("text-generation", #task
37
+ model=model,
38
+ tokenizer=tokenizer,
39
+ torch_dtype=torch.bfloat16,
40
+ trust_remote_code=True,
41
+ device_map="auto",
42
+ max_length=1000,
43
+ do_sample=True,
44
+ top_k=10,
45
+ num_return_sequences=1,
46
+ eos_token_id=tokenizer.eos_token_id
47
+ )
48
+
49
+ llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})
50
+ embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
51
+ vectorstore = FAISS.from_documents(data, embeddings)
52
+
53
+ vectorstore.save_local(DB_FAISS_PATH)
54
+
55
+ chain =  RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=True, retriever=vectorstore.as_retriever())
56
+ result=chain(query)
57
+ wrapped_text = textwrap.fill(result['result'], width=500)
58
+
59
+ st.write(wrapped_text)
60
 
61
  if __name__ == '__main__':
62
  main()