Waflon committed on
Commit
8c29218
·
verified ·
1 Parent(s): 3e1c3a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -84
app.py CHANGED
@@ -1,10 +1,3 @@
1
- """
2
- Question Answering with Retrieval QA and LangChain Language Models featuring FAISS vector stores.
3
- This script uses the LangChain Language Model API to answer questions using Retrieval QA
4
- and FAISS vector stores. It also uses the Mistral huggingface inference endpoint to
5
- generate responses.
6
- """
7
-
8
  import os
9
  import streamlit as st
10
  from dotenv import load_dotenv
@@ -21,21 +14,7 @@ from langchain.llms import HuggingFaceHub
21
  # set this key as an environment variable
22
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets['huggingface_token']
23
 
24
- def get_pdf_text(pdf_docs):
25
- """
26
- Extract text from a list of PDF documents.
27
-
28
- Parameters
29
- ----------
30
- pdf_docs : list
31
- List of PDF documents to extract text from.
32
-
33
- Returns
34
- -------
35
- str
36
- Extracted text from all the PDF documents.
37
-
38
- """
39
  text = ""
40
  for pdf in pdf_docs:
41
  pdf_reader = PdfReader(pdf)
@@ -44,21 +23,7 @@ def get_pdf_text(pdf_docs):
44
  return text
45
 
46
 
47
- def get_text_chunks(text):
48
- """
49
- Split the input text into chunks.
50
-
51
- Parameters
52
- ----------
53
- text : str
54
- The input text to be split.
55
-
56
- Returns
57
- -------
58
- list
59
- List of text chunks.
60
-
61
- """
62
  text_splitter = CharacterTextSplitter(
63
  separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
64
  )
@@ -66,22 +31,7 @@ def get_text_chunks(text):
66
  return chunks
67
 
68
 
69
- def get_vectorstore(text_chunks):
70
- """
71
- Generate a vector store from a list of text chunks using HuggingFace BgeEmbeddings.
72
-
73
- Parameters
74
- ----------
75
- text_chunks : list
76
- List of text chunks to be embedded.
77
-
78
- Returns
79
- -------
80
- FAISS
81
- A FAISS vector store containing the embeddings of the text chunks.
82
-
83
- """
84
- #model = "BAAI/bge-base-en-v1.5"
85
  model = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
86
  encode_kwargs = {
87
  "normalize_embeddings": True
@@ -93,26 +43,13 @@ def get_vectorstore(text_chunks):
93
  return vectorstore
94
 
95
 
96
- def get_conversation_chain(vectorstore):
97
- """
98
- Create a conversational retrieval chain using a vector store and a language model.
99
-
100
- Parameters
101
- ----------
102
- vectorstore : FAISS
103
- A FAISS vector store containing the embeddings of the text chunks.
104
-
105
- Returns
106
- -------
107
- ConversationalRetrievalChain
108
- A conversational retrieval chain for generating responses.
109
-
110
- """
111
  llm = HuggingFaceHub(
112
- repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
 
113
  model_kwargs={"temperature": 0.5, "max_length": 1048},
114
  )
115
- # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
116
 
117
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
118
  conversation_chain = ConversationalRetrievalChain.from_llm(
@@ -121,28 +58,18 @@ def get_conversation_chain(vectorstore):
121
  return conversation_chain
122
 
123
 
124
- def handle_userinput(user_question):
125
- """
126
- Handle user input and generate a response using the conversational retrieval chain.
127
- Parameters
128
- ----------
129
- user_question : str
130
- The user's question.
131
- """
132
  response = st.session_state.conversation({"question": user_question})
133
  st.session_state.chat_history = response["chat_history"]
134
 
135
  for i, message in enumerate(st.session_state.chat_history):
136
  if i % 2 == 0:
137
- st.write("//_^ User: " + message.content)
138
  else:
139
  st.write("🤖 ChatBot: " + message.content)
140
 
141
 
142
  def main():
143
- """
144
- Putting it all together.
145
- """
146
  st.set_page_config(
147
  page_title="Chat with a Bot that tries to answer questions about multiple PDFs",
148
  page_icon=":books:",
@@ -153,18 +80,19 @@ def main():
153
 
154
  st.write(css, unsafe_allow_html=True)
155
 
156
-
157
-
158
  if "conversation" not in st.session_state:
159
  st.session_state.conversation = None
160
  if "chat_history" not in st.session_state:
161
  st.session_state.chat_history = None
162
 
 
163
  st.header("Chat with a Bot 🤖🦾 that tries to answer questions about multiple PDFs :books:")
164
  user_question = st.text_input("Ask a question about your documents:")
165
  if user_question:
166
  handle_userinput(user_question)
167
 
 
168
  with st.sidebar:
169
  st.subheader("Your documents")
170
  pdf_docs = st.file_uploader(
 
 
 
 
 
 
 
 
1
  import os
2
  import streamlit as st
3
  from dotenv import load_dotenv
 
14
  # set this key as an environment variable
15
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets['huggingface_token']
16
 
17
+ def get_pdf_text(pdf_docs : list) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  text = ""
19
  for pdf in pdf_docs:
20
  pdf_reader = PdfReader(pdf)
 
23
  return text
24
 
25
 
26
+ def get_text_chunks(text:str) ->list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  text_splitter = CharacterTextSplitter(
28
  separator="\n", chunk_size=1500, chunk_overlap=300, length_function=len
29
  )
 
31
  return chunks
32
 
33
 
34
+ def get_vectorstore(text_chunks : list) -> FAISS:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  model = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
36
  encode_kwargs = {
37
  "normalize_embeddings": True
 
43
  return vectorstore
44
 
45
 
46
+ def get_conversation_chain(vectorstore:FAISS) -> ConversationalRetrievalChain:
47
+ # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  llm = HuggingFaceHub(
49
+ #repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
50
+ repo_id="TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"
51
  model_kwargs={"temperature": 0.5, "max_length": 1048},
52
  )
 
53
 
54
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
55
  conversation_chain = ConversationalRetrievalChain.from_llm(
 
58
  return conversation_chain
59
 
60
 
61
+ def handle_userinput(user_question:str):
 
 
 
 
 
 
 
62
  response = st.session_state.conversation({"question": user_question})
63
  st.session_state.chat_history = response["chat_history"]
64
 
65
  for i, message in enumerate(st.session_state.chat_history):
66
  if i % 2 == 0:
67
+ st.write(" Usuario: " + message.content)
68
  else:
69
  st.write("🤖 ChatBot: " + message.content)
70
 
71
 
72
  def main():
 
 
 
73
  st.set_page_config(
74
  page_title="Chat with a Bot that tries to answer questions about multiple PDFs",
75
  page_icon=":books:",
 
80
 
81
  st.write(css, unsafe_allow_html=True)
82
 
83
+
 
84
  if "conversation" not in st.session_state:
85
  st.session_state.conversation = None
86
  if "chat_history" not in st.session_state:
87
  st.session_state.chat_history = None
88
 
89
+
90
  st.header("Chat with a Bot 🤖🦾 that tries to answer questions about multiple PDFs :books:")
91
  user_question = st.text_input("Ask a question about your documents:")
92
  if user_question:
93
  handle_userinput(user_question)
94
 
95
+
96
  with st.sidebar:
97
  st.subheader("Your documents")
98
  pdf_docs = st.file_uploader(