devve1 committed
Commit 6143b5b
1 Parent(s): 6d74746

Update app.py

Files changed (1):
  1. app.py +115 -87
app.py CHANGED
@@ -1,22 +1,20 @@
 import os
 import re
-import sys
-import copy
 import time
+import msgpack
 import numpy as np
 import streamlit as st
-from typing import Optional
-from stqdm import stqdm
 from numpy import ndarray
-from typing import Iterable
+from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from qdrant_client import QdrantClient, models
 from fastembed.sparse.splade_pp import supported_splade_models
 from fastembed import SparseTextEmbedding, SparseEmbedding
-from langchain_community.llms.exllamav2 import ExLlamaV2
-from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
 from fastembed_ext import FastEmbedEmbeddingsLc
+from langchain_community.chat_models.ollama import ChatOllama
 from langchain_community.document_loaders.wikipedia import WikipediaLoader
 from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
+from langchain_core.prompts import PromptTemplate
+from langchain.chains.summarize import load_summarize_chain
 from langchain_experimental.text_splitter import SemanticChunker
 from langchain_core.documents import Document
 from qdrant_client.models import (
@@ -27,10 +25,6 @@ from qdrant_client.models import (
     SearchRequest,
     ScoredPoint,
 )
-from langchain_core.prompts import PromptTemplate
-from langchain.chains.summarize import load_summarize_chain
-from huggingface_hub import snapshot_download
-from exllamav2.generator import ExLlamaV2Sampler
 
 MAP_PROMPT = """
 You will be given a single passage of a book. This section will be enclosed in triple backticks (```)
@@ -50,21 +44,11 @@ The reader should be able to grasp what happened in the book.
 VERBOSE SUMMARY:
 """
 
-supported_splade_models[0] = {
-    "model": "prithivida/Splade_PP_en_v2",
-    "vocab_size": 30522,
-    "description": "Implementation of SPLADE++ Model for English v2",
-    "size_in_GB": 0.532,
-    "sources": {
-        "hf": "devve1/Splade_PP_en_v2_onnx"
-    },
-    "model_file": "model.onnx"
-}
 
-def make_points(chunks: list[str], dense: list[ndarray], sparse)-> Iterable[PointStruct]:
+def make_points(chunks: list[str], dense: list[ndarray], sparse: list[SparseEmbedding])-> list[PointStruct]:
     points = []
-    for idx, (sparse_vec, chunk, dense_vector) in enumerate(zip(sparse, chunks, dense)):
-        sparse_vector = SparseVector(indices=sparse_vec.indices.tolist(), values=sparse_vec.values.tolist())
+    for idx, (sparse_vector, chunk, dense_vector) in enumerate(zip(sparse, chunks, dense)):
+        sparse_vector = SparseVector(indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist())
         point = PointStruct(
             id=idx,
             vector={
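
Note: the `.tolist()` conversion in this hunk matters because fastembed returns numpy arrays, while Qdrant's `SparseVector` must be JSON-serializable. A minimal standalone sketch of the same conversion (the model name is the one this file uses elsewhere):

    from fastembed import SparseTextEmbedding
    from qdrant_client.models import SparseVector

    model = SparseTextEmbedding('Qdrant/bm42-all-minilm-l6-v2-attentions')
    emb = next(model.embed(["an action RPG blends real-time combat with progression"]))

    # numpy arrays -> plain lists, as make_points does per chunk
    vec = SparseVector(indices=emb.indices.tolist(), values=emb.values.tolist())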
@@ -131,7 +115,7 @@ def rrf(rank_lists, alpha=60, default_rank=1000):
     return sorted_items
 
 
-def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model):
+def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model: FastEmbedEmbeddingsLc, sparse_model: SparseTextEmbedding):
     # name = 'Kia_EV6'
     # filepath = os.path.join(os.getcwd(), name + '.pdf')
 
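
Note: the hunk header references `rrf(rank_lists, alpha=60, default_rank=1000)`, whose body lies outside this diff. Reciprocal rank fusion scores each id by summing 1/(alpha + rank) across the rank lists; a sketch consistent with that signature and the `return sorted_items` context, not the committed body:

    def rrf(rank_lists, alpha=60, default_rank=1000):
        # Every id seen in any ranking participates in the fused score.
        all_ids = {item for ranks in rank_lists for item in ranks}
        scores = {}
        for doc_id in all_ids:
            score = 0.0
            for ranks in rank_lists:
                # Ids missing from a list fall back to a large default rank.
                rank = ranks.index(doc_id) + 1 if doc_id in ranks else default_rank
                score += 1.0 / (alpha + rank)
            scores[doc_id] = score
        # Highest fused score first.
        return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)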
@@ -145,6 +129,7 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model):
     # )
 
     # docs = docs.load()
+
 
     dense_query = list(dense_model.embed_query(query, 32))
     sparse_query = list(sparse_model.embed(query, 32))
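
Note: `embed_query` yields the dense query vector and `embed` yields one `SparseEmbedding`; the retrieval call itself sits outside the hunk. A sketch of how such a pair is typically issued against named dense and sparse vectors (the vector names and the `search_batch` wiring are assumptions, not taken from this diff; `client`, `collection_name`, `dense_query`, `sparse_query` come from main()):

    from qdrant_client import models

    requests = [
        models.SearchRequest(
            vector=models.NamedVector(name='text-dense', vector=dense_query),
            limit=10,
            with_payload=True
        ),
        models.SearchRequest(
            vector=models.NamedSparseVector(
                name='text-sparse',
                vector=models.SparseVector(
                    indices=sparse_query[0].indices.tolist(),
                    values=sparse_query[0].values.tolist()
                )
            ),
            limit=10,
            with_payload=True
        ),
    ]
    dense_hits, sparse_hits = client.search_batch(collection_name, requests)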
@@ -166,8 +151,6 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model):
 
     docs = [Document(record.payload['text']) for record in records_list[:3]]
 
-    print(docs)
-
     map_prompt = PromptTemplate(
         template=MAP_PROMPT,
         input_variables=['text']
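
Note: the retrieved records feed a map-reduce summarization; only the map side is visible here, and the commit moves `load_summarize_chain` into the imports. One common wiring, where `REDUCE_PROMPT` is hypothetical (the file only defines `MAP_PROMPT`) and `llm`/`docs` come from main()'s scope:

    from langchain.chains.summarize import load_summarize_chain
    from langchain_core.prompts import PromptTemplate

    map_prompt = PromptTemplate(template=MAP_PROMPT, input_variables=['text'])
    combine_prompt = PromptTemplate(template=REDUCE_PROMPT, input_variables=['text'])

    chain = load_summarize_chain(
        llm,
        chain_type='map_reduce',
        map_prompt=map_prompt,
        combine_prompt=combine_prompt
    )
    output = chain.invoke(docs)       # docs: list[Document]
    summary = output['output_text']   # matches the output['output_text'] below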
@@ -198,39 +181,44 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model):
     output = reduce_chain.invoke([summaries])
     return output['output_text']
 
-@st.cache_resource
-def load_models_and_components(show_spinner="Loading models..."):
-    settings = ExLlamaV2Sampler.Settings()
-    settings.temperature = 0.75
-    settings.top_k = 50
-    settings.top_p = 0.8
-    settings.token_repetition_penalty = 1.05
-
-    model_path = snapshot_download(repo_id='Zoyd/NousResearch_Hermes-2-Theta-Llama-3-8B-6_5bpw_exl2')
-    callbacks = [StreamingStdOutCallbackHandler()]
-
-    llm = ExLlamaV2(
-        model_path=model_path,
-        callbacks=callbacks,
-        settings=settings,
-        streaming=True,
-        max_new_tokens=3000
-    )
-
-    provider = ['CPUExecutionProvider']
-
-    sparse_model = SparseTextEmbedding(
-        'Qdrant/bm42-all-minilm-l6-v2-attentions',
-        cache_dir=os.getenv('HF_HOME'),
-        providers=provider
-    )
+def load_models_and_documents():
+    supported_splade_models[0] = {
+        "model": "prithivida/Splade_PP_en_v2",
+        "vocab_size": 30522,
+        "description": "Implementation of SPLADE++ Model for English v2",
+        "size_in_GB": 0.532,
+        "sources": {
+            "hf": "devve1/Splade_PP_en_v2_onnx"
+        },
+        "model_file": "model.onnx"
+    }
 
-    dense_model = FastEmbedEmbeddingsLc(
-        model_name='mixedbread-ai/mxbai-embed-large-v1',
-        providers=provider,
-        cache_dir=os.getenv('HF_HOME'),
-        batch_size=32
-    )
+    with st.spinner('Load models...'):
+        settings = ExLlamaV2Sampler.Settings()
+        settings.temperature = 0.75
+
+        model_path = snapshot_download(repo_id='Zoyd/NousResearch_Hermes-2-Theta-Llama-3-8B-6_5bpw_exl2')
+
+        llm = ExLlamaV2(
+            model_path=model_path,
+            settings=settings,
+            max_new_tokens=3000
+        )
+
+        provider = ['CPUExecutionProvider']
+
+        dense_model = FastEmbedEmbeddingsLc(
+            model_name='mixedbread-ai/mxbai-embed-large-v1',
+            providers=provider,
+            cache_dir=os.getenv('HF_HOME'),
+            batch_size=32
+        )
+
+        sparse_model = SparseTextEmbedding(
+            'Qdrant/bm42-all-minilm-l6-v2-attentions',
+            cache_dir=os.getenv('HF_HOME'),
+            providers=provider
+        )
 
     client = QdrantClient(path=os.getenv('HF_HOME'))
     collection_name = 'collection_demo'
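
Note: as committed, the rewritten `load_models_and_documents` still constructs `ExLlamaV2Sampler.Settings()` and calls `snapshot_download` and `ExLlamaV2`, yet this same commit deletes the imports those calls rely on (and adds `ChatOllama` without using it in any visible hunk). It also drops the `@st.cache_resource` decorator in favor of explicit `st.spinner` blocks. Unless restored, the function raises `NameError` at runtime; these are the three imports it still needs, copied from the lines removed at the top of this diff:

    from exllamav2.generator import ExLlamaV2Sampler
    from huggingface_hub import snapshot_download
    from langchain_community.llms.exllamav2 import ExLlamaV2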
@@ -262,7 +250,7 @@ def load_models_and_components(show_spinner="Loading models..."):
         on_disk_payload=True,
         optimizers_config=models.OptimizersConfigDiff(
             memmap_threshold=10000,
-            indexing_treshold=0
+            indexing_threshold=0
         ),
         hnsw_config=models.HnswConfigDiff(
             on_disk=True,
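
Note: this fixes the misspelled `indexing_treshold` keyword, which would not have taken effect (recent qdrant-client versions reject unknown fields; older ones may silently drop them). With `indexing_threshold=0` at creation and the `update_collection` call at the end of the function, the code follows Qdrant's usual bulk-ingest pattern: defer HNSW index construction during the upsert, then build the index once. A minimal self-contained sketch of that pattern (collection name and vector params are illustrative):

    from qdrant_client import QdrantClient, models

    client = QdrantClient(':memory:')
    client.create_collection(
        'demo',
        vectors_config=models.VectorParams(size=4, distance=models.Distance.COSINE),
        optimizers_config=models.OptimizersConfigDiff(indexing_threshold=0)
    )
    # ... bulk upsert points here ...
    client.update_collection(
        collection_name='demo',
        optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000)
    )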
@@ -270,18 +258,65 @@ def load_models_and_components(show_spinner="Loading models..."):
             ef_construct=100
         )
     )
-
-    docs = WikipediaLoader(query='Action-RPG').load()
-    chunks, dense, sparse = chunk_documents(docs, dense_model, sparse_model)
-
-    client.upsert(
-        collection_name,
-        make_points(
-            chunks,
-            dense,
-            sparse
+
+    with st.spinner('Parse and chunk documents...'):
+        name = 'action_rpg'
+        embeddings_path = os.path.join(os.getenv('HF_HOME'), 'collection', 'embeddings')
+
+        chunks_path = os.path.join(embeddings_path, name + '_chunks.msgpack')
+        dense_path = os.path.join(embeddings_path, name + '_dense.npz')
+        sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')
+
+        if not os.path.exists(embeddings_path):
+            os.mkdir(embeddings_path)
+
+            docs = WikipediaLoader(query='Action-RPG').load()
+            chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
+
+            with open(chunks_path, "wb") as outfile:
+                packed = msgpack.packb(chunks, use_bin_type=True)
+                outfile.write(packed)
+
+            np.savez_compressed(dense_path, *dense_embeddings)
+            max_index = max(np.max(embedding.indices) for embedding in sparse_embeddings)
+
+            sparse_matrices = []
+            for embedding in sparse_embeddings:
+                data = embedding.values
+                indices = embedding.indices
+                indptr = np.array([0, len(data)])
+                matrix = csr_matrix((data, indices, indptr), shape=(1, max_index + 1))
+                sparse_matrices.append(matrix)
+
+            combined_sparse_matrix = vstack(sparse_matrices)
+            save_npz(sparse_path, combined_sparse_matrix)
+        else:
+            with open(chunks_path, "rb") as data_file:
+                byte_data = data_file.read()
+
+            chunks = msgpack.unpackb(byte_data, raw=False)
+
+            dense_embeddings = list(np.load(dense_path).values())
+
+            sparse_embeddings = []
+            loaded_sparse_matrix = load_npz(sparse_path)
+
+            for i in range(loaded_sparse_matrix.shape[0]):
+                row = loaded_sparse_matrix.getrow(i)
+                values = row.data
+                indices = row.indices
+                embedding = SparseEmbedding(values, indices)
+                sparse_embeddings.append(embedding)
+
+    with st.spinner('Save documents...'):
+        client.upsert(
+            collection_name,
+            make_points(
+                chunks,
+                dense_embeddings,
+                sparse_embeddings
+            )
         )
-    )
     client.update_collection(
         collection_name=collection_name,
         optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000)
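
Note: the rewritten ingestion caches everything on first run: chunks via msgpack, dense embeddings via `np.savez_compressed`, and sparse embeddings as rows of a single CSR matrix. A round-trip sketch of the sparse half under the same assumptions as the hunk (notably that fastembed's `SparseEmbedding` can be rebuilt from `values`/`indices`, as the `else` branch does):

    import numpy as np
    from scipy.sparse import csr_matrix, vstack, save_npz, load_npz
    from fastembed import SparseEmbedding

    emb = SparseEmbedding(values=np.array([0.5, 1.2]), indices=np.array([3, 17]))

    # One embedding becomes a 1-row CSR matrix; rows are stacked for storage.
    row = csr_matrix((emb.values, emb.indices, np.array([0, len(emb.values)])),
                     shape=(1, 18))  # width = max index + 1
    save_npz('sparse_demo.npz', vstack([row]))

    # On load, each row's data/indices rebuild one SparseEmbedding.
    first = load_npz('sparse_demo.npz').getrow(0)
    restored = SparseEmbedding(values=first.data, indices=first.indices)

One caveat the code inherits: the `else` branch assumes all three cache files exist whenever `embeddings_path` does, so a partially written cache would raise on open.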
@@ -289,7 +324,7 @@ def load_models_and_components(show_spinner="Loading models..."):
 
     return client, collection_name, llm, dense_model, sparse_model
 
-def chunk_documents(docs, dense_model, sparse_model, show_spinner="Parsing and chunking texts..."):
+def chunk_documents(docs, dense_model, sparse_model):
     text_splitter = SemanticChunker(
         dense_model,
         breakpoint_threshold_type='standard_deviation'
@@ -297,20 +332,20 @@ def chunk_documents(docs, dense_model, sparse_model, show_spinner="Parsing and chunking texts..."):
 
     documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
 
-    dense_embeddings = dense_model.embed_documents(stqdm(documents, desc='Generate dense embeddings...', backend=True), 32)
-    sparse_embeddings = list(sparse_model.embed(stqdm(documents, desc='Generate sparse embeddings...', backend=True), 32))
+    dense_embeddings = dense_model.embed_documents(documents, 32)
+    sparse_embeddings = list(sparse_model.embed(documents, 32))
 
     return documents, dense_embeddings, sparse_embeddings
 
 if __name__ == '__main__':
     st.set_page_config(page_title="Video Game Assistant",
                        layout="wide"
-    )
+    )
+    st.title("Video Game Assistant :sunglasses:")
+
     if 'models_loaded' not in st.session_state:
-        st.session_state.client, st.session_state.collection_name, st.session_state.llm, st.session_state.dense_model, st.session_state.sparse_model = load_models_and_components()
+        st.session_state.client, st.session_state.collection_name, st.session_state.llm, st.session_state.dense_model, st.session_state.sparse_model = load_models_and_documents()
         st.session_state.models_loaded = True
-
-    st.title("Video Game Assistant")
 
     if "messages" not in st.session_state:
         st.session_state.messages = []
@@ -323,13 +358,7 @@ if __name__ == '__main__':
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
 
-        client = st.session_state.client
-        collection_name = st.session_state.collection_name
-        llm = st.session_state.llm
-        dense_model = st.session_state.dense_model
-        sparse_model = st.session_state.sparse_model
-
-        ai_response = main(prompt, client, collection_name, llm, dense_model, sparse_model)
+        ai_response = main(prompt, st.session_state.client, st.session_state.collection_name, st.session_state.llm, st.session_state.dense_model, st.session_state.sparse_model)
         response = f"Echo: {ai_response}"
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
@@ -338,5 +367,4 @@ if __name__ == '__main__':
                 full_response += chunk + " "
                 time.sleep(0.01)
                 message_placeholder.markdown(full_response + "▌")
-            st.session_state.messages.append({"role": "assistant", "content": full_response})
-
+        st.session_state.messages.append({"role": "assistant", "content": full_response})