Update app.py
app.py
CHANGED
@@ -1,22 +1,20 @@
 import os
 import re
-import sys
-import copy
 import time
+import msgpack
 import numpy as np
 import streamlit as st
-from typing import Optional
-from stqdm import stqdm
 from numpy import ndarray
-from
+from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from qdrant_client import QdrantClient, models
 from fastembed.sparse.splade_pp import supported_splade_models
 from fastembed import SparseTextEmbedding, SparseEmbedding
-from langchain_community.llms.exllamav2 import ExLlamaV2
-from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
 from fastembed_ext import FastEmbedEmbeddingsLc
+from langchain_community.chat_models.ollama import ChatOllama
 from langchain_community.document_loaders.wikipedia import WikipediaLoader
 from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
+from langchain_core.prompts import PromptTemplate
+from langchain.chains.summarize import load_summarize_chain
 from langchain_experimental.text_splitter import SemanticChunker
 from langchain_core.documents import Document
 from qdrant_client.models import (
@@ -27,10 +25,6 @@ from qdrant_client.models import (
     SearchRequest,
     ScoredPoint,
 )
-from langchain_core.prompts import PromptTemplate
-from langchain.chains.summarize import load_summarize_chain
-from huggingface_hub import snapshot_download
-from exllamav2.generator import ExLlamaV2Sampler
 
 MAP_PROMPT = """
 You will be given a single passage of a book. This section will be enclosed in triple backticks (```)
@@ -50,21 +44,11 @@ The reader should be able to grasp what happened in the book.
 VERBOSE SUMMARY:
 """
 
-supported_splade_models[0] = {
-    "model": "prithivida/Splade_PP_en_v2",
-    "vocab_size": 30522,
-    "description": "Implementation of SPLADE++ Model for English v2",
-    "size_in_GB": 0.532,
-    "sources": {
-        "hf": "devve1/Splade_PP_en_v2_onnx"
-    },
-    "model_file": "model.onnx"
-}
 
-def make_points(chunks: list[str], dense: list[ndarray], sparse)->
+def make_points(chunks: list[str], dense: list[ndarray], sparse: list[SparseEmbedding])-> list[PointStruct]:
     points = []
-    for idx, (
-        sparse_vector = SparseVector(indices=
+    for idx, (sparse_vector, chunk, dense_vector) in enumerate(zip(sparse, chunks, dense)):
+        sparse_vector = SparseVector(indices=sparse_vector.indices.tolist(), values=sparse_vector.values.tolist())
        point = PointStruct(
            id=idx,
            vector={
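The rest of `make_points` is cut off by the diff viewer after `vector={`. A minimal sketch of the pattern the new signature implies: one `PointStruct` per chunk carrying a named dense vector and a named sparse vector. The payload key `text` is confirmed by `record.payload['text']` later in `main`; the vector names `text-dense` and `text-sparse` are placeholders and would have to match the collection's vector config.

```python
from qdrant_client.models import PointStruct, SparseVector

def make_points(chunks, dense, sparse):
    points = []
    for idx, (sparse_vector, chunk, dense_vector) in enumerate(zip(sparse, chunks, dense)):
        sv = SparseVector(indices=sparse_vector.indices.tolist(),
                          values=sparse_vector.values.tolist())
        points.append(PointStruct(
            id=idx,
            vector={'text-dense': dense_vector.tolist(),  # assumed vector name
                    'text-sparse': sv},                    # assumed vector name
            payload={'text': chunk},
        ))
    return points
```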
@@ -131,7 +115,7 @@ def rrf(rank_lists, alpha=60, default_rank=1000):
     return sorted_items
 
 
-def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model, sparse_model):
+def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model: FastEmbedEmbeddingsLc, sparse_model: SparseTextEmbedding):
     # name = 'Kia_EV6'
     # filepath = os.path.join(os.getcwd(), name + '.pdf')
 
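The hunk header references `rrf(rank_lists, alpha=60, default_rank=1000)`, whose body lies outside this diff. For context, a minimal reciprocal-rank-fusion sketch consistent with that signature and the `return sorted_items` line above — an assumption about the implementation, not a copy of it. Each item's fused score is the sum of 1/(alpha + rank) over the rankings, with `default_rank` substituted when an item is missing from a list:

```python
def rrf(rank_lists, alpha=60, default_rank=1000):
    # Every item seen in any ranking participates in the fusion.
    all_items = {item for rank_list in rank_lists for item in rank_list}
    scores = {}
    for item in all_items:
        score = 0.0
        for rank_list in rank_lists:
            # 1-based rank; missing items get a pessimistic default rank.
            rank = rank_list.index(item) + 1 if item in rank_list else default_rank
            score += 1.0 / (alpha + rank)
        scores[item] = score
    # Highest fused score first.
    sorted_items = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
    return sorted_items
```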
@@ -145,6 +129,7 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
     # )
 
     # docs = docs.load()
+
 
     dense_query = list(dense_model.embed_query(query, 32))
     sparse_query = list(sparse_model.embed(query, 32))
@@ -166,8 +151,6 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
 
     docs = [Document(record.payload['text']) for record in records_list[:3]]
 
-    print(docs)
-
     map_prompt = PromptTemplate(
         template=MAP_PROMPT,
         input_variables=['text']
@@ -198,39 +181,44 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
     output = reduce_chain.invoke([summaries])
     return output['output_text']
 
-
-
-
-
-
-
-
-
-
-
-
-    llm = ExLlamaV2(
-        model_path=model_path,
-        callbacks=callbacks,
-        settings=settings,
-        streaming=True,
-        max_new_tokens=3000
-    )
-
-    provider = ['CPUExecutionProvider']
-
-    sparse_model = SparseTextEmbedding(
-        'Qdrant/bm42-all-minilm-l6-v2-attentions',
-        cache_dir=os.getenv('HF_HOME'),
-        providers=provider
-    )
+def load_models_and_documents():
+    supported_splade_models[0] = {
+        "model": "prithivida/Splade_PP_en_v2",
+        "vocab_size": 30522,
+        "description": "Implementation of SPLADE++ Model for English v2",
+        "size_in_GB": 0.532,
+        "sources": {
+            "hf": "devve1/Splade_PP_en_v2_onnx"
+        },
+        "model_file": "model.onnx"
+    }
 
-
-
-
-
-
-
+    with st.spinner('Load models...'):
+        settings = ExLlamaV2Sampler.Settings()
+        settings.temperature = 0.75
+
+        model_path = snapshot_download(repo_id='Zoyd/NousResearch_Hermes-2-Theta-Llama-3-8B-6_5bpw_exl2')
+
+        llm = ExLlamaV2(
+            model_path=model_path,
+            settings=settings,
+            max_new_tokens=3000
+        )
+
+        provider = ['CPUExecutionProvider']
+
+        dense_model = FastEmbedEmbeddingsLc(
+            model_name='mixedbread-ai/mxbai-embed-large-v1',
+            providers=provider,
+            cache_dir=os.getenv('HF_HOME'),
+            batch_size=32
+        )
+
+        sparse_model = SparseTextEmbedding(
+            'Qdrant/bm42-all-minilm-l6-v2-attentions',
+            cache_dir=os.getenv('HF_HOME'),
+            providers=provider
+        )
 
     client = QdrantClient(path=os.getenv('HF_HOME'))
     collection_name = 'collection_demo'
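The `supported_splade_models[0]` patch, now moved inside `load_models_and_documents`, overwrites fastembed's first registered SPLADE++ entry so that the model name resolves to the custom ONNX export at `devve1/Splade_PP_en_v2_onnx`. Once the patch has run, the model should be instantiable by name like any built-in — a sketch under that assumption (the new code itself uses the BM42 sparse model instead):

```python
from fastembed import SparseTextEmbedding

# Only valid after supported_splade_models[0] has been patched as above.
splade = SparseTextEmbedding('prithivida/Splade_PP_en_v2')
embedding = next(splade.embed(['an action RPG with real-time combat']))
print(embedding.indices[:5], embedding.values[:5])  # sparse term ids and weights
```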
@@ -262,7 +250,7 @@ def load_models_and_components(show_spinner="Loading models..."):
         on_disk_payload=True,
         optimizers_config=models.OptimizersConfigDiff(
             memmap_threshold=10000,
-
+            indexing_threshold=0
         ),
         hnsw_config=models.HnswConfigDiff(
             on_disk=True,
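`indexing_threshold=0` defers HNSW index construction while points are bulk-uploaded; the `update_collection` call later in this diff restores a normal threshold (20000), which is Qdrant's recommended bulk-upload pattern. The same pattern in isolation (collection name and vector params are illustrative only):

```python
from qdrant_client import QdrantClient, models

client = QdrantClient(':memory:')
client.create_collection(
    collection_name='bulk_demo',
    vectors_config=models.VectorParams(size=4, distance=models.Distance.COSINE),
    optimizers_config=models.OptimizersConfigDiff(indexing_threshold=0),  # defer indexing
)
# ... upsert a large batch of points here ...
client.update_collection(
    collection_name='bulk_demo',
    optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000),  # re-enable
)
```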
@@ -270,18 +258,65 @@ def load_models_and_components(show_spinner="Loading models..."):
             ef_construct=100
         )
     )
-
-
-
-
-
-
-
-
-
-
+
+    with st.spinner('Parse and chunk documents...'):
+        name = 'action_rpg'
+        embeddings_path = os.path.join(os.getenv('HF_HOME'), 'collection', 'embeddings')
+
+        chunks_path = os.path.join(embeddings_path, name + '_chunks.msgpack')
+        dense_path = os.path.join(embeddings_path, name + '_dense.npz')
+        sparse_path = os.path.join(embeddings_path, name + '_sparse.npz')
+
+        if not os.path.exists(embeddings_path):
+            os.mkdir(embeddings_path)
+
+            docs = WikipediaLoader(query='Action-RPG').load()
+            chunks, dense_embeddings, sparse_embeddings = chunk_documents(docs, dense_model, sparse_model)
+
+            with open(chunks_path, "wb") as outfile:
+                packed = msgpack.packb(chunks, use_bin_type=True)
+                outfile.write(packed)
+
+            np.savez_compressed(dense_path, *dense_embeddings)
+            max_index = max(np.max(embedding.indices) for embedding in sparse_embeddings)
+
+            sparse_matrices = []
+            for embedding in sparse_embeddings:
+                data = embedding.values
+                indices = embedding.indices
+                indptr = np.array([0, len(data)])
+                matrix = csr_matrix((data, indices, indptr), shape=(1, max_index + 1))
+                sparse_matrices.append(matrix)
+
+            combined_sparse_matrix = vstack(sparse_matrices)
+            save_npz(sparse_path, combined_sparse_matrix)
+        else:
+            with open(chunks_path, "rb") as data_file:
+                byte_data = data_file.read()
+
+            chunks = msgpack.unpackb(byte_data, raw=False)
+
+            dense_embeddings = list(np.load(dense_path).values())
+
+            sparse_embeddings = []
+            loaded_sparse_matrix = load_npz(sparse_path)
+
+            for i in range(loaded_sparse_matrix.shape[0]):
+                row = loaded_sparse_matrix.getrow(i)
+                values = row.data
+                indices = row.indices
+                embedding = SparseEmbedding(values, indices)
+                sparse_embeddings.append(embedding)
+
+    with st.spinner('Save documents...'):
+        client.upsert(
+            collection_name,
+            make_points(
+                chunks,
+                dense_embeddings,
+                sparse_embeddings
+            )
         )
-    )
     client.update_collection(
         collection_name=collection_name,
         optimizer_config=models.OptimizersConfigDiff(indexing_threshold=20000)
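The caching scheme above packs each sparse embedding's `(indices, values)` pair into one row of a CSR matrix, stacks the rows, and persists them with `save_npz`; loading reverses the process row by row. A self-contained round-trip of just that scheme (synthetic data, throwaway file name):

```python
import numpy as np
from scipy.sparse import csr_matrix, vstack, save_npz, load_npz

# Two synthetic sparse embeddings as (indices, values) pairs.
embeddings = [(np.array([1, 7, 42]), np.array([0.5, 0.25, 1.0])),
              (np.array([3, 42]), np.array([0.75, 0.125]))]
max_index = max(int(indices.max()) for indices, _ in embeddings)

rows = []
for indices, values in embeddings:
    indptr = np.array([0, len(values)])  # single row: every entry belongs to row 0
    rows.append(csr_matrix((values, indices, indptr), shape=(1, max_index + 1)))

save_npz('sparse_demo.npz', vstack(rows))

loaded = load_npz('sparse_demo.npz')
for i in range(loaded.shape[0]):
    row = loaded.getrow(i)
    print(row.indices, row.data)  # recovers each embedding's indices and values
```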
@@ -289,7 +324,7 @@ def load_models_and_components(show_spinner="Loading models..."):
 
     return client, collection_name, llm, dense_model, sparse_model
 
-def chunk_documents(docs, dense_model, sparse_model
+def chunk_documents(docs, dense_model, sparse_model):
     text_splitter = SemanticChunker(
         dense_model,
         breakpoint_threshold_type='standard_deviation'
@@ -297,20 +332,20 @@ def chunk_documents(docs, dense_model, sparse_model, show_spinner="Parsing and c
 
     documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
 
-    dense_embeddings = dense_model.embed_documents(
-    sparse_embeddings = list(sparse_model.embed(
+    dense_embeddings = dense_model.embed_documents(documents,32)
+    sparse_embeddings = list(sparse_model.embed(documents, 32))
 
     return documents, dense_embeddings, sparse_embeddings
 
 if __name__ == '__main__':
     st.set_page_config(page_title="Video Game Assistant",
                        layout="wide"
-
+    )
+    st.title("Video Game Assistant :sunglasses:")
+
     if 'models_loaded' not in st.session_state:
-        st.session_state.client, st.session_state.collection_name, st.session_state.llm, st.session_state.dense_model, st.session_state.sparse_model =
+        st.session_state.client, st.session_state.collection_name, st.session_state.llm, st.session_state.dense_model, st.session_state.sparse_model = load_models_and_documents()
         st.session_state.models_loaded = True
-
-    st.title("Video Game Assistant")
 
     if "messages" not in st.session_state:
         st.session_state.messages = []
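`chunk_documents` relies on `SemanticChunker`, which embeds adjacent sentences and places a breakpoint wherever their embedding distance exceeds a statistical threshold; with `breakpoint_threshold_type='standard_deviation'` the cut-off is derived from the mean plus a multiple of the standard deviation of those distances. A standalone sketch, using the stock `FastEmbedEmbeddings` wrapper as a stand-in for this file's `FastEmbedEmbeddingsLc`:

```python
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain_core.documents import Document
from langchain_experimental.text_splitter import SemanticChunker

embeddings = FastEmbedEmbeddings(model_name='BAAI/bge-small-en-v1.5')
splitter = SemanticChunker(embeddings, breakpoint_threshold_type='standard_deviation')

docs = [Document(page_content='...a long wiki article about action RPGs...')]
chunks = [d.page_content for d in splitter.transform_documents(docs)]
```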
@@ -323,13 +358,7 @@ if __name__ == '__main__':
         st.chat_message("user").markdown(prompt)
         st.session_state.messages.append({"role": "user", "content": prompt})
 
-
-        collection_name = st.session_state.collection_name
-        llm = st.session_state.llm
-        dense_model = st.session_state.dense_model
-        sparse_model = st.session_state.sparse_model
-
-        ai_response = main(prompt, client, collection_name, llm, dense_model, sparse_model)
+        ai_response = main(prompt, st.session_state.client, st.session_state.collection_name, st.session_state.llm, st.session_state.dense_model, st.session_state.sparse_model)
         response = f"Echo: {ai_response}"
         with st.chat_message("assistant"):
             message_placeholder = st.empty()
@@ -338,5 +367,4 @@ if __name__ == '__main__':
                 full_response += chunk + " "
                 time.sleep(0.01)
                 message_placeholder.markdown(full_response + "▌")
-            st.session_state.messages.append({"role": "assistant", "content": full_response})
-
+        st.session_state.messages.append({"role": "assistant", "content": full_response})