Update app.py
app.py CHANGED
@@ -3,7 +3,6 @@ import re
 import lz4
 import time
 import uuid
-import vllm
 import torch
 import spacy
 import base64
@@ -20,10 +19,10 @@ from typing import List, Dict
 from ppt_chunker import ppt_chunk
 from outlines import models, generate
 from qdrant_client import QdrantClient
-from optimum_encoder import OptimumEncoder
 from unstructured.cleaners.core import clean
 from streamlit_navigation_bar import st_navbar
 from vllm.sampling_params import SamplingParams
+from vllm import LLM, PoolingParams, PoolingType
 from fastembed import SparseTextEmbedding, SparseEmbedding
 from unstructured.nlp.tokenize import download_nltk_packages
 from huggingface_hub import snapshot_download, hf_hub_download
@@ -89,7 +88,7 @@ def transform_query(query: str) -> str:
     """
     return f'Represent this sentence for searching relevant passages: {query}'
 
-def query_hybrid_search(query: str, client: QdrantClient, collection_name: str, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
+def query_hybrid_search(query: str, client: QdrantClient, collection_name: str, dense_model: LLM, sparse_model: SparseTextEmbedding):
     dense_embeddings = dense_model.embed_query(transform_query(query))[0]
     sparse_embeddings = list(sparse_model.query_embed(query))[0]
 
@@ -103,7 +102,7 @@ def query_hybrid_search(query: str, client: QdrantClient, collection_name: str,
         with_vectors=False,
         with_payload=True,
         limit=10,
-        score_threshold=0.
+        score_threshold=0.95
     )
 
 def build_prompt_conv():
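Note that the new signature types dense_model as a vLLM LLM, while the body above still calls embed_query(), which was the OptimumEncoder interface. For reference, a minimal sketch of producing the query vector with the same LLM.encode() call this commit introduces for documents; the helper name and pooling choice are illustrative assumptions, not code from this commit:

from vllm import LLM, PoolingParams, PoolingType

def embed_query_vllm(query: str, dense_model: LLM) -> list:
    # Same retrieval prompt prefix that transform_query() builds in app.py.
    prompt = f'Represent this sentence for searching relevant passages: {query}'
    # encode() returns one output per prompt; .outputs.embedding holds the pooled vector.
    out = dense_model.encode([prompt], pooling_params=PoolingParams(pooling_type=PoolingType.MEAN))
    return out[0].outputs.embedding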
@@ -304,11 +303,15 @@ def load_models_and_documents():
     container = st.empty()
 
     with container.status("Load AI Models and Prepare Documents...", expanded=True) as status:
-        st.write('Downloading and Loading MixedBread Mxbai Dense Embedding Model ...')
+        st.write('Downloading and Loading MixedBread Mxbai Dense Embedding Model with vLLM as backend...')
 
-        dense_model = OptimumEncoder(
-            ...
-            ...
+        dense_model = LLM(
+            model='mixedbread-ai/mxbai-embed-large-v1',
+            enforce_eager=True,
+            max_model_len=512,
+            max_num_seqs=32,
+            tensor_parallel_size=1,
+            dtype=torch.float16
         )
 
         st.write('Downloading and Loading Qdrant BM42 Sparse Embedding Model under ONNX using the CPU...')
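A short usage sketch for the vLLM-backed dense encoder configured above. The sample texts and the dimensionality check are assumptions (mxbai-embed-large-v1 produces 1024-dimensional vectors); the constructor arguments mirror the ones added in this commit:

import torch
from vllm import LLM, PoolingParams, PoolingType

dense_model = LLM(
    model='mixedbread-ai/mxbai-embed-large-v1',
    enforce_eager=True,       # skip CUDA graph capture for faster startup
    max_model_len=512,        # context window of the BERT-style embedding model
    max_num_seqs=32,          # cap on sequences batched per forward pass
    tensor_parallel_size=1,
    dtype=torch.float16,
)

outputs = dense_model.encode(
    ['first passage', 'second passage'],
    pooling_params=PoolingParams(pooling_type=PoolingType.MEAN),  # mean-pool token states
)
vectors = [o.outputs.embedding for o in outputs]
assert len(vectors) == 2 and len(vectors[0]) == 1024  # one 1024-d vector per passage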
@@ -319,15 +322,10 @@ def load_models_and_documents():
             providers=['CPUExecutionProvider']
         )
 
-        st.write('Downloading Mistral Nemo AI Model...')
-
-        model_path = snapshot_download('casperhansen/mistral-nemo-instruct-2407-awq')
-
-        st.write('Loading Mistral Nemo AI Model quantized with AWQ and using Outlines + vLLM Engine as backend...')
+        st.write('Downloading and Loading Mistral Nemo AI Model quantized with AWQ and using Outlines + vLLM Engine as backend...')
 
-        llm = vllm.LLM(
-            model=model_path,
-            tokenizer=model_path,
+        llm = LLM(
+            model='casperhansen/mistral-nemo-instruct-2407-awq',
             tensor_parallel_size=1,
             trust_remote_code=True,
             enforce_eager=True,
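The generator side now points vLLM directly at the AWQ repository instead of a separately snapshot-downloaded path. A hedged smoke-test sketch for the engine configured above, using the SamplingParams import already present in app.py; the prompt and sampling values are illustrative. Outlines can then wrap the same engine for structured generation, which is what the models/generate imports at the top of the file are for.

from vllm import LLM
from vllm.sampling_params import SamplingParams

llm = LLM(
    model='casperhansen/mistral-nemo-instruct-2407-awq',  # AWQ weights, quantization auto-detected
    tensor_parallel_size=1,
    trust_remote_code=True,
    enforce_eager=True,
)

params = SamplingParams(temperature=0.3, max_tokens=128)  # illustrative values
outputs = llm.generate(['Summarize hybrid search in one sentence.'], params)
print(outputs[0].outputs[0].text)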
@@ -516,7 +514,8 @@ def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: Optimu
         documents.append(doc.page_content)
 
     start_dense = time.time()
-    dense_embeddings = dense_model.embed_documents(documents)
+    dense_embeddings = dense_model.encode(documents, pooling_params=PoolingParams(pooling_type=PoolingType.MEAN))
+    print(f'DENSE EMBED : {dense_embeddings}')
     end_dense = time.time()
     final_dense = end_dense - start_dense
     print(f'DENSE TIME: {final_dense}')
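encode() returns one output object per input chunk, and the pooled vector lives at .outputs.embedding on each item. A hedged sketch of unpacking those vectors and upserting the dense side into Qdrant; the collection name, named-vector layout, and in-memory client are illustrative assumptions, not taken from app.py:

import uuid
from qdrant_client import QdrantClient, models
from vllm import LLM, PoolingParams, PoolingType

documents = ['first chunk of a slide', 'second chunk']   # stand-ins for the chunked texts
dense_model = LLM(model='mixedbread-ai/mxbai-embed-large-v1', enforce_eager=True, max_model_len=512)
outputs = dense_model.encode(documents, pooling_params=PoolingParams(pooling_type=PoolingType.MEAN))
dense_vectors = [o.outputs.embedding for o in outputs]    # one pooled vector per chunk

client = QdrantClient(':memory:')                         # illustrative client
client.create_collection(
    'documents',
    vectors_config={'dense': models.VectorParams(size=1024, distance=models.Distance.COSINE)},
)
client.upsert(
    collection_name='documents',
    points=[
        models.PointStruct(id=str(uuid.uuid4()), vector={'dense': vec}, payload={'text': doc})
        for doc, vec in zip(documents, dense_vectors)
    ],
)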
@@ -529,7 +528,7 @@ def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: Optimu
     final_sparse = end_sparse - start_sparse
     print(f'SPARSE TIME: {final_sparse}')
 
-    return payload_docs, dense_embeddings, sparse_embeddings
+    return payload_docs, dense_embeddings[0].outputs.embedding, sparse_embeddings
 
 def on_change_documents_only():
     if st.session_state.documents_only: