Update app.py
Browse files
app.py
CHANGED
@@ -1,23 +1,23 @@
|
|
1 |
import os
|
2 |
import re
|
3 |
import time
|
|
|
4 |
import msgpack
|
5 |
import numpy as np
|
6 |
import streamlit as st
|
7 |
from numpy import ndarray
|
|
|
8 |
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
|
9 |
from qdrant_client import QdrantClient, models
|
10 |
-
from fastembed.sparse.splade_pp import supported_splade_models
|
11 |
-
from fastembed import SparseTextEmbedding, SparseEmbedding
|
12 |
-
from fastembed_ext import FastEmbedEmbeddingsLc
|
13 |
from langchain_community.llms.llamacpp import LlamaCpp
|
14 |
from langchain_community.document_loaders.wikipedia import WikipediaLoader
|
15 |
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
|
16 |
from langchain_core.prompts import PromptTemplate
|
17 |
from langchain.chains.summarize import load_summarize_chain
|
18 |
from langchain_experimental.text_splitter import SemanticChunker
|
|
|
19 |
from langchain_core.documents import Document
|
20 |
-
from huggingface_hub import
|
21 |
from qdrant_client.models import (
|
22 |
NamedSparseVector,
|
23 |
NamedVector,
|
@@ -63,7 +63,7 @@ def make_points(chunks: list[str], dense: list[ndarray], indices, values)-> list
|
|
63 |
points.append(point)
|
64 |
return points
|
65 |
|
66 |
-
def search(client: QdrantClient, collection_name: str, dense
|
67 |
search_results = client.search_batch(
|
68 |
collection_name,
|
69 |
[
|
@@ -78,8 +78,8 @@ def search(client: QdrantClient, collection_name: str, dense: ndarray, sparse: l
|
|
78 |
vector=NamedSparseVector(
|
79 |
name="text-sparse",
|
80 |
vector=SparseVector(
|
81 |
-
indices=
|
82 |
-
values=
|
83 |
),
|
84 |
),
|
85 |
limit=10
|
@@ -132,7 +132,7 @@ def main(query: str, client: QdrantClient, collection_name: str, llm, dense_mode
|
|
132 |
# docs = docs.load()
|
133 |
|
134 |
|
135 |
-
dense_query =
|
136 |
sparse_query = compute_sparse(query, sparse_model, sparse_tokenizer)
|
137 |
|
138 |
search_results = search(
|
@@ -197,28 +197,22 @@ def compute_sparse(sentence, model, tokenizer):
|
|
197 |
max_val, _ = torch.max(weighted_log, dim=1)
|
198 |
vector = max_val.squeeze()
|
199 |
|
200 |
-
cols = vector.nonzero().
|
201 |
-
weights = vector.
|
202 |
|
203 |
return cols, weights
|
204 |
|
205 |
-
def
|
206 |
-
|
207 |
|
208 |
-
def
|
209 |
-
|
210 |
-
"model": "prithivida/Splade_PP_en_v2",
|
211 |
-
"vocab_size": 30522,
|
212 |
-
"description": "Implementation of SPLADE++ Model for English v2",
|
213 |
-
"size_in_GB": 0.532,
|
214 |
-
"sources": {
|
215 |
-
"hf": "devve1/Splade_PP_en_v2_onnx"
|
216 |
-
},
|
217 |
-
"model_file": "model.onnx"
|
218 |
-
}
|
219 |
|
|
|
220 |
with st.spinner('Load models...'):
|
221 |
-
model_path =
|
|
|
|
|
222 |
|
223 |
llm = LlamaCpp(
|
224 |
model_path=model_path,
|
@@ -233,6 +227,11 @@ def load_models_and_documents():
|
|
233 |
reverse_voc = {v: k for k, v in tokenizer.vocab.items()}
|
234 |
sparse_model = AutoModelForMaskedLM.from_pretrained('prithivida/Splade_PP_en_v2')
|
235 |
|
|
|
|
|
|
|
|
|
|
|
236 |
client = QdrantClient(path=os.getenv('HF_HOME'))
|
237 |
collection_name = 'collection_demo'
|
238 |
|
@@ -344,7 +343,7 @@ def chunk_documents(docs, dense_model, sparse_model, sparse_tokenizer):
|
|
344 |
|
345 |
documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
|
346 |
|
347 |
-
dense_embeddings =
|
348 |
indices, values = compute_sparse(documents, sparse_model, sparse_tokenizer)
|
349 |
|
350 |
return documents, dense_embeddings, indices, values
|
|
|
1 |
import os
|
2 |
import re
|
3 |
import time
|
4 |
+
import torch
|
5 |
import msgpack
|
6 |
import numpy as np
|
7 |
import streamlit as st
|
8 |
from numpy import ndarray
|
9 |
+
from transformers import AutoModelForMaskedLM, AutoTokenizer
|
10 |
from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
|
11 |
from qdrant_client import QdrantClient, models
|
|
|
|
|
|
|
12 |
from langchain_community.llms.llamacpp import LlamaCpp
|
13 |
from langchain_community.document_loaders.wikipedia import WikipediaLoader
|
14 |
from langchain_community.document_loaders.unstructured import UnstructuredFileLoader
|
15 |
from langchain_core.prompts import PromptTemplate
|
16 |
from langchain.chains.summarize import load_summarize_chain
|
17 |
from langchain_experimental.text_splitter import SemanticChunker
|
18 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
19 |
from langchain_core.documents import Document
|
20 |
+
from huggingface_hub import hf_hub_download
|
21 |
from qdrant_client.models import (
|
22 |
NamedSparseVector,
|
23 |
NamedVector,
|
|
|
63 |
points.append(point)
|
64 |
return points
|
65 |
|
66 |
+
def search(client: QdrantClient, collection_name: str, dense, indices, values):
|
67 |
search_results = client.search_batch(
|
68 |
collection_name,
|
69 |
[
|
|
|
78 |
vector=NamedSparseVector(
|
79 |
name="text-sparse",
|
80 |
vector=SparseVector(
|
81 |
+
indices=indices.tolist(),
|
82 |
+
values=values.tolist(),
|
83 |
),
|
84 |
),
|
85 |
limit=10
|
|
|
132 |
# docs = docs.load()
|
133 |
|
134 |
|
135 |
+
dense_query = compute_dense_query(query, dense_model)
|
136 |
sparse_query = compute_sparse(query, sparse_model, sparse_tokenizer)
|
137 |
|
138 |
search_results = search(
|
|
|
197 |
max_val, _ = torch.max(weighted_log, dim=1)
|
198 |
vector = max_val.squeeze()
|
199 |
|
200 |
+
cols = vector.nonzero().squeeze().tolist()
|
201 |
+
weights = vector[cols].tolist()
|
202 |
|
203 |
return cols, weights
|
204 |
|
205 |
+
def compute_dense_query(sentence, model):
    """Embed a single search query with the dense embedding model.

    The retrieval prompt prefix is prepended before embedding so the query
    is encoded asymmetrically relative to the stored passages.
    """
    prompt = f'Represent this sentence for searching relevant passages: {sentence}'
    return model.embed_query(prompt)
|
207 |
|
208 |
+
def compute_dense_docs(docs, model):
    """Embed a batch of document texts with the dense embedding model.

    Unlike query embedding, passages are embedded as-is (no prompt prefix).
    """
    embeddings = model.embed_documents(docs)
    return embeddings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
+
def load_models_and_documents():
|
212 |
with st.spinner('Load models...'):
|
213 |
+
model_path = hf_hub_download(repo_id='NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF',
|
214 |
+
filename='Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q8_0.gguf'
|
215 |
+
)
|
216 |
|
217 |
llm = LlamaCpp(
|
218 |
model_path=model_path,
|
|
|
227 |
reverse_voc = {v: k for k, v in tokenizer.vocab.items()}
|
228 |
sparse_model = AutoModelForMaskedLM.from_pretrained('prithivida/Splade_PP_en_v2')
|
229 |
|
230 |
+
dense_model = HuggingFaceEmbeddings(model_name='mixedbread-ai/mxbai-embed-large-v1',
|
231 |
+
cache_folder=os.getenv('HF_HOME'),
|
232 |
+
model_kwargs={'truncate_dim':512}
|
233 |
+
)
|
234 |
+
|
235 |
client = QdrantClient(path=os.getenv('HF_HOME'))
|
236 |
collection_name = 'collection_demo'
|
237 |
|
|
|
343 |
|
344 |
documents = [doc.page_content for doc in text_splitter.transform_documents(list(docs))]
|
345 |
|
346 |
+
dense_embeddings = compute_dense_docs(documents, dense_model)
|
347 |
indices, values = compute_sparse(documents, sparse_model, sparse_tokenizer)
|
348 |
|
349 |
return documents, dense_embeddings, indices, values
|