Update app.py
app.py CHANGED
@@ -12,7 +12,7 @@ from typing import List
 from numpy import ndarray
 from llama_cpp import Llama
 from statistical_chunker import StatisticalChunker
-from
+from semantic_router.encoders.huggingface import HuggingFaceEncoder
 from scipy.sparse import csr_matrix, save_npz, load_npz, vstack
 from qdrant_client import QdrantClient, models
 from langchain_community.document_loaders import WikipediaLoader, WebBaseLoader
@@ -30,27 +30,22 @@ from pymilvus import (
     RRFRanker
 )
 
+def transform_query(query: str) -> str:
+    """ For retrieval, add the prompt for query (not for documents).
+    """
+    return f'Represent this sentence for searching relevant passages: {query}'
+
 def query_hybrid_search(col: Collection, query: str):
-
+    query_dense_embeddings = dense_model(transform_query(query))
+    query_sparse_embeddings = sparse_model.encode(query)
 
-    sparse_req = AnnSearchRequest(
-
-    )
-    dense_req = AnnSearchRequest(
-        query_embeddings["dense"], "dense_vector", {"metric_type": "COSINE"}, limit=2
-    )
+    sparse_req = AnnSearchRequest(query_sparse_embeddings, "sparse_vector", {"metric_type": "IP"}, limit=10)
+    dense_req = AnnSearchRequest(query_dense_embeddings, "dense_vector", {"metric_type": "COSINE"}, limit=10)
 
-    res = col.hybrid_search(
-        [sparse_req, dense_req], rerank=RRFRanker(), limit=3, output_fields=["text"]
-    )
+    res = col.hybrid_search([sparse_req, dense_req], rerank=RRFRanker(), limit=3, output_fields=["text"])
 
     return res
 
-def transform_query(query: str) -> str:
-    """ For retrieval, add the prompt for query (not for documents).
-    """
-    return f'Represent this sentence for searching relevant passages: {query}'
-
 def main(query: str, client: QdrantClient, collection_name: str, llm, dense_model: FastEmbedEncoder, sparse_model: SparseTextEmbedding):
     dense_query = list(dense_model(query,32))
     sparse_query = list(sparse_model.embed(query, 32))
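A minimal usage sketch of the new query path, with two assumptions the diff does not state: a Milvus instance reachable at a local URI, and dense_model / sparse_model existing at module level (query_hybrid_search references them without taking them as parameters). Collection name and query text are illustrative.

from pymilvus import connections, Collection

connections.connect(uri="http://localhost:19530")   # assumed local Milvus

col = Collection("collection_demo")
col.load()   # indexes must be built and the collection loaded before searching

# Sparse (IP) and dense (COSINE) requests are fused with RRF; top 3 hits kept.
res = query_hybrid_search(col, "What genre is the game?")
for hit in res[0]:
    print(hit.entity.get("text"))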
@@ -133,7 +128,7 @@ def load_models_and_documents():
         n_gpu_layers=32
     )
 
-    dense_model =
+    dense_model = HuggingFaceEncoder(
         name='mixedbread-ai/mxbai-embed-large-v1',
         device='cuda'
     )
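The prompt added by transform_query follows the asymmetric retrieval convention of mixedbread-ai/mxbai-embed-large-v1: documents are embedded as-is, only queries get the prefix. A small sketch of that split, assuming the semantic_router encoder's list-in / list-out calling convention; the sample strings are made up.

# Documents: embed the raw chunk text, no prompt.
doc_vecs = dense_model(["Elden Ring is an action role-playing game."])

# Queries: prepend the retrieval prompt before embedding.
query_vecs = dense_model([transform_query("Which studio developed Elden Ring?")])

print(len(doc_vecs[0]))   # 1024 dimensions, matching the dense_vector field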
@@ -150,7 +145,6 @@ def load_models_and_documents():
     collection_name = 'collection_demo'
 
     fields = [
-        FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=True, max_length=100),
         FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=8192),
         FieldSchema(name="sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
         FieldSchema(name="dense_vector", dtype=DataType.FLOAT_VECTOR, dim=1024)
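For context, a hedged sketch of how rows would be written against this schema: pymilvus Collection.insert accepts column-wise data, one list per field in declaration order. Milvus schemas also need a primary-key field, so the sketch assumes one with auto_id=True still exists even though the explicit pk FieldSchema is dropped in this hunk; the chunk texts, sparse weights, and encoder call below are placeholders.

# Hypothetical column-wise insert matching the declared fields
# (text, sparse_vector, dense_vector).
chunks = ["first chunk of the page ...", "second chunk of the page ..."]

entities = [
    chunks,                              # text (VARCHAR)
    [{12: 0.8, 4051: 0.3}, {7: 1.1}],    # sparse_vector (dimension -> weight)
    dense_model(chunks),                 # dense_vector (1024-dim)
]

col.insert(entities)
col.flush()   # make the new data visible before building indexes and loading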
@@ -159,49 +153,10 @@ def load_models_and_documents():
     schema = CollectionSchema(fields, "")
     col = Collection(collection_name, schema)
 
-    sparse_index = {"index_type": "
+    sparse_index = {"index_type": "SPARSE_WAND", "metric_type": "IP"}
     dense_index = {"index_type": "FLAT", "metric_type": "COSINE"}
     col.create_index("sparse_vector", sparse_index)
     col.create_index("dense_vector", dense_index)
-
-    entities = [
-        docs,
-        docs_embeddings["sparse"],
-        docs_embeddings["dense"]
-    ]
-
-    client.create_collection(
-        collection_name,
-        {
-            "text-dense": models.VectorParams(
-                size=1024,
-                distance=models.Distance.COSINE,
-                on_disk=False,
-                quantization_config=models.BinaryQuantization(
-                    binary=models.BinaryQuantizationConfig(
-                        always_ram=True
-                    )
-                )
-            )
-        },
-        {
-            "text-sparse": models.SparseVectorParams(
-                index=models.SparseIndexParams(
-                    on_disk=False
-                )
-            )
-        },
-        2,
-        optimizers_config=models.OptimizersConfigDiff(
-            indexing_threshold=0,
-            default_segment_number=4
-        ),
-        hnsw_config=models.HnswConfigDiff(
-            on_disk=False,
-            m=64,
-            ef_construct=512
-        )
-    )
 
     with st.spinner('Parse and chunk documents...'):
         name = 'action_rpg'
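On the new sparse index: SPARSE_WAND with the IP metric operates on sparse float vectors, which pymilvus accepts either as scipy sparse rows (likely why csr_matrix is imported) or as plain dicts mapping dimension index to weight. A small sketch of the dict form, with made-up indices and weights:

# Illustrative sparse query against the SPARSE_WAND / IP index configured above.
sparse_row = {17: 0.42, 908: 1.31, 40212: 0.07}
req = AnnSearchRequest(data=[sparse_row], anns_field="sparse_vector",
                       param={"metric_type": "IP"}, limit=10)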