Spaces: Starting on T4
Update app.py
Browse files
app.py
CHANGED
@@ -56,7 +56,7 @@ def transform_query(query: str) -> str:
|
|
56 |
|
57 |
def query_hybrid_search(query: str, client: QdrantClient, collection_name: str, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
|
58 |
dense_embeddings = dense_model([transform_query(query)], 1, convert_to_numpy=True)[0]
|
59 |
-
sparse_embeddings = list(sparse_model.
|
60 |
|
61 |
return client.query_points(
|
62 |
collection_name=collection_name,
|
@@ -174,7 +174,8 @@ def load_models_and_documents():
|
|
174 |
"text-sparse": models.SparseVectorParams(
|
175 |
index=models.SparseIndexParams(
|
176 |
on_disk=False
|
177 |
-
)
|
|
|
178 |
)
|
179 |
},
|
180 |
2,
|
@@ -226,7 +227,10 @@ def load_models_and_documents():
|
|
226 |
outfile_metadatas.write(packed_metadatas)
|
227 |
|
228 |
np.savez_compressed(dense_path, *dense_embeddings)
|
229 |
-
max_index =
|
|
|
|
|
|
|
230 |
|
231 |
sparse_matrices = []
|
232 |
for embedding in sparse_embeddings:
|
|
|
56 |
|
57 |
def query_hybrid_search(query: str, client: QdrantClient, collection_name: str, dense_model: OptimumEncoder, sparse_model: SparseTextEmbedding):
|
58 |
dense_embeddings = dense_model([transform_query(query)], 1, convert_to_numpy=True)[0]
|
59 |
+
sparse_embeddings = list(sparse_model.query_embed(query))[0]
|
60 |
|
61 |
return client.query_points(
|
62 |
collection_name=collection_name,
|
|
|
174 |
"text-sparse": models.SparseVectorParams(
|
175 |
index=models.SparseIndexParams(
|
176 |
on_disk=False
|
177 |
+
),
|
178 |
+
modifier=models.Modifier.IDF
|
179 |
)
|
180 |
},
|
181 |
2,
|
|
|
227 |
outfile_metadatas.write(packed_metadatas)
|
228 |
|
229 |
np.savez_compressed(dense_path, *dense_embeddings)
|
230 |
+
max_index = 0
|
231 |
+
for embedding in sparse_embeddings:
|
232 |
+
if embedding.indices.size > 0:
|
233 |
+
max_index = max(max_index, np.max(embedding.indices))
|
234 |
|
235 |
sparse_matrices = []
|
236 |
for embedding in sparse_embeddings:
|