Spaces: Starting on T4
Update app.py
Browse files
app.py
CHANGED
@@ -37,7 +37,7 @@ def transform_query(query: str) -> str:
|
|
37 |
|
38 |
def query_hybrid_search(col: Collection, query: str):
|
39 |
query_dense_embeddings = dense_model(transform_query(query))
|
40 |
-
query_sparse_embeddings = sparse_model.
|
41 |
|
42 |
sparse_req = AnnSearchRequest(query_sparse_embeddings, "sparse_vector", {"metric_type": "IP"}, limit=10)
|
43 |
dense_req = AnnSearchRequest(query_dense_embeddings, "dense_vector", {"metric_type": "COSINE"}, limit=10)
|
@@ -46,7 +46,7 @@ def query_hybrid_search(col: Collection, query: str):
|
|
46 |
|
47 |
return res
|
48 |
|
49 |
-
def main(query: str, client: MilvusClient, collection_name: str, llm, dense_model:
|
50 |
dense_query = list(dense_model(query,32))
|
51 |
sparse_query = list(sparse_model.embed(query, 32))
|
52 |
|
@@ -247,7 +247,7 @@ def load_models_and_documents():
|
|
247 |
|
248 |
return client, collection_name, llm, dense_model, sparse_model
|
249 |
|
250 |
-
def chunk_documents(texts: List[str], metadatas: List[dict], dense_model:
|
251 |
import time
|
252 |
text_splitter = StatisticalChunker(
|
253 |
dense_model
|
@@ -262,7 +262,7 @@ def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: FastEm
|
|
262 |
documents, metadatas_docs = [list(t) for t in zip(*documents_and_metadatas)]
|
263 |
|
264 |
dense_embeddings = dense_model(documents, 32)
|
265 |
-
sparse_embeddings = list(sparse_model.embed(documents, 32))
|
266 |
|
267 |
return documents, metadatas_docs, dense_embeddings, sparse_embeddings
|
268 |
|
|
|
37 |
|
38 |
def query_hybrid_search(col: Collection, query: str):
|
39 |
query_dense_embeddings = dense_model(transform_query(query))
|
40 |
+
query_sparse_embeddings = sparse_model.query_embed(query)
|
41 |
|
42 |
sparse_req = AnnSearchRequest(query_sparse_embeddings, "sparse_vector", {"metric_type": "IP"}, limit=10)
|
43 |
dense_req = AnnSearchRequest(query_dense_embeddings, "dense_vector", {"metric_type": "COSINE"}, limit=10)
|
|
|
46 |
|
47 |
return res
|
48 |
|
49 |
+
def main(query: str, client: MilvusClient, collection_name: str, llm, dense_model: OptimumEncoder, sparse_model: Bm42):
|
50 |
dense_query = list(dense_model(query,32))
|
51 |
sparse_query = list(sparse_model.embed(query, 32))
|
52 |
|
|
|
247 |
|
248 |
return client, collection_name, llm, dense_model, sparse_model
|
249 |
|
250 |
+
def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: Bm42):
|
251 |
import time
|
252 |
text_splitter = StatisticalChunker(
|
253 |
dense_model
|
|
|
262 |
documents, metadatas_docs = [list(t) for t in zip(*documents_and_metadatas)]
|
263 |
|
264 |
dense_embeddings = dense_model(documents, 32)
|
265 |
+
sparse_embeddings = list(sparse_model.embed(documents, 32, 0))
|
266 |
|
267 |
return documents, metadatas_docs, dense_embeddings, sparse_embeddings
|
268 |
|