devve1 committed on
Commit
1bd836f
1 Parent(s): 33834b7

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -37,7 +37,7 @@ def transform_query(query: str) -> str:
 
 def query_hybrid_search(col: Collection, query: str):
     query_dense_embeddings = dense_model(transform_query(query))
-    query_sparse_embeddings = sparse_model.encode(query)
+    query_sparse_embeddings = sparse_model.query_embed(query)
 
     sparse_req = AnnSearchRequest(query_sparse_embeddings, "sparse_vector", {"metric_type": "IP"}, limit=10)
     dense_req = AnnSearchRequest(query_dense_embeddings, "dense_vector", {"metric_type": "COSINE"}, limit=10)
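
The hunk above swaps sparse_model.encode(query) for query_embed(query), fastembed's query-side entry point that applies BM42's query-specific term weighting. A minimal sketch of the two call paths, assuming the app's sparse model wraps fastembed's BM42 checkpoint; the model name and the to_milvus_sparse helper are illustrative, not taken from this repo:

from fastembed import SparseTextEmbedding

# Illustrative model choice; the app wires up its own Bm42 wrapper instead.
sparse_model = SparseTextEmbedding("Qdrant/bm42-all-minilm-l6-v2-attentions")

# Document side: embed() yields one SparseEmbedding (indices + values) per input text.
doc_vectors = list(sparse_model.embed(["Milvus stores dense and sparse vectors."], batch_size=32))

# Query side: query_embed() applies BM42's query-specific weighting.
query_vector = next(sparse_model.query_embed("how does hybrid search work?"))

# Milvus accepts sparse vectors as {dimension_index: weight} dicts (hypothetical helper).
def to_milvus_sparse(emb):
    return {int(i): float(v) for i, v in zip(emb.indices, emb.values)}

print(to_milvus_sparse(query_vector))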
@@ -46,7 +46,7 @@ def query_hybrid_search(col: Collection, query: str):
 
     return res
 
-def main(query: str, client: MilvusClient, collection_name: str, llm, dense_model: FastEmbedEncoder, sparse_model: SparseTextEmbedding):
+def main(query: str, client: MilvusClient, collection_name: str, llm, dense_model: OptimumEncoder, sparse_model: Bm42):
     dense_query = list(dense_model(query,32))
     sparse_query = list(sparse_model.embed(query, 32))
 
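
The res returned here is produced by a hybrid_search call that sits in the lines elided between these two hunks. For orientation, a hedged sketch of how two AnnSearchRequest objects are typically fused in pymilvus 2.4+; the RRFRanker and the output_fields value are assumptions, not this app's actual parameters:

from pymilvus import AnnSearchRequest, Collection, RRFRanker

def hybrid_search_sketch(col: Collection, sparse_req: AnnSearchRequest, dense_req: AnnSearchRequest):
    # Run both ANN requests server-side and fuse the ranked lists with reciprocal rank fusion.
    return col.hybrid_search(
        reqs=[sparse_req, dense_req],
        rerank=RRFRanker(),       # WeightedRanker(0.7, 0.3) would weight dense/sparse instead
        limit=10,
        output_fields=["text"],   # hypothetical field name
    )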
 
@@ -247,7 +247,7 @@ def load_models_and_documents():
 
     return client, collection_name, llm, dense_model, sparse_model
 
-def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: FastEmbedEncoder, sparse_model: SparseTextEmbedding):
+def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: OptimumEncoder, sparse_model: Bm42):
     import time
     text_splitter = StatisticalChunker(
         dense_model
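
StatisticalChunker is constructed here with the dense encoder as its similarity backbone. A sketch of that pattern, assuming the chunker is the one from the semantic-chunkers package; the import paths, the docs= keyword, and the splits attribute follow that library's documented usage and are not verified against this app:

from semantic_chunkers import StatisticalChunker
from semantic_router.encoders import HuggingFaceEncoder  # stand-in; the app passes its dense_model

encoder = HuggingFaceEncoder()
chunker = StatisticalChunker(encoder=encoder)

# One document in, one list of statistically grouped sentence chunks out.
chunks_per_doc = chunker(docs=["Milvus stores dense and sparse vectors. BM42 produces the sparse side."])
for chunk in chunks_per_doc[0]:
    print(" ".join(chunk.splits))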
@@ -262,7 +262,7 @@ def chunk_documents(texts: List[str], metadatas: List[dict], dense_model: FastEm
     documents, metadatas_docs = [list(t) for t in zip(*documents_and_metadatas)]
 
     dense_embeddings = dense_model(documents, 32)
-    sparse_embeddings = list(sparse_model.embed(documents, 32))
+    sparse_embeddings = list(sparse_model.embed(documents, 32, 0))
 
     return documents, metadatas_docs, dense_embeddings, sparse_embeddings
 
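
Assuming sparse_model.embed keeps fastembed's signature, the added third positional argument is parallel (0 means use all available cores), alongside batch_size=32. Below is a sketch of the keyword form of the same call, plus a hypothetical insert of chunk_documents' output into Milvus; the field names, and the assumption that client, collection_name, and the returned values are in scope, are illustrative only:

# Keyword form of the changed call (same behaviour as the positional version above).
sparse_embeddings = list(sparse_model.embed(documents, batch_size=32, parallel=0))

# Hypothetical insert of the returned chunks into the collection; field names are assumptions.
rows = [
    {
        "text": doc,
        "metadata": meta,
        "dense_vector": [float(x) for x in dense],
        "sparse_vector": {int(i): float(v) for i, v in zip(sparse.indices, sparse.values)},
    }
    for doc, meta, dense, sparse in zip(documents, metadatas_docs, dense_embeddings, sparse_embeddings)
]
client.insert(collection_name=collection_name, data=rows)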
 
 