Spaces:
Sleeping
Sleeping
File size: 1,379 Bytes
bd3532f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
from typing import List
from embedding_provider import EmbeddingProvider
from database.annoydb import AnnoyDB
class SearchManager:
    """Smart search manager holding a vector index and a keyword index.

    Construction embeds every document, loads the embeddings into an
    ``AnnoyDB`` index, and creates a ``KeywordSearchProvider`` over the same
    documents. Two weights are stored alongside for hybrid search.
    """

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        documents: List[str],
        semantic_weight: float = 0.7,
        keyword_weight: float = 0.3
    ) -> None:
        """Embed ``documents`` and prepare both search backends.

        Args:
            embedding_provider (EmbeddingProvider): Provider whose
                ``embed_documents`` is called once on ``documents``.
            documents (List[str]): Documents to index; the same list object
                is kept on the instance (no copy is made).
            semantic_weight (float, optional): Stored as
                ``self.semantic_weight`` for hybrid search; it is not used
                inside this constructor. Defaults to 0.7.
            keyword_weight (float, optional): Stored as
                ``self.keyword_weight`` for hybrid search; it is not used
                inside this constructor. Defaults to 0.3.
        """
        # Plain configuration: none of these assignments depends on the
        # index construction further down.
        self.embedding_provider = embedding_provider
        self.documents = documents
        self.semantic_weight = semantic_weight
        self.keyword_weight = keyword_weight

        # Semantic side. Rows of the result are paired with documents below,
        # so presumably there is one embedding row per document -- verify
        # against the provider.
        embeddings = embedding_provider.embed_documents(documents)
        self.semantic_embeddings = embeddings

        # The index dimension is read from the second entry of ``shape``, so
        # the provider's result must expose at least two axes.
        index = AnnoyDB(embedding_dim=embeddings.shape[1])
        self.vector_db = index
        for vector, text in zip(embeddings, documents):
            index.add_item(vector, text)
        index.build()

        # Keyword side, built over the same raw documents.
        # NOTE(review): KeywordSearchProvider is not among the imports visible
        # in this file -- confirm it is in scope, otherwise this line raises
        # NameError.
        self.keyword_search = KeywordSearchProvider(documents)