File size: 1,379 Bytes
bd3532f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from typing import List

from embedding_provider import EmbeddingProvider
from database.annoydb import AnnoyDB

class SearchManager:
    """Bundle a vector index and a keyword search provider over one document set."""

    def __init__(
        self,
        embedding_provider: EmbeddingProvider,
        documents: List[str],
        semantic_weight: float = 0.7,
        keyword_weight: float = 0.3,
    ) -> None:
        """Embed the documents and set up both search backends.

        Args:
            embedding_provider (EmbeddingProvider): object whose
                ``embed_documents`` method converts ``documents`` into
                embeddings.
            documents (List[str]): texts to embed and index.
            semantic_weight (float, optional): weight intended for the
                semantic side of hybrid search; only stored here.
                Defaults to 0.7.
            keyword_weight (float, optional): weight intended for the
                keyword side of hybrid search; only stored here.
                Defaults to 0.3.
        """
        # Plain configuration: kept on the instance, not used by the constructor.
        self.embedding_provider = embedding_provider
        self.documents = documents
        self.semantic_weight = semantic_weight
        self.keyword_weight = keyword_weight

        # Embed every document once and keep the result around.
        embeddings = embedding_provider.embed_documents(documents)
        self.semantic_embeddings = embeddings

        # Vector index: the second axis of the embeddings is handed to AnnoyDB
        # as the embedding dimension, then each embedding is stored alongside
        # the document it was paired with.
        index = AnnoyDB(embedding_dim=embeddings.shape[1])
        for vector, text in zip(embeddings, documents):
            index.add_item(vector, text)
        index.build()
        self.vector_db = index

        # Keyword search over the same documents.
        # NOTE(review): KeywordSearchProvider is not imported in the visible
        # part of this file -- confirm it is brought into scope.
        self.keyword_search = KeywordSearchProvider(documents)