yrobel-lima committed on
Commit
f2a1c22
1 Parent(s): d612275

Upload update_vector_database.py

Browse files
Files changed (1) hide show
  1. utils/update_vector_database.py +12 -8
utils/update_vector_database.py CHANGED
@@ -100,7 +100,7 @@ class SparseVectorStore:
100
  def __init__(self, documents: list[Document], collection_name: str, vector_name: str, k: int = 4, splade_model_id: str = "naver/splade-cocondenser-ensembledistil"):
101
  self.validate_environment_variables()
102
  self.client = QdrantClient(url=os.getenv(
103
- "QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY"))
104
  self.model_id = splade_model_id
105
  self.tokenizer, self.model = self.set_tokenizer_config()
106
  self.collection_name = collection_name
@@ -124,7 +124,6 @@ class SparseVectorStore:
124
  model = AutoModelForMaskedLM.from_pretrained(self.model_id)
125
  return tokenizer, model
126
 
127
- @cache
128
  def sparse_encoder(self, text: str) -> tuple[list[int], list[float]]:
129
  """This function encodes the input text into a sparse vector. The sparse_encoder is required for the QdrantSparseVectorRetriever.
130
  Adapted from the Qdrant documentation: Computing the Sparse Vector code.
@@ -135,17 +134,22 @@ class SparseVectorStore:
135
  Returns:
136
  tuple[list[int], list[float]]: Indices and values of the sparse vector
137
  """
138
- tokens = self.tokenizer(
139
- text, return_tensors="pt", max_length=512, padding="max_length", truncation=True)
140
- output = self.model(**tokens)
 
 
 
141
  logits, attention_mask = output.logits, tokens.attention_mask
142
- relu_log = torch.log(1 + torch.relu(logits))
 
143
  weighted_log = relu_log * attention_mask.unsqueeze(-1)
 
144
  max_val, _ = torch.max(weighted_log, dim=1)
145
  vec = max_val.squeeze()
146
 
147
- indices = vec.nonzero().numpy().flatten()
148
- values = vec.detach().numpy()[indices]
149
 
150
  return indices.tolist(), values.tolist()
151
 
 
100
  def __init__(self, documents: list[Document], collection_name: str, vector_name: str, k: int = 4, splade_model_id: str = "naver/splade-cocondenser-ensembledistil"):
101
  self.validate_environment_variables()
102
  self.client = QdrantClient(url=os.getenv(
103
+ "QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY")) # TODO: prefer_grpc=True is not working
104
  self.model_id = splade_model_id
105
  self.tokenizer, self.model = self.set_tokenizer_config()
106
  self.collection_name = collection_name
 
124
  model = AutoModelForMaskedLM.from_pretrained(self.model_id)
125
  return tokenizer, model
126
 
 
127
  def sparse_encoder(self, text: str) -> tuple[list[int], list[float]]:
128
  """This function encodes the input text into a sparse vector. The sparse_encoder is required for the QdrantSparseVectorRetriever.
129
  Adapted from the Qdrant documentation: Computing the Sparse Vector code.
 
134
  Returns:
135
  tuple[list[int], list[float]]: Indices and values of the sparse vector
136
  """
137
+ tokens = self.tokenizer(text, return_tensors="pt",
138
+ max_length=512, padding="max_length", truncation=True)
139
+
140
+ with torch.no_grad():
141
+ output = self.model(**tokens)
142
+
143
  logits, attention_mask = output.logits, tokens.attention_mask
144
+
145
+ relu_log = torch.log1p(torch.relu(logits))
146
  weighted_log = relu_log * attention_mask.unsqueeze(-1)
147
+
148
  max_val, _ = torch.max(weighted_log, dim=1)
149
  vec = max_val.squeeze()
150
 
151
+ indices = torch.nonzero(vec, as_tuple=False).squeeze().numpy()
152
+ values = vec[indices].numpy()
153
 
154
  return indices.tolist(), values.tolist()
155