Update optimum_encoder.py
Browse files
- optimum_encoder.py +6 -1
optimum_encoder.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import requests
|
2 |
import time
|
3 |
import os
|
|
|
4 |
from typing import Any, List, Optional, Dict
|
5 |
from pydantic.v1 import PrivateAttr
|
6 |
|
@@ -101,7 +102,8 @@ class OptimumEncoder(BaseEncoder):
|
|
101 |
batch_size: int = 32,
|
102 |
normalize_embeddings: bool = True,
|
103 |
pooling_strategy: str = "mean",
|
104 |
-
matryoshka_dim: int = 512
|
|
|
105 |
) -> List[List[float]]:
|
106 |
all_embeddings = []
|
107 |
for i in range(0, len(docs), batch_size):
|
@@ -137,6 +139,9 @@ class OptimumEncoder(BaseEncoder):
|
|
137 |
|
138 |
embeddings = embeddings.tolist()
|
139 |
all_embeddings.extend(embeddings)
|
|
|
|
|
|
|
140 |
return all_embeddings
|
141 |
|
142 |
def _mean_pooling(self, model_output, attention_mask):
|
|
|
1 |
import requests
|
2 |
import time
|
3 |
import os
|
4 |
+
import numpy as np
|
5 |
from typing import Any, List, Optional, Dict
|
6 |
from pydantic.v1 import PrivateAttr
|
7 |
|
|
|
102 |
batch_size: int = 32,
|
103 |
normalize_embeddings: bool = True,
|
104 |
pooling_strategy: str = "mean",
|
105 |
+
matryoshka_dim: int = 512,
|
106 |
+
convert_to_numpy: bool = True
|
107 |
) -> List[List[float]]:
|
108 |
all_embeddings = []
|
109 |
for i in range(0, len(docs), batch_size):
|
|
|
139 |
|
140 |
embeddings = embeddings.tolist()
|
141 |
all_embeddings.extend(embeddings)
|
142 |
+
|
143 |
+
if convert_to_numpy:
|
144 |
+
return np.asarray([emb.numpy() for emb in all_embeddings])
|
145 |
return all_embeddings
|
146 |
|
147 |
def _mean_pooling(self, model_output, attention_mask):
|