devve1 committed on
Commit
5aa60f8
1 Parent(s): eb93a92

Update optimum_encoder.py

Browse files
Files changed (1) hide show
  1. optimum_encoder.py +6 -1
optimum_encoder.py CHANGED
@@ -1,6 +1,7 @@
1
  import requests
2
  import time
3
  import os
 
4
  from typing import Any, List, Optional, Dict
5
  from pydantic.v1 import PrivateAttr
6
 
@@ -101,7 +102,8 @@ class OptimumEncoder(BaseEncoder):
101
  batch_size: int = 32,
102
  normalize_embeddings: bool = True,
103
  pooling_strategy: str = "mean",
104
- matryoshka_dim: int = 512
 
105
  ) -> List[List[float]]:
106
  all_embeddings = []
107
  for i in range(0, len(docs), batch_size):
@@ -137,6 +139,9 @@ class OptimumEncoder(BaseEncoder):
137
 
138
  embeddings = embeddings.tolist()
139
  all_embeddings.extend(embeddings)
 
 
 
140
  return all_embeddings
141
 
142
  def _mean_pooling(self, model_output, attention_mask):
 
1
  import requests
2
  import time
3
  import os
4
+ import numpy as np
5
  from typing import Any, List, Optional, Dict
6
  from pydantic.v1 import PrivateAttr
7
 
 
102
  batch_size: int = 32,
103
  normalize_embeddings: bool = True,
104
  pooling_strategy: str = "mean",
105
+ matryoshka_dim: int = 512,
106
+ convert_to_numpy: bool = True
107
  ) -> List[List[float]]:
108
  all_embeddings = []
109
  for i in range(0, len(docs), batch_size):
 
139
 
140
  embeddings = embeddings.tolist()
141
  all_embeddings.extend(embeddings)
142
+
143
+ if convert_to_numpy:
144
+ return np.asarray([emb.numpy() for emb in all_embeddings])
145
  return all_embeddings
146
 
147
  def _mean_pooling(self, model_output, attention_mask):