jpohhhh
/

biencoder_embedding

Feature Extraction

text-embeddings-inference

Inference Endpoints

Model card Files Files and versions Community

jpohhhh committed on Apr 29, 2023

Commit

f2c3585

•

1 Parent(s): c62769e

Try quantizing model

8.2 seconds before quantize

Files changed (1) hide show

handler.py +4 -1

handler.py CHANGED Viewed

@@ -1,9 +1,12 @@
 from sentence_transformers import SentenceTransformer, util
 from typing import Dict, List, Any
 class EndpointHandler():
     def __init__(self, path=""):
-        self.model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """

 from sentence_transformers import SentenceTransformer, util
 from typing import Dict, List, Any
+from torch.nn import Embedding, Linear
+from torch.quantization import quantize_dynamic
 class EndpointHandler():
     def __init__(self, path=""):
+        slowmodel = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
+        self.model = quantize_dynamic(slowmodel, {Linear, Embedding})
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """