jpohhhh committed on
Commit
f2c3585
1 Parent(s): c62769e

Try quantizing model

Browse files

8.2 seconds before quantize

Files changed (1) hide show
  1. handler.py +4 -1
handler.py CHANGED
@@ -1,9 +1,12 @@
1
  from sentence_transformers import SentenceTransformer, util
2
  from typing import Dict, List, Any
 
 
3
 
4
  class EndpointHandler():
5
  def __init__(self, path=""):
6
- self.model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
 
7
 
8
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
9
  """
 
1
  from sentence_transformers import SentenceTransformer, util
2
  from typing import Dict, List, Any
3
+ from torch.nn import Embedding, Linear
4
+ from torch.quantization import quantize_dynamic
5
 
6
  class EndpointHandler():
7
  def __init__(self, path=""):
8
+ slowmodel = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
9
+ self.model = quantize_dynamic(slowmodel, {Linear, Embedding})
10
 
11
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
12
  """