jpohhhh committed on
Commit
713ed0a
1 Parent(s): 97be557

Disable quantization

Browse files

With a GPU, torch raises: AssertionError: Embedding quantization is only supported with float_qparams_weight_only_qconfig.

Files changed (1) hide show
  1. handler.py +1 -2
handler.py CHANGED
@@ -5,8 +5,7 @@ from torch.quantization import quantize_dynamic
5
 
6
  class EndpointHandler():
7
  def __init__(self, path=""):
8
- slowmodel = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
9
- self.model = quantize_dynamic(slowmodel, {Linear, Embedding})
10
 
11
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
12
  """
 
5
 
6
  class EndpointHandler():
7
  def __init__(self, path=""):
8
+ self.model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
 
9
 
10
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
11
  """