Disable quantization
Browse files

With GPU, torch says: AssertionError: Embedding quantization is only supported with float_qparams_weight_only_qconfig.
- handler.py +1 -2
handler.py
CHANGED
@@ -5,8 +5,7 @@ from torch.quantization import quantize_dynamic

 class EndpointHandler():
     def __init__(self, path=""):
-
-        self.model = quantize_dynamic(slowmodel, {Linear, Embedding})
+        self.model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """