Try quantizing model
Browse files
8.2 seconds before quantize
- handler.py +4 -1
handler.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
from typing import Dict, List, Any
|
|
|
|
|
3 |
|
4 |
class EndpointHandler():
|
5 |
def __init__(self, path=""):
|
6 |
-
|
|
|
7 |
|
8 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
9 |
"""
|
|
|
1 |
from sentence_transformers import SentenceTransformer, util
|
2 |
from typing import Dict, List, Any
|
3 |
+
from torch.nn import Embedding, Linear
|
4 |
+
from torch.quantization import quantize_dynamic
|
5 |
|
6 |
class EndpointHandler():
|
7 |
def __init__(self, path=""):
|
8 |
+
slowmodel = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
|
9 |
+
self.model = quantize_dynamic(slowmodel, {Linear, Embedding})
|
10 |
|
11 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
12 |
"""
|