Update handler.py
handler.py  (+4 -4)
@@ -6,9 +6,9 @@ import torch
 # check for GPU
 #device = 0 if torch.cuda.is_available() else -1

-MAX_INPUT_TOKEN_LENGTH
-MAX_MAX_NEW_TOKENS=2048
-DEFAULT_MAX_NEW_TOKENS
+MAX_INPUT_TOKEN_LENGTH = 4000
+MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 1024

 class EndpointHandler():
     def __init__(self, path=""):
@@ -28,7 +28,7 @@ class EndpointHandler():
         if parameters["max_new_tokens"] > MAX_MAX_NEW_TOKENS:
             return [{"generated_text": None, "error": f"requested max_new_tokens too high (> {MAX_MAX_NEW_TOKENS})"}]

-        input_token_length = get_input_token_length(inputs)
+        input_token_length = self.get_input_token_length(inputs)
         if input_token_length > MAX_INPUT_TOKEN_LENGTH:
             return [{"generated_text": None, "error": f"input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH})"}]

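For context, the second hunk switches `get_input_token_length` from a free-function call to a method call on the handler. Its implementation is not part of this diff; the sketch below is a minimal, hypothetical version that assumes the handler keeps its tokenizer on `self.tokenizer` (attribute name assumed, not confirmed by this change):

```python
# Hypothetical sketch only: the rest of handler.py is not shown in this diff,
# and the tokenizer attribute name is an assumption.
from transformers import AutoTokenizer

class EndpointHandler():
    def __init__(self, path=""):
        # the real __init__ also loads the model; omitted here
        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def get_input_token_length(self, inputs):
        # count prompt tokens so __call__ can reject inputs longer than
        # MAX_INPUT_TOKEN_LENGTH before running generation
        return len(self.tokenizer(inputs)["input_ids"])
```

Calling the helper through `self` keeps it next to the tokenizer the handler already loads, and the first hunk gives the three limits concrete values (4000 input tokens, 2048 max and 1024 default `max_new_tokens`) so the two guard clauses in `__call__` can actually run.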