kajdun committed · Commit db34714 · 1 Parent(s): 9a254c2

Update handler.py

Files changed (1): handler.py +3 -5
handler.py CHANGED
@@ -4,11 +4,6 @@ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
 import torch
 from loguru import logger
 
-# check for GPU
-device = 0 if torch.cuda.is_available() else -1
-
-logger.info(f"cuda: {device}")
-
 MAX_INPUT_TOKEN_LENGTH = 4000
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
@@ -29,12 +24,15 @@ class EndpointHandler():
         parameters["max_new_tokens"] = parameters.pop("max_new_tokens", DEFAULT_MAX_NEW_TOKENS)
 
         if parameters["max_new_tokens"] > MAX_MAX_NEW_TOKENS:
+            logger.error(f"requested max_new_tokens too high (> {MAX_MAX_NEW_TOKENS})")
             return [{"generated_text": None, "error": f"requested max_new_tokens too high (> {MAX_MAX_NEW_TOKENS})"}]
 
         input_token_length = self.get_input_token_length(inputs)
         if input_token_length > MAX_INPUT_TOKEN_LENGTH:
+            logger.error(f"input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH})")
             return [{"generated_text": None, "error": f"input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH})"}]
 
+        logger.info(f"inputs: {inputs}")
         input_ids = self.tokenizer(inputs, return_tensors="pt").to(self.model.device)
 
         outputs = self.model.generate(**input_ids, **parameters)
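
For context, a minimal sketch of the guard pattern this commit settles on: log each rejection server-side with loguru before returning the same message in the response body, so failed requests show up in the endpoint logs as well as in the client response. The sketch reuses the constants and messages from handler.py, but validate_request is a hypothetical helper for illustration; handler.py performs these checks inline.

from loguru import logger

MAX_INPUT_TOKEN_LENGTH = 4000
MAX_MAX_NEW_TOKENS = 2048

def validate_request(input_token_length, max_new_tokens):
    # Hypothetical helper mirroring the checks this commit instruments.
    # Log first, then return the error response; return None when valid.
    if max_new_tokens > MAX_MAX_NEW_TOKENS:
        logger.error(f"requested max_new_tokens too high (> {MAX_MAX_NEW_TOKENS})")
        return [{"generated_text": None, "error": f"requested max_new_tokens too high (> {MAX_MAX_NEW_TOKENS})"}]
    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
        logger.error(f"input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH})")
        return [{"generated_text": None, "error": f"input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH})"}]
    return None  # request is within both limits

# Example: validate_request(5000, 1024) logs "input is too long (5000 > 4000)"
# and returns the corresponding error response.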