huz-relay committed on
Commit 49d46c5 · 1 Parent(s): 8c2ba75

Add logging and move model to gpu

Files changed (1)
  1. handler.py +10 -3
handler.py CHANGED
@@ -1,12 +1,15 @@
 from typing import Any, Dict, List
 from transformers import Idefics2Processor, Idefics2Model
+import torch
 
 
 class EndpointHandler:
     def __init__(self, path=""):
         # Preload all the elements you are going to need at inference.
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.processor = Idefics2Processor.from_pretrained(path)
         self.model = Idefics2Model.from_pretrained(path)
+        self.model.to(self.device)
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
@@ -17,15 +20,19 @@ class EndpointHandler:
         A :obj:`list` | `dict`: will be serialized and returned
         """
         image = data.pop("inputs", data)
+        print("image reached")
 
         # process image
-        output = self.processor(images=image, return_tensors="pt")
-        generated_ids = self.model.generate(**output)
+        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
+        print("inputs reached")
+        generated_ids = self.model.generate(**inputs, max_new_tokens=500)
+        print("generated")
 
         # run prediction
         generated_text = self.processor.batch_decode(
             generated_ids, skip_special_tokens=True
         )
+        print("decoded")
 
         # decode output
-        print(generated_text)
+        return generated_text
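
For reference, a minimal local smoke test of the updated handler could look like the sketch below. It is not part of the commit: the checkpoint id, the image path, and the payload shape are placeholders, chosen to mirror the "inputs" key the handler pops from the request body.

# Hypothetical smoke test for handler.py; checkpoint id and image path are placeholders.
from PIL import Image
from handler import EndpointHandler

handler = EndpointHandler(path="HuggingFaceM4/idefics2-8b")  # any Idefics2 checkpoint dir or hub id
image = Image.open("test.jpg")                               # local test image
generated_text = handler({"inputs": image})
print(generated_text)

With CUDA available, __init__ places the model on "cuda" and __call__ moves the processor output to the same device before generate runs, so the weights and the input tensors never end up on different devices.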