huz-relay committed on
Commit 49d46c5 · 1 Parent(s): 8c2ba75

Add logging and move model to gpu

Files changed (1)
  1. handler.py +10 -3
handler.py CHANGED
@@ -1,12 +1,15 @@
 from typing import Any, Dict, List
 from transformers import Idefics2Processor, Idefics2Model
+import torch
 
 
 class EndpointHandler:
     def __init__(self, path=""):
         # Preload all the elements you are going to need at inference.
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.processor = Idefics2Processor.from_pretrained(path)
         self.model = Idefics2Model.from_pretrained(path)
+        self.model.to(self.device)
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
@@ -17,15 +20,19 @@ class EndpointHandler:
         A :obj:`list` | `dict`: will be serialized and returned
         """
         image = data.pop("inputs", data)
+        print("image reached")
 
         # process image
-        output = self.processor(images=image, return_tensors="pt")
-        generated_ids = self.model.generate(**output)
+        inputs = self.processor(images=image, return_tensors="pt").to(self.device)
+        print("inputs reached")
+        generated_ids = self.model.generate(**inputs, max_new_tokens=500)
+        print("generated")
 
         # run prediction
         generated_text = self.processor.batch_decode(
             generated_ids, skip_special_tokens=True
         )
+        print("decoded")
 
         # decode output
-        print(generated_text)
+        return generated_text
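
For reference, a minimal local smoke test of the updated handler could look like the sketch below. It is not part of the commit: the checkpoint id, the image path, and the payload shape are placeholders, chosen to mirror the "inputs" key the handler pops from the request body.

# Hypothetical smoke test for handler.py; checkpoint id and image path are placeholders.
from PIL import Image
from handler import EndpointHandler

handler = EndpointHandler(path="HuggingFaceM4/idefics2-8b")  # any Idefics2 checkpoint dir or hub id
image = Image.open("test.jpg")                               # local test image
generated_text = handler({"inputs": image})
print(generated_text)

With CUDA available, __init__ places the model on "cuda" and __call__ moves the processor output to the same device before generate runs, so the weights and the input tensors never end up on different devices.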