huz-relay committed on
Commit
e5307bd
·
1 Parent(s): 69c7cd1

Replace model with Idefics2ForConditionalGeneration

Browse files
Files changed (1) hide show
  1. handler.py +7 -4
handler.py CHANGED
@@ -1,5 +1,5 @@
1
  from typing import Any, Dict, List
2
- from transformers import Idefics2Processor, Idefics2Model
3
  import torch
4
 
5
 
@@ -8,8 +8,9 @@ class EndpointHandler:
8
  # Preload all the elements you are going to need at inference.
9
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
10
  self.processor = Idefics2Processor.from_pretrained(path)
11
- self.model = Idefics2Model.from_pretrained(path)
12
  self.model.to(self.device)
 
13
 
14
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
15
  """
@@ -25,11 +26,13 @@ class EndpointHandler:
25
  # process image
26
  inputs = self.processor(images=image, return_tensors="pt").to(self.device)
27
  print("inputs reached")
28
- output = self.model.forward(input_ids=inputs.input_ids)
29
  print("generated")
30
 
31
  # run prediction
32
- generated_text = self.processor.batch_decode(output, skip_special_tokens=True)
 
 
33
  print("decoded")
34
 
35
  # decode output
 
1
  from typing import Any, Dict, List
2
+ from transformers import Idefics2Processor, Idefics2ForConditionalGeneration
3
  import torch
4
 
5
 
 
8
  # Preload all the elements you are going to need at inference.
9
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
10
  self.processor = Idefics2Processor.from_pretrained(path)
11
+ self.model = Idefics2ForConditionalGeneration.from_pretrained(path)
12
  self.model.to(self.device)
13
+ print("Initialisation finished!")
14
 
15
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
16
  """
 
26
  # process image
27
  inputs = self.processor(images=image, return_tensors="pt").to(self.device)
28
  print("inputs reached")
29
+ generated_ids = self.model.generate(**inputs)
30
  print("generated")
31
 
32
  # run prediction
33
+ generated_text = self.processor.batch_decode(
34
+ generated_ids, skip_special_tokens=True
35
+ )
36
  print("decoded")
37
 
38
  # decode output