moooji's picture
Update handler.py
07d2dac
raw
history blame
972 Bytes
from typing import Dict, List, Any
from PIL import Image
import torch
import base64
from io import BytesIO
from transformers import AutoProcessor, BlipForQuestionAnswering
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
class EndpointHandler:
    """Answer a natural-language question about an image with a BLIP VQA model.

    The processor and model are loaded once at construction time and reused
    for every request handled through ``__call__``.
    """

    # Hub id of the checkpoint used for both the processor and the model.
    MODEL_ID = "Salesforce/blip-vqa-capfilt-large"

    def __init__(self, path=""):
        """Load the BLIP processor and model and move the model to ``device``.

        Args:
            path: Not used by this handler; the checkpoint is always loaded
                from ``MODEL_ID``. Kept so existing callers that pass a path
                keep working.
        """
        self.processor = AutoProcessor.from_pretrained(self.MODEL_ID)
        self.model = BlipForQuestionAnswering.from_pretrained(self.MODEL_ID).to(device)

    def __call__(self, data: Any) -> str:
        """Run visual question answering for one request.

        Args:
            data: Mapping holding the request. The payload is taken from
                ``data["inputs"]`` when that key exists, otherwise ``data``
                itself is the payload. The payload must provide:

                * ``"image"``: the image file contents, base64-encoded.
                * ``"question"``: the question to ask about the image.

        Returns:
            The generated answer, decoded to text with special tokens removed.

        Raises:
            KeyError: If ``"image"`` or ``"question"`` is missing.
        """
        # ``get`` instead of ``pop`` so the caller's dict is not mutated.
        payload = data.get("inputs", data)

        image_bytes = base64.b64decode(payload["image"])
        # Normalise to RGB so grayscale / RGBA / palette uploads are fed to
        # the processor in the same mode as the images in the BLIP examples.
        image = Image.open(BytesIO(image_bytes)).convert("RGB")

        model_inputs = self.processor(
            image, payload["question"], return_tensors="pt"
        ).to(device)

        with torch.no_grad():
            # ``generate`` returns the generated token ids, one row per input.
            generated_ids = self.model.generate(**model_inputs)

        return self.processor.decode(generated_ids[0], skip_special_tokens=True)