# sooh-j's picture
# Upload folder using huggingface_hub
# b2d68d7 verified
# raw
# history blame
# 2 kB
import numpy as np
from transformers import AutoProcessor, AutoTokenizer, AutoImageProcessor, AutoModelForCausalLM, BlipForConditionalGeneration, VisionEncoderDecoderModel
from typing import Dict, List, Any
from PIL import Image
from transformers import pipeline
import requests
import torch
from io import BytesIO
import base64
class EndpointHandler():
    """Custom inference handler that captions an image with a BLIP model.

    The handler is constructed once and then called once per request with
    the request payload; it returns a JSON-serializable list.
    """

    # Seconds to wait for a remote image before giving up, so a stalled
    # server cannot block a request indefinitely.
    REQUEST_TIMEOUT = 10
    # Upper bound on the number of tokens generated for a caption.
    MAX_NEW_TOKENS = 50

    def __init__(self, path=""):
        """Load the processor and the captioning model.

        Args:
            path: Accepted for interface compatibility; it is not used here,
                the weights are always loaded from ``self.model_name``.
        """
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        print("device:", self.device)
        self.model_name = "sooh-j/blip-image-captioning-base"
        self.processor = AutoProcessor.from_pretrained(self.model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(self.model_name)
        # The inputs are moved to ``self.device`` in ``__call__``; the model
        # has to live on the same device or ``generate`` fails on GPU hosts.
        self.model.to(self.device)

    @staticmethod
    def _is_remote_url(source: str) -> bool:
        """Return True when *source* is an ``http://`` or ``https://`` URL."""
        return source.lower().startswith(("http://", "https://"))

    @staticmethod
    def _decode_base64_payload(encoded: str) -> bytes:
        """Decode a base64 image given either bare or as a data URL.

        A data URL looks like ``data:image/png;base64,<payload>``; the payload
        is the text after the last comma. A bare base64 string contains no
        comma, so the same expression returns it unchanged.
        """
        payload = encoded.split(",")[-1].strip()
        return base64.b64decode(payload)

    def _load_image(self, source):
        """Turn the request's image field into a ``PIL.Image``.

        Args:
            source: An http(s) URL, a base64 string (optionally a data URL),
                or an already opened ``PIL.Image``.

        Raises:
            ValueError: If *source* is missing, empty, or of another type.
            requests.HTTPError: If the remote server answers with an error.
        """
        if source is None:
            raise ValueError("request is missing the 'image' input")
        if isinstance(source, Image.Image):
            return source
        if not isinstance(source, str) or not source.strip():
            raise ValueError("'image' must be a URL or a base64-encoded string")

        source = source.strip()
        if self._is_remote_url(source):
            response = requests.get(source, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            # ``content`` has transfer encodings (gzip, ...) already decoded,
            # unlike the raw socket stream.
            raw_bytes = response.content
        else:
            raw_bytes = self._decode_base64_payload(source)
        return Image.open(BytesIO(raw_bytes))

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            inputs (:obj:`dict` | `str` | `PIL.Image`): either a dict whose
                "image" entry holds the image, or the image itself. The image
                is an http(s) URL, a base64 string or a data URL.
            kwargs
        Return:
            A :obj:`list` with one dict: "answer" is the generated caption,
            "score" is a constant 0 placeholder.
        """
        inputs = data.get("inputs")
        # Accept both {"inputs": {"image": ...}} and {"inputs": ...}.
        source = inputs.get("image") if isinstance(inputs, dict) else inputs
        image = self._load_image(source)

        processed = self.processor(images=image, return_tensors="pt").to(self.device)
        with torch.no_grad():
            out = self.model.generate(**processed, max_new_tokens=self.MAX_NEW_TOKENS)

        text_output = self.processor.decode(out[0], skip_special_tokens=True)
        return [{"answer": text_output, "score": 0}]