Missing feature_extractor_type or wrong model_type
#5 · opened by enkaya
Hi everyone,

I am trying to deploy this model to SageMaker for inference. Here is how I deploy it:
```python
import json
import re

import boto3
import sagemaker
from sagemaker.async_inference import AsyncInferenceConfig
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.s3 import s3_path_join

boto3.setup_default_session(profile_name="xxx")
role = "arn:aws:iam::xxxx"

model_id = "naver-clova-ix/donut-base-finetuned-rvlcdip"
model_name = model_id.split("/")[-1].lower()
sess = sagemaker.Session()

# Hub Model configuration. https://huggingface.co./models
hub = {
    "HF_MODEL_ID": model_id,
    "SM_NUM_GPUS": json.dumps(1),
    "HF_TASK": "image-to-text",
    "HF_TOKEN": "xxxx",
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env=hub,
    role=role,
    entry_point="inference.py",
)

sagemaker_session_bucket = None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

endpoint_name = f"{re.sub(r'[^a-zA-Z0-9]|(?<=-)-+|^-|-$', '', model_name)}-ep-v1"
instance_type = "ml.g5.4xlarge"
async_config = AsyncInferenceConfig(
    output_path=s3_path_join("s3://", sagemaker_session_bucket, "async_inference/output")
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type=instance_type,
    async_inference_config=async_config,
    container_startup_health_check_timeout=2400,
)
```
Here are the contents of `inference.py`:
```python
import base64
import io
import json
import re

import torch
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel


def model_fn(model_dir):
    """Load the model for inference"""
    processor = DonutProcessor.from_pretrained(model_dir)
    model = VisionEncoderDecoderModel.from_pretrained(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    return {"model": model, "processor": processor, "device": device}


def input_fn(request_body, request_content_type):
    """Deserialize and prepare the prediction input"""
    if request_content_type == "application/json":
        input_data = json.loads(request_body)
        # Decode the base64-encoded image
        image_data = base64.b64decode(input_data["image"])
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        return {
            "image": image,
            "task_prompt": input_data.get("task_prompt", "<s_rvlcdip>"),
        }
    raise ValueError(f"Unsupported content type: {request_content_type}")


def predict_fn(input_data, model_dict):
    """Make a prediction using the input data"""
    model = model_dict["model"]
    processor = model_dict["processor"]
    device = model_dict["device"]

    # Process the image into pixel values
    pixel_values = processor(input_data["image"], return_tensors="pt").pixel_values

    # Prepare decoder inputs from the task prompt
    decoder_input_ids = processor.tokenizer(
        input_data["task_prompt"], add_special_tokens=False, return_tensors="pt"
    ).input_ids

    # Generate the output sequence
    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # Strip special tokens and the first task start token, then convert to JSON
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(
        processor.tokenizer.pad_token, ""
    )
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
    result = processor.token2json(sequence)
    return result


def output_fn(prediction, response_content_type):
    """Serialize and prepare the prediction output"""
    if response_content_type == "application/json":
        return json.dumps(prediction)
    raise ValueError(f"Unsupported content type: {response_content_type}")
```
I keep getting this error:
```
ValueError: Unrecognized feature extractor in /.sagemaker/mms/models/naver-clova-ix__donut-base-finetuned-rvlcdip. Should have a `feature_extractor_type` key in its preprocessor_config.json of config.json, or one of the following `model_type` keys in its config.json: audio-spectrogram-transformer, beit, chinese_clip, clap, clip, clipseg, clvp, conditional_detr, convnext, cvt, data2vec-audio, data2vec-vision, deformable_detr, deit, detr, dinat, donut-swin, dpt, encodec, flava, glpn, groupvit, hubert, imagegpt, layoutlmv2, layoutlmv3, levit, maskformer, mctct, mobilenet_v1, mobilenet_v2, mobilevit, nat, owlvit, perceiver, poolformer, pop2piano, regnet, resnet, seamless_m4t, seamless_m4t_v2, segformer, sew, sew-d, speech_to_text, speecht5, swiftformer, swin, swinv2, table-transformer, timesformer, tvlt, unispeech, unispeech-sat, univnet, van, videomae, vilt, vit, vit_mae, vit_msn, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, yolos
```
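For reference, the `preprocessor_config.json` the error refers to can be inspected locally. A minimal sketch using huggingface_hub's `hf_hub_download` to fetch that single file from the Hub:

```python
import json

from huggingface_hub import hf_hub_download

# Sketch: download the model's preprocessor_config.json and check for the
# keys the error message mentions.
path = hf_hub_download(
    repo_id="naver-clova-ix/donut-base-finetuned-rvlcdip",
    filename="preprocessor_config.json",
)
with open(path) as f:
    preprocessor_config = json.load(f)

print("feature_extractor_type" in preprocessor_config)
print("image_processor_type" in preprocessor_config)
```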
I have just recently started working with ML and would greatly appreciate your support!