Missing feature_extractor_type or wrong model_type

#5
by enkaya - opened

Hi everyone,

I am trying to deploy this model to SageMaker for inference.

Here is how I deploy:

import json
import re

import boto3
import sagemaker
from sagemaker.async_inference import AsyncInferenceConfig
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.s3 import s3_path_join

boto3.setup_default_session(profile_name="xxx")

role = "arn:aws:iam::xxxx"
model_id = "naver-clova-ix/donut-base-finetuned-rvlcdip"
model_name = model_id.split("/")[-1].lower()
sess = sagemaker.Session()

# Hub Model configuration. https://huggingface.co./models
hub = {
    "HF_MODEL_ID": model_id,
    "SM_NUM_GPUS": json.dumps(1),
    "HF_TASK": "image-to-text",
    "HF_TOKEN": "xxxx",
}

# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    env=hub,
    role=role,
    entry_point="inference.py",
)

sagemaker_session_bucket = None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

endpoint_name = f"{re.sub(r'[^a-zA-Z0-9]|(?<=-)-+|^-|-$', '', model_name)}-ep-v1"
instance_type = "ml.g5.4xlarge"
async_config = AsyncInferenceConfig(
    output_path=s3_path_join("s3://", sagemaker_session_bucket, "async_inference/output")
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type=instance_type,
    async_inference_config=async_config,
    container_startup_health_check_timeout=2400,
)

Here are the contents of inference.py:

import base64
import io
import json
import re

import torch
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel


def model_fn(model_dir):
    """Load the model for inference"""
    processor = DonutProcessor.from_pretrained(model_dir)
    model = VisionEncoderDecoderModel.from_pretrained(model_dir)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    return {"model": model, "processor": processor, "device": device}


def input_fn(request_body, request_content_type):
    """Deserialize and prepare the prediction input"""
    if request_content_type == "application/json":
        input_data = json.loads(request_body)

        # Decode base64 image
        image_data = base64.b64decode(input_data["image"])
        image = Image.open(io.BytesIO(image_data)).convert("RGB")

        return {
            "image": image,
            "task_prompt": input_data.get("task_prompt", "<s_rvlcdip>"),
        }
    raise ValueError(f"Unsupported content type: {request_content_type}")


def predict_fn(input_data, model_dict):
    """Make a prediction using the input data"""
    model = model_dict["model"]
    processor = model_dict["processor"]
    device = model_dict["device"]

    # Process image
    pixel_values = processor(input_data["image"], return_tensors="pt").pixel_values

    # Prepare decoder inputs
    decoder_input_ids = processor.tokenizer(
        input_data["task_prompt"], add_special_tokens=False, return_tensors="pt"
    ).input_ids

    # Generate output
    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # Process output
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(
        processor.tokenizer.pad_token, ""
    )
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()

    result = processor.token2json(sequence)
    return result


def output_fn(prediction, response_content_type):
    """Serialize and prepare the prediction output"""
    if response_content_type == "application/json":
        return json.dumps(prediction)
    raise ValueError(f"Unsupported content type: {response_content_type}")

I keep getting this error:

ValueError: Unrecognized feature extractor in /.sagemaker/mms/models/naver-clova-ix__donut-base-finetuned-rvlcdip. Should have a `feature_extractor_type` key in its preprocessor_config.json of config.json, or one of the following `model_type` keys in its config.json: audio-spectrogram-transformer, beit, chinese_clip, clap, clip, clipseg, clvp, conditional_detr, convnext, cvt, data2vec-audio, data2vec-vision, deformable_detr, deit, detr, dinat, donut-swin, dpt, encodec, flava, glpn, groupvit, hubert, imagegpt, layoutlmv2, layoutlmv3, levit, maskformer, mctct, mobilenet_v1, mobilenet_v2, mobilevit, nat, owlvit, perceiver, poolformer, pop2piano, regnet, resnet, seamless_m4t, seamless_m4t_v2, segformer, sew, sew-d, speech_to_text, speecht5, swiftformer, swin, swinv2, table-transformer, timesformer, tvlt, unispeech, unispeech-sat, univnet, van, videomae, vilt, vit, vit_mae, vit_msn, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, yolos
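From what I can tell, the traceback comes from the container's default image-to-text handler rather than from my inference.py: it seems to build a transformers pipeline for the downloaded checkpoint, and AutoFeatureExtractor fails because the model's config.json has model_type vision-encoder-decoder, which is not in the list above. One workaround I have seen suggested (untested on my side) is to drop HF_MODEL_ID and ship the script inside the model archive, so the toolkit picks up code/inference.py — roughly:

import os
import shutil
import tarfile

from huggingface_hub import snapshot_download
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.s3 import S3Uploader

# 1. pull the model files and place inference.py in a code/ subfolder,
#    which is where the inference toolkit looks for a custom script
snapshot_download("naver-clova-ix/donut-base-finetuned-rvlcdip", local_dir="donut-model")
os.makedirs("donut-model/code", exist_ok=True)
shutil.copy("inference.py", "donut-model/code/inference.py")

# 2. archive and upload (bucket/prefix are placeholders)
with tarfile.open("model.tar.gz", "w:gz") as tar:
    tar.add("donut-model", arcname=".")
model_uri = S3Uploader.upload("model.tar.gz", f"s3://{sagemaker_session_bucket}/donut")

# 3. deploy from model_data instead of HF_MODEL_ID, so model_fn & co. run
huggingface_model = HuggingFaceModel(
    model_data=model_uri,
    transformers_version="4.37.0",
    pytorch_version="2.1.0",
    py_version="py310",
    role=role,
)

If anyone can confirm whether that is the right approach here, or whether the entry_point argument should already have been enough, I would appreciate it.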

I have just recently started working with ML and would greatly appreciate your support!
