OpenGVLab/InternVL-Chat-V1-5 · You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.

I am running the code below on a V100 GPU and the following error is reported. How can I solve it?

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # NOTE(review): presumably set so CUDA errors surface synchronously while debugging — confirm
path = "OpenGVLab/InternVL-Chat-V1-5"  # model identifier shared by the model and tokenizer loads below
# If you have an 80G A100 GPU, you can put the entire model on a single GPU.
model = AutoModel.from_pretrained(  # this is the call the traceback below flags (cell line 4)
    path,
    torch_dtype=torch.bfloat16,  # NOTE(review): the reporter mentions a V100 — confirm bfloat16 is suitable on that GPU
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map='auto').eval()  # placement is left to 'auto'; .eval() is chained onto the loaded model


tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)  # tokenizer is loaded from the same path as the model
# set the max number of tiles in `max_num`
pixel_values = load_image('./test.png', max_num=6).to(torch.bfloat16).cuda()  # load_image is defined elsewhere; result is cast to bfloat16 and moved to CUDA

generation_config = dict(  # settings handed to model.chat below
    num_beams=1,
    max_new_tokens=512,
    do_sample=False,  # NOTE(review): with num_beams=1 this is presumably greedy decoding — confirm against model.chat
)

# single-round single-image conversation
question = "请详细描述图片" # Please describe the picture in detail
response = model.chat(tokenizer, pixel_values, question, generation_config)  # one call: tokenizer, image tensor, prompt, generation settings
response  # bare expression so the notebook cell displays the reply

ValueError Traceback (most recent call last)
Cell In[12], line 4
2 path = "OpenGVLab/InternVL-Chat-V1-5"
3 # If you have an 80G A100 GPU, you can put the entire model on a single GPU.
----> 4 model = AutoModel.from_pretrained(
5 path,
6 torch_dtype=torch.bfloat16,
7 low_cpu_mem_usage=True,
8 trust_remote_code=True,
9 device_map='auto').eval()
12 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
13 # set the max number of tiles in `max_num`

File ~/.local/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:556, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
554 else:
555 cls.register(config.__class__, model_class, exist_ok=True)
--> 556 return model_class.from_pretrained(
557 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
558 )
559 elif type(config) in cls._model_mapping.keys():
560 model_class = _get_model_class(config, cls._model_mapping)

File ~/.local/lib/python3.10/site-packages/transformers/modeling_utils.py:3558, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3556 if "skip_keys" in inspect.signature(dispatch_model).parameters:
3557 device_map_kwargs["skip_keys"] = model._skip_keys_device_placement
-> 3558 dispatch_model(model, **device_map_kwargs)
3560 if hf_quantizer is not None:
3561 hf_quantizer.postprocess_model(model)

File ~/.local/lib/python3.10/site-packages/accelerate/big_modeling.py:470, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
468 model.to(device)
469 else:
--> 470 raise ValueError(
471 "You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead."
472 )
473 # Convert OrderedDict back to dict for easier usage
474 model.hf_device_map = dict(device_map)

ValueError: You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.

OpenGVLab/InternVL-Chat-V1-5

You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.