You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.

#16
by proto2024 - opened

I am using a V100 GPU and the following error is reported when loading the model. How can I solve it?

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
path = "OpenGVLab/InternVL-Chat-V1-5"
# If you have an 80G A100 GPU, you can put the entire model on a single GPU.
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
    device_map='auto').eval()


tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
# set the max number of tiles in `max_num`
pixel_values = load_image('./test.png', max_num=6).to(torch.bfloat16).cuda()

generation_config = dict(
    num_beams=1,
    max_new_tokens=512,
    do_sample=False,
)

# single-round single-image conversation
question = "请详细描述图片" # Please describe the picture in detail
response = model.chat(tokenizer, pixel_values, question, generation_config)
response

ValueError Traceback (most recent call last)
Cell In[12], line 4
2 path = "OpenGVLab/InternVL-Chat-V1-5"
3 # If you have an 80G A100 GPU, you can put the entire model on a single GPU.
----> 4 model = AutoModel.from_pretrained(
5 path,
6 torch_dtype=torch.bfloat16,
7 low_cpu_mem_usage=True,
8 trust_remote_code=True,
9 device_map='auto').eval()
12 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
13 # set the max number of tiles in `max_num`

File ~/.local/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:556, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
554 else:
555 cls.register(config.__class__, model_class, exist_ok=True)
--> 556 return model_class.from_pretrained(
557 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
558 )
559 elif type(config) in cls._model_mapping.keys():
560 model_class = _get_model_class(config, cls._model_mapping)

File ~/.local/lib/python3.10/site-packages/transformers/modeling_utils.py:3558, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3556 if "skip_keys" in inspect.signature(dispatch_model).parameters:
3557 device_map_kwargs["skip_keys"] = model._skip_keys_device_placement
-> 3558 dispatch_model(model, **device_map_kwargs)
3560 if hf_quantizer is not None:
3561 hf_quantizer.postprocess_model(model)

File ~/.local/lib/python3.10/site-packages/accelerate/big_modeling.py:470, in dispatch_model(model, device_map, main_device, state_dict, offload_dir, offload_index, offload_buffers, skip_keys, preload_module_classes, force_hooks)
468 model.to(device)
469 else:
--> 470 raise ValueError(
471 "You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead."
472 )
473 # Convert OrderedDict back to dict for easier usage
474 model.hf_device_map = dict(device_map)

ValueError: You are trying to offload the whole model to the disk. Please use the `disk_offload` function instead.

OpenGVLab org

Hello, thanks for your feedback. Our README has now been updated, and you can try again with the new code.

Sign up or log in to comment