An error about vLLM with this model

#1
by liusssyang - opened

An error occurs when I use vLLM to call this model, and despite searching for a solution, I am unsure how to resolve it.
```text
INFO 02-17 12:17:53 model_runner.py:1110] Starting to load model /data/LLM/model/unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit...
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]
ERROR 02-17 12:17:54 engine.py:389]
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 262, in init
super().init(*args, **kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 51, in init
self._init_executor()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 125, in _init_executor
self._run_workers("load_model",
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
driver_worker_output = run_method(self.driver_worker, sent_method,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 386, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 515, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 415, in load_weights
weight_loader(param, loaded_weight)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/linear.py", line 1113, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Process SpawnProcess-1:
ERROR 02-17 12:17:54 multiproc_worker_utils.py:124] Worker VllmWorkerProcess pid 2248214 died, exit code: -15
INFO 02-17 12:17:54 multiproc_worker_utils.py:128] Killing local vLLM worker processes
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 391, in run_mp_engine
raise e
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 262, in init
super().init(*args, **kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 51, in init
self._init_executor()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 125, in _init_executor
self._run_workers("load_model",
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
driver_worker_output = run_method(self.driver_worker, sent_method,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 386, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 515, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 415, in load_weights
weight_loader(param, loaded_weight)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/linear.py", line 1113, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]

[rank0]:[W217 12:17:54.964333168 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/bin/vllm", line 8, in
sys.exit(main())
^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/scripts.py", line 204, in main
args.dispatch_function(args)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/scripts.py", line 44, in serve
uvloop.run(run_server(args))
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/uvloop/init.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/uvloop/init.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 875, in run_server
async with build_async_engine_client(args) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 136, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 230, in build_async_engine_client_from_engine_args
raise RuntimeError(
RuntimeError: Engine process failed to start. See stack trace for the root cause.
/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
```

An error occurs when I use vLLM to call this model, and despite searching for a solution, I am unsure how to resolve it.
```text
INFO 02-17 12:17:53 model_runner.py:1110] Starting to load model /data/LLM/model/unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit...
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]
ERROR 02-17 12:17:54 engine.py:389]
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 262, in init
super().init(*args, **kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 51, in init
self._init_executor()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 125, in _init_executor
self._run_workers("load_model",
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
driver_worker_output = run_method(self.driver_worker, sent_method,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 386, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 515, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 415, in load_weights
weight_loader(param, loaded_weight)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/linear.py", line 1113, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Process SpawnProcess-1:
ERROR 02-17 12:17:54 multiproc_worker_utils.py:124] Worker VllmWorkerProcess pid 2248214 died, exit code: -15
INFO 02-17 12:17:54 multiproc_worker_utils.py:128] Killing local vLLM worker processes
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 391, in run_mp_engine
raise e
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 262, in init
super().init(*args, **kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 51, in init
self._init_executor()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 125, in _init_executor
self._run_workers("load_model",
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
driver_worker_output = run_method(self.driver_worker, sent_method,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 386, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 515, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 415, in load_weights
weight_loader(param, loaded_weight)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/linear.py", line 1113, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]

[rank0]:[W217 12:17:54.964333168 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/bin/vllm", line 8, in
sys.exit(main())
^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/scripts.py", line 204, in main
args.dispatch_function(args)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/scripts.py", line 44, in serve
uvloop.run(run_server(args))
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/uvloop/init.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/uvloop/init.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 875, in run_server
async with build_async_engine_client(args) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 136, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 230, in build_async_engine_client_from_engine_args
raise RuntimeError(
RuntimeError: Engine process failed to start. See stack trace for the root cause.
/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
```

Same error here.
Is there a quantized version of this model that vLLM can actually run?
I've been looking for one for a long time!

Maybe you can try:

  1. DeepSeek-R1-Distill-Qwen-14B-quantized-w4a16
  2. DeepSeek-R1-Distill-Qwen-14B-Int8-W8A16

An error occurs when I use vLLM to call this model, and despite searching for a solution, I am unsure how to resolve it.
```text
INFO 02-17 12:17:53 model_runner.py:1110] Starting to load model /data/LLM/model/unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit...
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]
ERROR 02-17 12:17:54 engine.py:389]
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 262, in init
super().init(*args, **kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 51, in init
self._init_executor()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 125, in _init_executor
self._run_workers("load_model",
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
driver_worker_output = run_method(self.driver_worker, sent_method,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 386, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 515, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 415, in load_weights
weight_loader(param, loaded_weight)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/linear.py", line 1113, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Process SpawnProcess-1:
ERROR 02-17 12:17:54 multiproc_worker_utils.py:124] Worker VllmWorkerProcess pid 2248214 died, exit code: -15
INFO 02-17 12:17:54 multiproc_worker_utils.py:128] Killing local vLLM worker processes
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 391, in run_mp_engine
raise e
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 380, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 123, in from_engine_args
return cls(ipc_path=ipc_path,
^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/multiprocessing/engine.py", line 75, in init
self.engine = LLMEngine(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/engine/llm_engine.py", line 273, in init
self.model_executor = executor_class(vllm_config=vllm_config, )
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 262, in init
super().init(*args, **kwargs)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 51, in init
self._init_executor()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 125, in _init_executor
self._run_workers("load_model",
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/executor/mp_distributed_executor.py", line 185, in _run_workers
driver_worker_output = run_method(self.driver_worker, sent_method,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/utils.py", line 2220, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/worker.py", line 183, in load_model
self.model_runner.load_model()
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/worker/model_runner.py", line 1112, in load_model
self.model = get_model(vllm_config=self.vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/init.py", line 14, in get_model
return loader.load_model(vllm_config=vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/model_loader/loader.py", line 386, in load_model
loaded_weights = model.load_weights(
^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 515, in load_weights
return loader.load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 235, in load_weights
autoloaded_weights = set(self._load_module("", self.module, weights))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 196, in _load_module
yield from self._load_module(prefix,
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/utils.py", line 173, in _load_module
loaded_params = module_load_weights(weights)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/models/qwen2.py", line 415, in load_weights
weight_loader(param, loaded_weight)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/model_executor/layers/linear.py", line 1113, in weight_loader
assert param_data.shape == loaded_weight.shape
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError
Loading safetensors checkpoint shards: 0% Completed | 0/3 [00:00<?, ?it/s]

[rank0]:[W217 12:17:54.964333168 ProcessGroupNCCL.cpp:1250] Warning: WARNING: process group has NOT been destroyed before we destruct ProcessGroupNCCL. On normal program exit, the application should call destroy_process_group to ensure that any pending NCCL operations have finished in this process. In rare cases this process can exit before this point and block the progress of another member of the process group. This constraint has always been present, but this warning has only been added since PyTorch 2.4 (function operator())
Traceback (most recent call last):
File "/data/home/jhc258/conda_envs/vllm/bin/vllm", line 8, in
sys.exit(main())
^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/scripts.py", line 204, in main
args.dispatch_function(args)
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/scripts.py", line 44, in serve
uvloop.run(run_server(args))
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/uvloop/init.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/uvloop/init.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 875, in run_server
async with build_async_engine_client(args) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 136, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/contextlib.py", line 210, in aenter
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/data/home/jhc258/conda_envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 230, in build_async_engine_client_from_engine_args
raise RuntimeError(
RuntimeError: Engine process failed to start. See stack trace for the root cause.
/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
/data/home/jhc258/conda_envs/vllm/lib/python3.12/multiprocessing/resource_tracker.py:254: UserWarning: resource_tracker: There appear to be 1 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '
```

Same error here.
Is there a quantized version of this model that vLLM can actually run?
I've been looking for one for a long time!

Maybe you can try:

  1. DeepSeek-R1-Distill-Qwen-14B-quantized-w4a16
  2. DeepSeek-R1-Distill-Qwen-14B-Int8-W8A16

Sign up or log in to comment