deployment error with lmdeploy: RuntimeError: Could not find model architecture from config
#10
by
ismailyenigul
- opened
Hi,
When I try to run this model with lmdeploy as a pod on AWS EKS, I get the following error:
# lmdeploy serve api_server bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF
File "/opt/lmdeploy/lmdeploy/cli/entrypoint.py", line 42, in run
args.run(args)
File "/opt/lmdeploy/lmdeploy/cli/serve.py", line 303, in api_server
backend = autoget_backend(args.model_path)
File "/opt/lmdeploy/lmdeploy/archs.py", line 42, in autoget_backend
turbomind_has = is_supported_turbomind(model_path)
File "/opt/lmdeploy/lmdeploy/turbomind/supported_models.py", line 86, in is_supported
arch, cfg = get_model_arch(model_path)
File "/opt/lmdeploy/lmdeploy/archs.py", line 194, in get_model_arch
raise RuntimeError(
RuntimeError: Could not find model architecture from config: {'vocab_size': 50265, 'max_position_embeddings': 1024, 'd_model': 1024, 'encoder_ffn_dim'
: 4096, 'encoder_layers': 12, 'encoder_attention_heads': 16, 'decoder_ffn_dim': 4096, 'decoder_layers': 12, 'decoder_attention_heads': 16, 'dropout': 0.1,
'attention_dropout': 0.0, 'activation_dropout': 0.0, 'activation_function': 'gelu', 'init_std': 0.02, 'encoder_layerdrop': 0.0, 'decoder_layerdrop': 0.0,
'classifier_dropout': 0.0, 'use_cache': True, 'num_hidden_layers': 12, 'scale_embedding': False, 'return_dict': True, 'output_hidden_states': False,
'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings':
True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': True, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False,
'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1,
'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0,
'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False,
'forced_bos_token_id': None, 'forced_eos_token_id': 2, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None,
'begin_suppress_tokens': None, 'architectures': None, 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0,
'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 0, 'pad_token_id': 1, 'eos_token_id': 2, 'sep_token_id': None,
'decoder_start_token_id': 2, 'task_specific_params': None, 'problem_type': None, '_name_or_path': '/root/.cache/huggingface/hub/models--bartowski--
DeepSeek-R1-Distill-Qwen-32B-GGUF/snapshots/1dc8cf9ffa5dd333057ea1b09ccf4772d8726dec', '_attn_implementation_autoset': False, 'transformers_version': '4.48.0', 'model_type': 'bart'}