Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

README.md +2 -2
config.json +1 -57
configuration_intern_vit.py +1 -1
configuration_internvl_chat.py +1 -1
conversation.py +7 -4
modeling_intern_vit.py +2 -1
modeling_internvl_chat.py +4 -0

README.md CHANGED Viewed

@@ -65,7 +65,7 @@ For more information about the pipeline parameters, please refer to [here](https
 LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
 ```shell
-lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --server-port 23333
 ```
 To use the OpenAI-style interface, you need to install OpenAI:
@@ -104,7 +104,7 @@ print(response)
 ## License
-This project is released under the MIT license, while InternLM is licensed under the Apache-2.0 license.
 ## Citation

 LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
 ```shell
+lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --backend turbomind --server-port 23333 --model-format awq
 ```
 To use the OpenAI-style interface, you need to install OpenAI:
 ## License
+This project is released under the MIT license, while InternLM2 is licensed under the Apache-2.0 license.
 ## Citation

config.json CHANGED Viewed

@@ -1,6 +1,5 @@
 {
   "_commit_hash": null,
-  "_name_or_path": "/nvme/shared/InternVL2-40B",
   "architectures": [
     "InternVLChatModel"
   ],
@@ -96,7 +95,7 @@
     "tie_word_embeddings": false,
     "tokenizer_class": null,
     "top_k": 50,
-    "top_p": null,
     "torch_dtype": "bfloat16",
     "torchscript": false,
     "transformers_version": "4.40.0",
@@ -112,91 +111,36 @@
   "select_layer": -1,
   "template": "Hermes-2",
   "torch_dtype": "float16",
-  "transformers_version": null,
   "use_backbone_lora": 0,
   "use_llm_lora": 0,
   "use_thumbnail": true,
   "vision_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
     "architectures": [
       "InternVisionModel"
     ],
     "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
     "drop_path_rate": 0.0,
     "dropout": 0.0,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
     "hidden_act": "gelu",
     "hidden_size": 3200,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
     "image_size": 448,
     "initializer_factor": 0.1,
     "initializer_range": 1e-10,
     "intermediate_size": 12800,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
-    },
     "layer_norm_eps": 1e-06,
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
     "model_type": "intern_vit_6b",
-    "no_repeat_ngram_size": 0,
     "norm_type": "rms_norm",
     "num_attention_heads": 25,
-    "num_beam_groups": 1,
-    "num_beams": 1,
     "num_channels": 3,
     "num_hidden_layers": 45,
-    "num_return_sequences": 1,
     "output_attentions": false,
     "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
     "patch_size": 14,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
     "qk_normalization": true,
     "qkv_bias": false,
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
     "return_dict": true,
-    "return_dict_in_generate": false,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
     "torch_dtype": "bfloat16",
-    "torchscript": false,
     "transformers_version": "4.40.0",
-    "typical_p": 1.0,
     "use_bfloat16": true,
     "use_flash_attn": true
   }

 {
   "_commit_hash": null,
   "architectures": [
     "InternVLChatModel"
   ],
     "tie_word_embeddings": false,
     "tokenizer_class": null,
     "top_k": 50,
+    "top_p": 1.0,
     "torch_dtype": "bfloat16",
     "torchscript": false,
     "transformers_version": "4.40.0",
   "select_layer": -1,
   "template": "Hermes-2",
   "torch_dtype": "float16",
   "use_backbone_lora": 0,
   "use_llm_lora": 0,
   "use_thumbnail": true,
   "vision_config": {
     "architectures": [
       "InternVisionModel"
     ],
     "attention_dropout": 0.0,
     "drop_path_rate": 0.0,
     "dropout": 0.0,
     "hidden_act": "gelu",
     "hidden_size": 3200,
     "image_size": 448,
     "initializer_factor": 0.1,
     "initializer_range": 1e-10,
     "intermediate_size": 12800,
     "layer_norm_eps": 1e-06,
     "model_type": "intern_vit_6b",
     "norm_type": "rms_norm",
     "num_attention_heads": 25,
     "num_channels": 3,
     "num_hidden_layers": 45,
     "output_attentions": false,
     "output_hidden_states": false,
     "patch_size": 14,
     "qk_normalization": true,
     "qkv_bias": false,
     "return_dict": true,
     "torch_dtype": "bfloat16",
     "transformers_version": "4.40.0",
     "use_bfloat16": true,
     "use_flash_attn": true
   }

configuration_intern_vit.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # --------------------------------------------------------
 # InternVL
-# Copyright (c) 2023 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 import os

 # --------------------------------------------------------
 # InternVL
+# Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 import os

configuration_internvl_chat.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # --------------------------------------------------------
 # InternVL
-# Copyright (c) 2023 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------

 # --------------------------------------------------------
 # InternVL
+# Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------

conversation.py CHANGED Viewed

@@ -330,13 +330,16 @@ def get_conv_template(name: str) -> Conversation:
     return conv_templates[name].copy()
-# Note that for inference, using the Hermes-2 and internlm2-chat templates is equivalent.
 register_conv_template(
     Conversation(
         name='Hermes-2',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
-        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
@@ -357,7 +360,7 @@ register_conv_template(
         name='internlm2-chat',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
-        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
@@ -376,7 +379,7 @@ register_conv_template(
         name='phi3-chat',
         system_template='<|system|>\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
-        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新，开源开放，共享共创，推动科技进步和产业发展。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,

     return conv_templates[name].copy()
+# Both Hermes-2 and internlm2-chat are chatml-format conversation templates. The difference
+# is that during training, the preprocessing function for the Hermes-2 template doesn't add
+# <s> at the beginning of the tokenized sequence, while the internlm2-chat template does.
+# Therefore, they are completely equivalent during inference.
 register_conv_template(
     Conversation(
         name='Hermes-2',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         name='internlm2-chat',
         system_template='<|im_start|>system\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
         sep_style=SeparatorStyle.MPT,
         name='phi3-chat',
         system_template='<|system|>\n{system_message}',
         # note: The new system prompt was not used here to avoid changes in benchmark performance.
+        # system_message='我是书生·万象，英文名是InternVL，是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
         system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型，英文名叫InternVL, 是一个有用无害的人工智能助手。',
         roles=('<|user|>\n', '<|assistant|>\n'),
         sep_style=SeparatorStyle.MPT,

modeling_intern_vit.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # --------------------------------------------------------
 # InternVL
-# Copyright (c) 2023 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 from typing import Optional, Tuple, Union
@@ -368,6 +368,7 @@ class InternVisionEncoder(nn.Module):
 class InternVisionModel(PreTrainedModel):
     main_input_name = 'pixel_values'
     config_class = InternVisionConfig
     _no_split_modules = ['InternVisionEncoderLayer']

 # --------------------------------------------------------
 # InternVL
+# Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 from typing import Optional, Tuple, Union
 class InternVisionModel(PreTrainedModel):
     main_input_name = 'pixel_values'
+    _supports_flash_attn_2 = True
     config_class = InternVisionConfig
     _no_split_modules = ['InternVisionEncoderLayer']

modeling_internvl_chat.py CHANGED Viewed

@@ -33,6 +33,7 @@ def version_cmp(v1, v2, op='eq'):
 class InternVLChatModel(PreTrainedModel):
     config_class = InternVLChatConfig
     main_input_name = 'pixel_values'
     _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
     def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
@@ -73,6 +74,8 @@ class InternVLChatModel(PreTrainedModel):
         )
         self.img_context_token_id = None
     def forward(
             self,
@@ -252,6 +255,7 @@ class InternVLChatModel(PreTrainedModel):
         self.img_context_token_id = img_context_token_id
         template = get_conv_template(self.template)
         eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
         history = [] if history is None else history

 class InternVLChatModel(PreTrainedModel):
     config_class = InternVLChatConfig
     main_input_name = 'pixel_values'
+    _supports_flash_attn_2 = True
     _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
     def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
         )
         self.img_context_token_id = None
+        self.conv_template = get_conv_template(self.template)
+        self.system_message = self.conv_template.system_message
     def forward(
             self,
         self.img_context_token_id = img_context_token_id
         template = get_conv_template(self.template)
+        template.system_message = self.system_message
         eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
         history = [] if history is None else history