cuierfei committed on
Commit
4f1df8c
·
verified ·
1 Parent(s): 681aacb

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -65,7 +65,7 @@ For more information about the pipeline parameters, please refer to [here](https
65
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
66
 
67
  ```shell
68
- lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --server-port 23333
69
  ```
70
 
71
  To use the OpenAI-style interface, you need to install OpenAI:
@@ -104,7 +104,7 @@ print(response)
104
 
105
  ## License
106
 
107
- This project is released under the MIT license, while InternLM is licensed under the Apache-2.0 license.
108
 
109
  ## Citation
110
 
 
65
  LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
66
 
67
  ```shell
68
+ lmdeploy serve api_server OpenGVLab/InternVL2-40B-AWQ --backend turbomind --server-port 23333 --model-format awq
69
  ```
70
 
71
  To use the OpenAI-style interface, you need to install OpenAI:
 
104
 
105
  ## License
106
 
107
+ This project is released under the MIT license, while InternLM2 is licensed under the Apache-2.0 license.
108
 
109
  ## Citation
110
 
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "_commit_hash": null,
3
- "_name_or_path": "/nvme/shared/InternVL2-40B",
4
  "architectures": [
5
  "InternVLChatModel"
6
  ],
@@ -96,7 +95,7 @@
96
  "tie_word_embeddings": false,
97
  "tokenizer_class": null,
98
  "top_k": 50,
99
- "top_p": null,
100
  "torch_dtype": "bfloat16",
101
  "torchscript": false,
102
  "transformers_version": "4.40.0",
@@ -112,91 +111,36 @@
112
  "select_layer": -1,
113
  "template": "Hermes-2",
114
  "torch_dtype": "float16",
115
- "transformers_version": null,
116
  "use_backbone_lora": 0,
117
  "use_llm_lora": 0,
118
  "use_thumbnail": true,
119
  "vision_config": {
120
- "_name_or_path": "",
121
- "add_cross_attention": false,
122
  "architectures": [
123
  "InternVisionModel"
124
  ],
125
  "attention_dropout": 0.0,
126
- "bad_words_ids": null,
127
- "begin_suppress_tokens": null,
128
- "bos_token_id": null,
129
- "chunk_size_feed_forward": 0,
130
- "cross_attention_hidden_size": null,
131
- "decoder_start_token_id": null,
132
- "diversity_penalty": 0.0,
133
- "do_sample": false,
134
  "drop_path_rate": 0.0,
135
  "dropout": 0.0,
136
- "early_stopping": false,
137
- "encoder_no_repeat_ngram_size": 0,
138
- "eos_token_id": null,
139
- "exponential_decay_length_penalty": null,
140
- "finetuning_task": null,
141
- "forced_bos_token_id": null,
142
- "forced_eos_token_id": null,
143
  "hidden_act": "gelu",
144
  "hidden_size": 3200,
145
- "id2label": {
146
- "0": "LABEL_0",
147
- "1": "LABEL_1"
148
- },
149
  "image_size": 448,
150
  "initializer_factor": 0.1,
151
  "initializer_range": 1e-10,
152
  "intermediate_size": 12800,
153
- "is_decoder": false,
154
- "is_encoder_decoder": false,
155
- "label2id": {
156
- "LABEL_0": 0,
157
- "LABEL_1": 1
158
- },
159
  "layer_norm_eps": 1e-06,
160
- "length_penalty": 1.0,
161
- "max_length": 20,
162
- "min_length": 0,
163
  "model_type": "intern_vit_6b",
164
- "no_repeat_ngram_size": 0,
165
  "norm_type": "rms_norm",
166
  "num_attention_heads": 25,
167
- "num_beam_groups": 1,
168
- "num_beams": 1,
169
  "num_channels": 3,
170
  "num_hidden_layers": 45,
171
- "num_return_sequences": 1,
172
  "output_attentions": false,
173
  "output_hidden_states": false,
174
- "output_scores": false,
175
- "pad_token_id": null,
176
  "patch_size": 14,
177
- "prefix": null,
178
- "problem_type": null,
179
- "pruned_heads": {},
180
  "qk_normalization": true,
181
  "qkv_bias": false,
182
- "remove_invalid_values": false,
183
- "repetition_penalty": 1.0,
184
  "return_dict": true,
185
- "return_dict_in_generate": false,
186
- "sep_token_id": null,
187
- "suppress_tokens": null,
188
- "task_specific_params": null,
189
- "temperature": 1.0,
190
- "tf_legacy_loss": false,
191
- "tie_encoder_decoder": false,
192
- "tie_word_embeddings": true,
193
- "tokenizer_class": null,
194
- "top_k": 50,
195
- "top_p": 1.0,
196
  "torch_dtype": "bfloat16",
197
- "torchscript": false,
198
  "transformers_version": "4.40.0",
199
- "typical_p": 1.0,
200
  "use_bfloat16": true,
201
  "use_flash_attn": true
202
  }
 
1
  {
2
  "_commit_hash": null,
 
3
  "architectures": [
4
  "InternVLChatModel"
5
  ],
 
95
  "tie_word_embeddings": false,
96
  "tokenizer_class": null,
97
  "top_k": 50,
98
+ "top_p": 1.0,
99
  "torch_dtype": "bfloat16",
100
  "torchscript": false,
101
  "transformers_version": "4.40.0",
 
111
  "select_layer": -1,
112
  "template": "Hermes-2",
113
  "torch_dtype": "float16",
 
114
  "use_backbone_lora": 0,
115
  "use_llm_lora": 0,
116
  "use_thumbnail": true,
117
  "vision_config": {
 
 
118
  "architectures": [
119
  "InternVisionModel"
120
  ],
121
  "attention_dropout": 0.0,
 
 
 
 
 
 
 
 
122
  "drop_path_rate": 0.0,
123
  "dropout": 0.0,
 
 
 
 
 
 
 
124
  "hidden_act": "gelu",
125
  "hidden_size": 3200,
 
 
 
 
126
  "image_size": 448,
127
  "initializer_factor": 0.1,
128
  "initializer_range": 1e-10,
129
  "intermediate_size": 12800,
 
 
 
 
 
 
130
  "layer_norm_eps": 1e-06,
 
 
 
131
  "model_type": "intern_vit_6b",
 
132
  "norm_type": "rms_norm",
133
  "num_attention_heads": 25,
 
 
134
  "num_channels": 3,
135
  "num_hidden_layers": 45,
 
136
  "output_attentions": false,
137
  "output_hidden_states": false,
 
 
138
  "patch_size": 14,
 
 
 
139
  "qk_normalization": true,
140
  "qkv_bias": false,
 
 
141
  "return_dict": true,
 
 
 
 
 
 
 
 
 
 
 
142
  "torch_dtype": "bfloat16",
 
143
  "transformers_version": "4.40.0",
 
144
  "use_bfloat16": true,
145
  "use_flash_attn": true
146
  }
configuration_intern_vit.py CHANGED
@@ -1,6 +1,6 @@
1
  # --------------------------------------------------------
2
  # InternVL
3
- # Copyright (c) 2023 OpenGVLab
4
  # Licensed under The MIT License [see LICENSE for details]
5
  # --------------------------------------------------------
6
  import os
 
1
  # --------------------------------------------------------
2
  # InternVL
3
+ # Copyright (c) 2024 OpenGVLab
4
  # Licensed under The MIT License [see LICENSE for details]
5
  # --------------------------------------------------------
6
  import os
configuration_internvl_chat.py CHANGED
@@ -1,6 +1,6 @@
1
  # --------------------------------------------------------
2
  # InternVL
3
- # Copyright (c) 2023 OpenGVLab
4
  # Licensed under The MIT License [see LICENSE for details]
5
  # --------------------------------------------------------
6
 
 
1
  # --------------------------------------------------------
2
  # InternVL
3
+ # Copyright (c) 2024 OpenGVLab
4
  # Licensed under The MIT License [see LICENSE for details]
5
  # --------------------------------------------------------
6
 
conversation.py CHANGED
@@ -330,13 +330,16 @@ def get_conv_template(name: str) -> Conversation:
330
  return conv_templates[name].copy()
331
 
332
 
333
- # Note that for inference, using the Hermes-2 and internlm2-chat templates is equivalent.
 
 
 
334
  register_conv_template(
335
  Conversation(
336
  name='Hermes-2',
337
  system_template='<|im_start|>system\n{system_message}',
338
  # note: The new system prompt was not used here to avoid changes in benchmark performance.
339
- # system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新,开源开放,共享共创,推动科技进步和产业发展。',
340
  system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
341
  roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
342
  sep_style=SeparatorStyle.MPT,
@@ -357,7 +360,7 @@ register_conv_template(
357
  name='internlm2-chat',
358
  system_template='<|im_start|>system\n{system_message}',
359
  # note: The new system prompt was not used here to avoid changes in benchmark performance.
360
- # system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新,开源开放,共享共创,推动科技进步和产业发展。',
361
  system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
362
  roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
363
  sep_style=SeparatorStyle.MPT,
@@ -376,7 +379,7 @@ register_conv_template(
376
  name='phi3-chat',
377
  system_template='<|system|>\n{system_message}',
378
  # note: The new system prompt was not used here to avoid changes in benchmark performance.
379
- # system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。人工智能实验室致力于原始技术创新,开源开放,共享共创,推动科技进步和产业发展。',
380
  system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
381
  roles=('<|user|>\n', '<|assistant|>\n'),
382
  sep_style=SeparatorStyle.MPT,
 
330
  return conv_templates[name].copy()
331
 
332
 
333
+ # Both Hermes-2 and internlm2-chat are chatml-format conversation templates. The difference
334
+ # is that during training, the preprocessing function for the Hermes-2 template doesn't add
335
+ # <s> at the beginning of the tokenized sequence, while the internlm2-chat template does.
336
+ # Therefore, they are completely equivalent during inference.
337
  register_conv_template(
338
  Conversation(
339
  name='Hermes-2',
340
  system_template='<|im_start|>system\n{system_message}',
341
  # note: The new system prompt was not used here to avoid changes in benchmark performance.
342
+ # system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
343
  system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
344
  roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
345
  sep_style=SeparatorStyle.MPT,
 
360
  name='internlm2-chat',
361
  system_template='<|im_start|>system\n{system_message}',
362
  # note: The new system prompt was not used here to avoid changes in benchmark performance.
363
+ # system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
364
  system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
365
  roles=('<|im_start|>user\n', '<|im_start|>assistant\n'),
366
  sep_style=SeparatorStyle.MPT,
 
379
  name='phi3-chat',
380
  system_template='<|system|>\n{system_message}',
381
  # note: The new system prompt was not used here to avoid changes in benchmark performance.
382
+ # system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室及多家合作单位联合开发的多模态大语言模型。',
383
  system_message='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。',
384
  roles=('<|user|>\n', '<|assistant|>\n'),
385
  sep_style=SeparatorStyle.MPT,
modeling_intern_vit.py CHANGED
@@ -1,6 +1,6 @@
1
  # --------------------------------------------------------
2
  # InternVL
3
- # Copyright (c) 2023 OpenGVLab
4
  # Licensed under The MIT License [see LICENSE for details]
5
  # --------------------------------------------------------
6
  from typing import Optional, Tuple, Union
@@ -368,6 +368,7 @@ class InternVisionEncoder(nn.Module):
368
 
369
  class InternVisionModel(PreTrainedModel):
370
  main_input_name = 'pixel_values'
 
371
  config_class = InternVisionConfig
372
  _no_split_modules = ['InternVisionEncoderLayer']
373
 
 
1
  # --------------------------------------------------------
2
  # InternVL
3
+ # Copyright (c) 2024 OpenGVLab
4
  # Licensed under The MIT License [see LICENSE for details]
5
  # --------------------------------------------------------
6
  from typing import Optional, Tuple, Union
 
368
 
369
  class InternVisionModel(PreTrainedModel):
370
  main_input_name = 'pixel_values'
371
+ _supports_flash_attn_2 = True
372
  config_class = InternVisionConfig
373
  _no_split_modules = ['InternVisionEncoderLayer']
374
 
modeling_internvl_chat.py CHANGED
@@ -33,6 +33,7 @@ def version_cmp(v1, v2, op='eq'):
33
  class InternVLChatModel(PreTrainedModel):
34
  config_class = InternVLChatConfig
35
  main_input_name = 'pixel_values'
 
36
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
37
 
38
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
@@ -73,6 +74,8 @@ class InternVLChatModel(PreTrainedModel):
73
  )
74
 
75
  self.img_context_token_id = None
 
 
76
 
77
  def forward(
78
  self,
@@ -252,6 +255,7 @@ class InternVLChatModel(PreTrainedModel):
252
  self.img_context_token_id = img_context_token_id
253
 
254
  template = get_conv_template(self.template)
 
255
  eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
256
 
257
  history = [] if history is None else history
 
33
  class InternVLChatModel(PreTrainedModel):
34
  config_class = InternVLChatConfig
35
  main_input_name = 'pixel_values'
36
+ _supports_flash_attn_2 = True
37
  _no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer']
38
 
39
  def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None):
 
74
  )
75
 
76
  self.img_context_token_id = None
77
+ self.conv_template = get_conv_template(self.template)
78
+ self.system_message = self.conv_template.system_message
79
 
80
  def forward(
81
  self,
 
255
  self.img_context_token_id = img_context_token_id
256
 
257
  template = get_conv_template(self.template)
258
+ template.system_message = self.system_message
259
  eos_token_id = tokenizer.convert_tokens_to_ids(template.sep)
260
 
261
  history = [] if history is None else history