Fix the RuntimeError from an in-place operation when using transformers for training
Align `input_embeds` with the original InternVL2 code ([Line 167](https://github.com/OpenGVLab/InternVL/blob/74126371d0190a316109ddb22e4f2ca4405a7991/internvl_chat/internvl/model/internvl_chat/modeling_internvl_chat.py#L167)). Without the `.clone()`, the HF version of the code can raise a RuntimeError during training (a minimal sketch of the pattern follows the diff below).
modeling_internvl_chat.py
CHANGED
```diff
@@ -101,7 +101,7 @@ class InternVLChatModel(PreTrainedModel):
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict

         image_flags = image_flags.squeeze(-1)
-        input_embeds = self.language_model.get_input_embeddings()(input_ids)
+        input_embeds = self.language_model.get_input_embeddings()(input_ids).clone()

         vit_embeds = self.extract_feature(pixel_values)
         vit_embeds = vit_embeds[image_flags == 1]
```
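For context, here is a minimal sketch of the pattern this change protects; the names and shapes are illustrative, not the actual InternVL forward. Later in the forward pass, the ViT features are scattered in place into the token embeddings at the image-context positions; under some training setups, writing directly into the embedding layer's output can trip PyTorch's in-place modification check, so the original InternVL code clones the embedding output first and writes into the copy.

```python
import torch
import torch.nn as nn

embed = nn.Embedding(100, 8)                         # stand-in for the LLM input embedding table
vit_embeds = torch.randn(3, 8, requires_grad=True)   # stand-in for extract_feature(pixel_values)
input_ids = torch.randint(0, 100, (2, 5))

# .clone() gives a fresh tensor to write into; the embedding layer's own
# output is left untouched for autograd.
input_embeds = embed(input_ids).clone()

B, N, C = input_embeds.shape
input_embeds = input_embeds.reshape(B * N, C)

# Pretend the first 3 flattened positions are image-context tokens.
selected = torch.zeros(B * N, dtype=torch.bool)
selected[:3] = True

# In-place scatter of the visual features into the text embeddings.
input_embeds[selected] = vit_embeds.reshape(-1, C)

input_embeds = input_embeds.reshape(B, N, C)
input_embeds.sum().backward()

print(embed.weight.grad.shape)   # gradients still reach the embedding table
print(vit_embeds.grad.shape)     # ... and the visual features
```

The extra `.clone()` is cheap relative to the rest of the forward pass, and it keeps the in-place write from mutating a tensor that the autograd graph may have saved elsewhere.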