small fix
- README.md +1 -1
- modeling_lsg_mbart.py +12 -4
README.md CHANGED

```diff
@@ -9,7 +9,7 @@ pipeline_tag: fill-mask
 ---
 
 # LSG model
-**Transformers >= 4.
+**Transformers >= 4.36.1**\
 **This model relies on a custom modeling file, you need to add trust_remote_code=True**\
 **See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
 
```
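The README line above is the operative requirement: the checkpoint ships its own modeling_lsg_mbart.py, so loading must opt into remote code. A minimal loading sketch, assuming transformers >= 4.36.1; the repo id below is a placeholder, not this card's actual checkpoint name:

```python
# Minimal sketch of loading an LSG checkpoint that ships a custom modeling file.
# "some-org/lsg-mbart-checkpoint" is a placeholder id, not a real repo.
from transformers import AutoModel, AutoTokenizer

model_id = "some-org/lsg-mbart-checkpoint"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(
    model_id,
    trust_remote_code=True,  # required: runs the repo's modeling_lsg_mbart.py
)
```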
modeling_lsg_mbart.py CHANGED

```diff
@@ -816,17 +816,19 @@ class LSGMBartEncoder(LSGMBartPretrainedModel, MBartEncoder):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
+            input = input_ids
+            input_shape = input.shape
             input_ids = input_ids.view(-1, input_shape[-1])
         elif inputs_embeds is not None:
-            input_shape = inputs_embeds.size()[:-1]
+            input = inputs_embeds[:, :, -1]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
 
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
 
-        embed_pos = self.embed_positions(input_shape)
+
+        embed_pos = self.embed_positions(input).to(inputs_embeds.device)
         hidden_states = inputs_embeds + embed_pos
 
         # Add global tokens
@@ -922,6 +924,12 @@ class LSGMBartModel(LSGMBartPretrainedModel, MBartModel):
         self.encoder = LSGMBartEncoder(config, self.shared)
         self.decoder = MBartDecoder(config, self.shared)
 
+        self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
+        if self._use_flash_attention_2:
+            logger.warning(
+                "[WARNING flash-attention]: LSG doesn't support flash-attention currently"
+            )
+
         # Initialize weights and apply final processing
         self.post_init()
 
@@ -1091,4 +1099,4 @@ try:
     str_to_class(value.split(".")[-1]).register_for_auto_class(key)
 except:
     warn("AutoRegister isn't available, you'll have to manually copy modeling.py after .save_pretrained(...).")
-    warn("Update to transformers >= 4.
+    warn("Update to transformers >= 4.36.1 to fix.")
```
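Two of the hunks above change runtime behavior: the encoder now passes the token tensor (rather than a shape) to self.embed_positions, matching the MBart positional-embedding API in transformers >= 4.36.1, and LSGMBartModel now warns when flash_attention_2 is configured, since LSG's custom attention does not support it. A hedged usage sketch; the repo id and local path are placeholders:

```python
# Sketch: pin the attention implementation to "eager" so the new
# flash-attention warning path is never hit (LSG doesn't support it).
# "some-org/lsg-mbart-checkpoint" and "./lsg-mbart-local" are placeholders.
import shutil

from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained(
    "some-org/lsg-mbart-checkpoint",
    trust_remote_code=True,
    attn_implementation="eager",  # flash_attention_2 only triggers the warning above
)

# If auto-registration fails (the except branch in the last hunk), the warning
# says to copy the modeling file next to the saved weights by hand:
model.save_pretrained("./lsg-mbart-local")
shutil.copy("modeling_lsg_mbart.py", "./lsg-mbart-local/")
```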