ccdv committed on
Commit
1e5c9b9
·
1 Parent(s): 7f98c56
Files changed (2) hide show
  1. README.md +1 -1
  2. modeling_lsg_mbart.py +12 -4
README.md CHANGED
@@ -9,7 +9,7 @@ pipeline_tag: fill-mask
9
  ---
10
 
11
  # LSG model
12
- **Transformers >= 4.35.2**\
13
  **This model relies on a custom modeling file, you need to add trust_remote_code=True**\
14
  **See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
15
 
 
9
  ---
10
 
11
  # LSG model
12
+ **Transformers >= 4.36.1**\
13
  **This model relies on a custom modeling file, you need to add trust_remote_code=True**\
14
  **See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
15
 
modeling_lsg_mbart.py CHANGED
@@ -816,17 +816,19 @@ class LSGMBartEncoder(LSGMBartPretrainedModel, MBartEncoder):
816
  if input_ids is not None and inputs_embeds is not None:
817
  raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
818
  elif input_ids is not None:
819
- input_shape = input_ids.size()
 
820
  input_ids = input_ids.view(-1, input_shape[-1])
821
  elif inputs_embeds is not None:
822
- input_shape = inputs_embeds.size()[:-1]
823
  else:
824
  raise ValueError("You have to specify either input_ids or inputs_embeds")
825
 
826
  if inputs_embeds is None:
827
  inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
828
 
829
- embed_pos = self.embed_positions(inputs_embeds)
 
830
  hidden_states = inputs_embeds + embed_pos
831
 
832
  # Add global tokens
@@ -922,6 +924,12 @@ class LSGMBartModel(LSGMBartPretrainedModel, MBartModel):
922
  self.encoder = LSGMBartEncoder(config, self.shared)
923
  self.decoder = MBartDecoder(config, self.shared)
924
 
 
 
 
 
 
 
925
  # Initialize weights and apply final processing
926
  self.post_init()
927
 
@@ -1091,4 +1099,4 @@ try:
1091
  str_to_class(value.split(".")[-1]).register_for_auto_class(key)
1092
  except:
1093
  warn("AutoRegister isn't available, you'll have to manually copy modeling.py after .save_pretrained(...).")
1094
- warn("Update to transformers >= 4.35.2 to fix.")
 
816
  if input_ids is not None and inputs_embeds is not None:
817
  raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
818
  elif input_ids is not None:
819
+ input = input_ids
820
+ input_shape = input.shape
821
  input_ids = input_ids.view(-1, input_shape[-1])
822
  elif inputs_embeds is not None:
823
+ input = inputs_embeds[:, :, -1]
824
  else:
825
  raise ValueError("You have to specify either input_ids or inputs_embeds")
826
 
827
  if inputs_embeds is None:
828
  inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
829
 
830
+
831
+ embed_pos = self.embed_positions(input).to(inputs_embeds.device)
832
  hidden_states = inputs_embeds + embed_pos
833
 
834
  # Add global tokens
 
924
  self.encoder = LSGMBartEncoder(config, self.shared)
925
  self.decoder = MBartDecoder(config, self.shared)
926
 
927
+ self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2"
928
+ if self._use_flash_attention_2:
929
+ logger.warning(
930
+ "[WARNING flash-attention]: LSG doesnt support flash-attention currently"
931
+ )
932
+
933
  # Initialize weights and apply final processing
934
  self.post_init()
935
 
 
1099
  str_to_class(value.split(".")[-1]).register_for_auto_class(key)
1100
  except:
1101
  warn("AutoRegister isn't available, you'll have to manually copy modeling.py after .save_pretrained(...).")
1102
+ warn("Update to transformers >= 4.36.1 to fix.")