damerajee committed on
Commit
7c59a8a
·
verified ·
1 Parent(s): ee7740f

Update modeling_Llamoe.py

Browse files
Files changed (1) hide show
  1. modeling_Llamoe.py +2 -0
modeling_Llamoe.py CHANGED
@@ -589,9 +589,11 @@ class LlamoeSdpaAttention(LlamoeAttention):
589
  print("after_rb_value_states:",value_states)
590
 
591
  causal_mask = attention_mask
 
592
  if attention_mask is not None and cache_position is not None:
593
  causal_mask = causal_mask[:, :, cache_position, : key_states.shape[-2]]
594
 
 
595
  # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask,
596
  # Reference: https://github.com/pytorch/pytorch/issues/112577.
597
  if query_states.device.type == "cuda" and causal_mask is not None:
 
589
  print("after_rb_value_states:",value_states)
590
 
591
  causal_mask = attention_mask
592
+ print("causal_mask:",causal_mask)
593
  if attention_mask is not None and cache_position is not None:
594
  causal_mask = causal_mask[:, :, cache_position, : key_states.shape[-2]]
595
 
596
+ print("after_causal_masks:",causal_mask)
597
  # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask,
598
  # Reference: https://github.com/pytorch/pytorch/issues/112577.
599
  if query_states.device.type == "cuda" and causal_mask is not None: