longllama · Text Generation · Transformers · PyTorch · Safetensors · text-generation-inference · custom_code
Commit 1a5fcb5 · Parent: b65129a
Szymon Tworkowski committed

remove warning

Files changed (1): modeling_longllama.py (+0 -3)
modeling_longllama.py CHANGED
@@ -1027,9 +1027,6 @@ def _handle_long_input(
         attn_length += past_key_values[0][0].shape[-2]
         attention_mask = attention_mask[..., -attn_length:] if attention_mask is not None else None
 
-        if past_key_values is not None and past_key_values[0][0].shape[-2] + remaining_input_length > context_window_length:
-            logger.warning("Currently, the code is not optimized for generating long outputs. "
-                           "You see this warning as parts of the local (generation) cache are going to be moved to the memory cache.")
         outputs = model(
             input_ids=input_ids[..., beg:] if input_ids is not None else None,
             attention_mask=attention_mask,
 
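For context, the removed branch fired when the keys already cached in past_key_values plus the still-unprocessed input exceeded context_window_length, i.e. the point at which LongLLaMA begins moving entries from the local (generation) cache into its memory cache. The sketch below is a hypothetical illustration of that condition only; cached_length and the concrete numbers are made-up stand-ins for the real tensor shapes.

# Hypothetical sketch of the condition guarded by the removed warning.
# cached_length stands in for past_key_values[0][0].shape[-2]; all numbers are made up.
context_window_length = 2048   # capacity of the local (generation) cache
cached_length = 1900           # key/value entries already held in the cache
remaining_input_length = 300   # tokens still to be fed through the model

if cached_length + remaining_input_length > context_window_length:
    # Past this point, the oldest local-cache entries spill into the memory cache;
    # the deleted code only logged a warning here and did not change that behavior.
    print("local (generation) cache will spill into the memory cache")

Since the hunk is +0 -3, the eviction behavior itself is untouched; the commit only silences the log message emitted during long generations.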