THUDM
/

chatglm2-6b-int4

Inference Endpoints

Model card Files and versions Community

zRzRzRzRzRzRzR committed 26 days ago

Commit

548388c

·

verified ·

1 Parent(s): d93643f

support transformers 4.47

Files changed (1) hide show

modeling_chatglm.py +2 -4

modeling_chatglm.py CHANGED Viewed

@@ -865,12 +865,10 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
             outputs: ModelOutput,
             model_kwargs: Dict[str, Any],
             is_encoder_decoder: bool = False,
-            standardize_cache_format: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
-        model_kwargs["past_key_values"] = self._extract_past_from_model_output(
-            outputs, standardize_cache_format=standardize_cache_format
-        )
         # update attention mask
         if "attention_mask" in model_kwargs:

             outputs: ModelOutput,
             model_kwargs: Dict[str, Any],
             is_encoder_decoder: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
+        cache_name, cache = self._extract_past_from_model_output(outputs)
+        model_kwargs[cache_name] = cache
         # update attention mask
         if "attention_mask" in model_kwargs: