suayptalha committed
Commit 109fa21 · verified · 1 Parent(s): f2f029b

Update modeling_minGRULM.py

Files changed (1):
  1. modeling_minGRULM.py +34 -10
modeling_minGRULM.py CHANGED
@@ -28,6 +28,7 @@ class MinGRULMWrapped(nn.Module):
         return self
 
 
+
 class MinGRULMPreTrainedModel(PreTrainedModel):
     config_class = MinGRULMConfig
     base_model_prefix = "model"
@@ -45,6 +46,7 @@ class MinGRULMPreTrainedModel(PreTrainedModel):
         elif isinstance(module, nn.LayerNorm):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)
+
 
 class MinGRULMForCausalLM(PreTrainedModel):
     config_class = MinGRULMConfig
@@ -119,6 +121,25 @@ class MinGRULMForCausalLM(PreTrainedModel):
             logits=logits,
         )
 
+    def state_dict(self):
+        """
+        Custom state_dict function to return the model's state dict.
+        This includes the wrapped model and any extra components like the language model head.
+        """
+        state_dict = {}
+
+        # Add min_gru_model's state_dict
+        state_dict['model'] = self.model.min_gru_model.state_dict()
+
+        # Add lm_head's state_dict
+        state_dict['lm_head'] = self.lm_head.state_dict()
+
+        # Optionally, add config if needed
+        state_dict['config'] = self.config.state_dict()
+
+        return state_dict
+
+
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """
@@ -137,15 +158,18 @@ class MinGRULMForCausalLM(PreTrainedModel):
         """
         # Create the save directory if it doesn't exist
         os.makedirs(save_directory, exist_ok=True)
-
-        # Save the model's state_dict (model weights)
-        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))
-
-        # Save the configuration
-        self.config.save_pretrained(save_directory)
-
-        # Optionally print messages based on the safe_serialization flag
+
+        # Check if safe_serialization is enabled
         if safe_serialization:
-            print("Model and configuration have been saved safely.")
+            print("Saving with safe serialization.")
+
+            # Save the model's state_dict (model weights)
+            state_dict = self.state_dict()
+            torch.save(state_dict, os.path.join(save_directory, "pytorch_model.bin"))
+
+            # Save the configuration
+            self.config.save_pretrained(save_directory)
         else:
-            print("Model and configuration have been saved (unsafe serialization).")
+            print("Saving without safe serialization.")
+            # If not safe_serialization, use the default save mechanism from the base class
+            super().save_pretrained(save_directory)
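
For reference, a minimal loading sketch (not part of the commit) showing how a checkpoint written by the new save_pretrained path with safe_serialization=True could be restored. The import path for MinGRULMConfig and the checkpoint directory are assumptions; the nested keys simply mirror the custom state_dict() added above.

# Minimal sketch, assuming the checkpoint was produced by
# save_pretrained(save_directory, safe_serialization=True) from this commit.
import os

import torch

from configuration_minGRULM import MinGRULMConfig   # assumed module name
from modeling_minGRULM import MinGRULMForCausalLM

save_directory = "./minGRULM-checkpoint"  # hypothetical path

# Rebuild the model from its saved configuration.
config = MinGRULMConfig.from_pretrained(save_directory)
model = MinGRULMForCausalLM(config)

# The custom state_dict() stores nested sub-dicts under 'model' and 'lm_head',
# so each component is restored separately rather than via one flat load_state_dict.
checkpoint = torch.load(os.path.join(save_directory, "pytorch_model.bin"), map_location="cpu")
model.model.min_gru_model.load_state_dict(checkpoint["model"])
model.lm_head.load_state_dict(checkpoint["lm_head"])
model.eval()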