suayptalha committed
Commit 783bbd7 · verified · 1 Parent(s): 8042f2b

Update modeling_minGRULM.py

Files changed (1):
  1. modeling_minGRULM.py +21 -6
modeling_minGRULM.py CHANGED

@@ -13,7 +13,7 @@ class MinGRULMWrapped(nn.Module):
     def __init__(self, min_gru_model):
         super().__init__()
         self.min_gru_model = min_gru_model
-        self.device = torch.device("cuda")  # Default device
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
     def forward(self, *args, **kwargs):
         # Move input tensors to the correct device
@@ -45,9 +45,11 @@ class MinGRULMPreTrainedModel(PreTrainedModel):
         elif isinstance(module, nn.LayerNorm):
             module.bias.data.zero_()
             module.weight.data.fill_(1.0)
 
-
-class MinGRULMForCausalLM(MinGRULMPreTrainedModel):
+class MinGRULMForCausalLM(PreTrainedModel):
+    config_class = MinGRULMConfig
+    base_model_prefix = "model"
+
     def __init__(self, config: MinGRULMConfig):
         super().__init__(config)
 
@@ -68,9 +70,8 @@ class MinGRULMForCausalLM(MinGRULMPreTrainedModel):
         self.post_init()
 
     def post_init(self):
+        # Ensure tied weights and any additional setup
        super().post_init()
-
-        # Ensure tied weights
         self.tie_weights()
 
     def tie_weights(self):
@@ -116,4 +117,18 @@ class MinGRULMForCausalLM(MinGRULMPreTrainedModel):
         return CausalLMOutputWithPast(
             loss=loss,
             logits=logits,
-        )
+        )
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        """
+        Load model from a pretrained checkpoint.
+        """
+        model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        return model
+
+    def save_pretrained(self, save_directory):
+        """
+        Save the model and configuration to a directory.
+        """
+        super().save_pretrained(save_directory)
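
The device change in the first hunk is the main portability fix: a hard-coded torch.device("cuda") breaks on machines without a GPU. A minimal sketch of the guarded pattern, using a hypothetical stand-in module rather than the real MinGRULMWrapped:

    import torch
    import torch.nn as nn

    class WrappedSketch(nn.Module):
        """Illustrative stand-in for MinGRULMWrapped; not the actual class."""
        def __init__(self, inner: nn.Module):
            super().__init__()
            self.inner = inner
            # Pattern from the commit: prefer CUDA, but fall back to CPU so the
            # wrapper can be constructed and run on CUDA-less machines.
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # Mirror the wrapper's behavior of moving inputs onto its device.
            return self.inner(x.to(self.device))

    wrapped = WrappedSketch(nn.Linear(8, 8)).to(
        torch.device("cuda" if torch.cuda.is_available() else "cpu")
    )
    print(wrapped(torch.randn(2, 8)).shape)  # torch.Size([2, 8])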
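
The new from_pretrained/save_pretrained overrides simply delegate to the PreTrainedModel base implementations, so a checkpoint round trip is the natural smoke test. A hedged usage sketch: the import assumes modeling_minGRULM.py is importable, and the paths are hypothetical placeholders.

    # Round trip through the overridden save_pretrained / from_pretrained.
    from modeling_minGRULM import MinGRULMForCausalLM

    model = MinGRULMForCausalLM.from_pretrained("path/to/checkpoint")  # hypothetical path
    model.save_pretrained("path/to/save_dir")                          # hypothetical path
    model = MinGRULMForCausalLM.from_pretrained("path/to/save_dir")

One design note: the save_pretrained override narrows the signature to a single save_directory argument, so keyword arguments the base method accepts (e.g. safe_serialization) will not pass through this override.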