suayptalha committed (verified)
Commit 498e304 · 1 Parent(s): 2f89d54

Update modeling_minGRULM.py

Files changed (1)
  1. modeling_minGRULM.py +12 -1
modeling_minGRULM.py CHANGED
@@ -93,7 +93,7 @@ class MinGRULMForCausalLM(PreTrainedModel):
         # Ensure that inputs for generation are properly handled
         return {"input_ids": input_ids, "attention_mask": kwargs.get("attention_mask", None)}
 
-    def forward(
+    def forward(
         self,
         input_ids: torch.LongTensor,
         labels: Optional[torch.LongTensor] = None,
@@ -103,6 +103,11 @@ class MinGRULMForCausalLM(PreTrainedModel):
         # Forward pass through the wrapped model
         logits = self.model(input_ids)
 
+        # NaN check
+        if torch.isnan(logits).any():
+            print("NaN detected in logits! Replacing with zeros.")
+            logits = torch.nan_to_num(logits, nan=0.0)  # replace NaNs with zeros
+
         loss = None
         if labels is not None:
             shift_logits = logits[..., :-1, :].contiguous()
@@ -113,6 +118,11 @@ class MinGRULMForCausalLM(PreTrainedModel):
                 shift_labels.view(-1),
             )
 
+        # Apply the same NaN handling to the loss
+        if torch.isnan(loss).any():
+            print("NaN detected in loss! Setting loss to 0.")
+            loss = torch.tensor(0.0, device=loss.device)  # replace a NaN loss with zero
+
         if not return_dict:
             return (loss, logits) if loss is not None else (logits,)
 
@@ -121,6 +131,7 @@ class MinGRULMForCausalLM(PreTrainedModel):
             logits=logits,
         )
 
+
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """