YL95 committed on
Commit
2f2c389
1 Parent(s): 9034b87

training state at step 55

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6896551724137931,
5
  "eval_steps": 1,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -727,6 +727,81 @@
727
  "eval_samples_per_second": 1.275,
728
  "eval_steps_per_second": 0.638,
729
  "step": 49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
730
  }
731
  ],
732
  "logging_steps": 1,
@@ -746,7 +821,7 @@
746
  "attributes": {}
747
  }
748
  },
749
- "total_flos": 6.308160393859891e+16,
750
  "train_batch_size": 2,
751
  "trial_name": null,
752
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7586206896551724,
5
  "eval_steps": 1,
6
+ "global_step": 55,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
727
  "eval_samples_per_second": 1.275,
728
  "eval_steps_per_second": 0.638,
729
  "step": 49
730
+ },
731
+ {
732
+ "epoch": 0.6896551724137931,
733
+ "grad_norm": 2.119081497192383,
734
+ "learning_rate": 2.272727272727273e-05,
735
+ "loss": 1.5447,
736
+ "step": 50
737
+ },
738
+ {
739
+ "epoch": 0.6896551724137931,
740
+ "eval_loss": 0.8692445755004883,
741
+ "eval_runtime": 15.6827,
742
+ "eval_samples_per_second": 1.275,
743
+ "eval_steps_per_second": 0.638,
744
+ "step": 50
745
+ },
746
+ {
747
+ "epoch": 0.7034482758620689,
748
+ "grad_norm": 1.9801068305969238,
749
+ "learning_rate": 2.5e-05,
750
+ "loss": 1.2777,
751
+ "step": 51
752
+ },
753
+ {
754
+ "epoch": 0.7034482758620689,
755
+ "eval_loss": 0.8668963313102722,
756
+ "eval_runtime": 15.7049,
757
+ "eval_samples_per_second": 1.273,
758
+ "eval_steps_per_second": 0.637,
759
+ "step": 51
760
+ },
761
+ {
762
+ "epoch": 0.7172413793103448,
763
+ "grad_norm": 2.0645248889923096,
764
+ "learning_rate": 2.7272727272727273e-05,
765
+ "loss": 1.3444,
766
+ "step": 52
767
+ },
768
+ {
769
+ "epoch": 0.7172413793103448,
770
+ "eval_loss": 0.8615155220031738,
771
+ "eval_runtime": 15.6899,
772
+ "eval_samples_per_second": 1.275,
773
+ "eval_steps_per_second": 0.637,
774
+ "step": 52
775
+ },
776
+ {
777
+ "epoch": 0.7310344827586207,
778
+ "grad_norm": 2.1377453804016113,
779
+ "learning_rate": 2.954545454545455e-05,
780
+ "loss": 1.4174,
781
+ "step": 53
782
+ },
783
+ {
784
+ "epoch": 0.7310344827586207,
785
+ "eval_loss": 0.8575263023376465,
786
+ "eval_runtime": 15.6427,
787
+ "eval_samples_per_second": 1.279,
788
+ "eval_steps_per_second": 0.639,
789
+ "step": 53
790
+ },
791
+ {
792
+ "epoch": 0.7448275862068966,
793
+ "grad_norm": 2.1462454795837402,
794
+ "learning_rate": 3.181818181818182e-05,
795
+ "loss": 1.429,
796
+ "step": 54
797
+ },
798
+ {
799
+ "epoch": 0.7448275862068966,
800
+ "eval_loss": 0.8533774614334106,
801
+ "eval_runtime": 15.7668,
802
+ "eval_samples_per_second": 1.268,
803
+ "eval_steps_per_second": 0.634,
804
+ "step": 54
805
  }
806
  ],
807
  "logging_steps": 1,
 
821
  "attributes": {}
822
  }
823
  },
824
+ "total_flos": 6.947646271379866e+16,
825
  "train_batch_size": 2,
826
  "trial_name": null,
827
  "trial_params": null