YL95 committed on
Commit
a6e6cd8
1 Parent(s): bc3efc8

training state at step 50

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6206896551724138,
5
  "eval_steps": 1,
6
- "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -652,6 +652,81 @@
652
  "eval_samples_per_second": 1.275,
653
  "eval_steps_per_second": 0.637,
654
  "step": 44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  }
656
  ],
657
  "logging_steps": 1,
@@ -671,7 +746,7 @@
671
  "attributes": {}
672
  }
673
  },
674
- "total_flos": 5.643858048835584e+16,
675
  "train_batch_size": 2,
676
  "trial_name": null,
677
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6896551724137931,
5
  "eval_steps": 1,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
652
  "eval_samples_per_second": 1.275,
653
  "eval_steps_per_second": 0.637,
654
  "step": 44
655
+ },
656
+ {
657
+ "epoch": 0.6206896551724138,
658
+ "grad_norm": 2.1096601486206055,
659
+ "learning_rate": 1.1363636363636365e-05,
660
+ "loss": 1.3981,
661
+ "step": 45
662
+ },
663
+ {
664
+ "epoch": 0.6206896551724138,
665
+ "eval_loss": 0.8982122540473938,
666
+ "eval_runtime": 15.707,
667
+ "eval_samples_per_second": 1.273,
668
+ "eval_steps_per_second": 0.637,
669
+ "step": 45
670
+ },
671
+ {
672
+ "epoch": 0.6344827586206897,
673
+ "grad_norm": 1.971846342086792,
674
+ "learning_rate": 1.3636363636363637e-05,
675
+ "loss": 1.263,
676
+ "step": 46
677
+ },
678
+ {
679
+ "epoch": 0.6344827586206897,
680
+ "eval_loss": 0.891434371471405,
681
+ "eval_runtime": 15.7993,
682
+ "eval_samples_per_second": 1.266,
683
+ "eval_steps_per_second": 0.633,
684
+ "step": 46
685
+ },
686
+ {
687
+ "epoch": 0.6482758620689655,
688
+ "grad_norm": 1.9724080562591553,
689
+ "learning_rate": 1.590909090909091e-05,
690
+ "loss": 1.355,
691
+ "step": 47
692
+ },
693
+ {
694
+ "epoch": 0.6482758620689655,
695
+ "eval_loss": 0.8870094418525696,
696
+ "eval_runtime": 15.6828,
697
+ "eval_samples_per_second": 1.275,
698
+ "eval_steps_per_second": 0.638,
699
+ "step": 47
700
+ },
701
+ {
702
+ "epoch": 0.6620689655172414,
703
+ "grad_norm": 2.0631349086761475,
704
+ "learning_rate": 1.8181818181818182e-05,
705
+ "loss": 1.3375,
706
+ "step": 48
707
+ },
708
+ {
709
+ "epoch": 0.6620689655172414,
710
+ "eval_loss": 0.8790606260299683,
711
+ "eval_runtime": 15.6783,
712
+ "eval_samples_per_second": 1.276,
713
+ "eval_steps_per_second": 0.638,
714
+ "step": 48
715
+ },
716
+ {
717
+ "epoch": 0.6758620689655173,
718
+ "grad_norm": 2.1942760944366455,
719
+ "learning_rate": 2.0454545454545457e-05,
720
+ "loss": 1.3937,
721
+ "step": 49
722
+ },
723
+ {
724
+ "epoch": 0.6758620689655173,
725
+ "eval_loss": 0.8732376098632812,
726
+ "eval_runtime": 15.6854,
727
+ "eval_samples_per_second": 1.275,
728
+ "eval_steps_per_second": 0.638,
729
+ "step": 49
730
  }
731
  ],
732
  "logging_steps": 1,
 
746
  "attributes": {}
747
  }
748
  },
749
+ "total_flos": 6.308160393859891e+16,
750
  "train_batch_size": 2,
751
  "trial_name": null,
752
  "trial_params": null