YL95 committed on
Commit
bafeaa8
1 Parent(s): 8d9e30d

training state at step 60

Browse files
Files changed (1) hide show
  1. trainer_state.json +63 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7586206896551724,
5
  "eval_steps": 1,
6
- "global_step": 55,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -802,6 +802,66 @@
802
  "eval_samples_per_second": 1.268,
803
  "eval_steps_per_second": 0.634,
804
  "step": 54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
805
  }
806
  ],
807
  "logging_steps": 1,
@@ -821,7 +881,7 @@
821
  "attributes": {}
822
  }
823
  },
824
- "total_flos": 6.947646271379866e+16,
825
  "train_batch_size": 2,
826
  "trial_name": null,
827
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8275862068965517,
5
  "eval_steps": 1,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
802
  "eval_samples_per_second": 1.268,
803
  "eval_steps_per_second": 0.634,
804
  "step": 54
805
+ },
806
+ {
807
+ "epoch": 0.7724137931034483,
808
+ "grad_norm": 2.1803667545318604,
809
+ "learning_rate": 2.2727272727272728e-06,
810
+ "loss": 1.4271,
811
+ "step": 56
812
+ },
813
+ {
814
+ "epoch": 0.7724137931034483,
815
+ "eval_loss": 0.8433731198310852,
816
+ "eval_runtime": 17.9885,
817
+ "eval_samples_per_second": 1.112,
818
+ "eval_steps_per_second": 0.556,
819
+ "step": 56
820
+ },
821
+ {
822
+ "epoch": 0.7862068965517242,
823
+ "grad_norm": 2.3162448406219482,
824
+ "learning_rate": 4.5454545454545455e-06,
825
+ "loss": 1.4689,
826
+ "step": 57
827
+ },
828
+ {
829
+ "epoch": 0.7862068965517242,
830
+ "eval_loss": 0.8418852090835571,
831
+ "eval_runtime": 18.2763,
832
+ "eval_samples_per_second": 1.094,
833
+ "eval_steps_per_second": 0.547,
834
+ "step": 57
835
+ },
836
+ {
837
+ "epoch": 0.8,
838
+ "grad_norm": 1.9732853174209595,
839
+ "learning_rate": 6.818181818181818e-06,
840
+ "loss": 1.2825,
841
+ "step": 58
842
+ },
843
+ {
844
+ "epoch": 0.8,
845
+ "eval_loss": 0.8386393785476685,
846
+ "eval_runtime": 18.1184,
847
+ "eval_samples_per_second": 1.104,
848
+ "eval_steps_per_second": 0.552,
849
+ "step": 58
850
+ },
851
+ {
852
+ "epoch": 0.8137931034482758,
853
+ "grad_norm": 2.0547423362731934,
854
+ "learning_rate": 9.090909090909091e-06,
855
+ "loss": 1.2972,
856
+ "step": 59
857
+ },
858
+ {
859
+ "epoch": 0.8137931034482758,
860
+ "eval_loss": 0.8355510830879211,
861
+ "eval_runtime": 18.2216,
862
+ "eval_samples_per_second": 1.098,
863
+ "eval_steps_per_second": 0.549,
864
+ "step": 59
865
  }
866
  ],
867
  "logging_steps": 1,
 
881
  "attributes": {}
882
  }
883
  },
884
+ "total_flos": 7.477855969291469e+16,
885
  "train_batch_size": 2,
886
  "trial_name": null,
887
  "trial_params": null