YL95 committed on
Commit
1e73f0b
1 Parent(s): ab60d15

training state at step 65

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8275862068965517,
5
  "eval_steps": 1,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -862,6 +862,81 @@
862
  "eval_samples_per_second": 1.098,
863
  "eval_steps_per_second": 0.549,
864
  "step": 59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
865
  }
866
  ],
867
  "logging_steps": 1,
@@ -881,7 +956,7 @@
881
  "attributes": {}
882
  }
883
  },
884
- "total_flos": 7.477855969291469e+16,
885
  "train_batch_size": 2,
886
  "trial_name": null,
887
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.896551724137931,
5
  "eval_steps": 1,
6
+ "global_step": 65,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
862
  "eval_samples_per_second": 1.098,
863
  "eval_steps_per_second": 0.549,
864
  "step": 59
865
+ },
866
+ {
867
+ "epoch": 0.8275862068965517,
868
+ "grad_norm": 2.0684103965759277,
869
+ "learning_rate": 1.1363636363636365e-05,
870
+ "loss": 1.3615,
871
+ "step": 60
872
+ },
873
+ {
874
+ "epoch": 0.8275862068965517,
875
+ "eval_loss": 0.8328086137771606,
876
+ "eval_runtime": 18.8073,
877
+ "eval_samples_per_second": 1.063,
878
+ "eval_steps_per_second": 0.532,
879
+ "step": 60
880
+ },
881
+ {
882
+ "epoch": 0.8413793103448276,
883
+ "grad_norm": 2.0212347507476807,
884
+ "learning_rate": 1.3636363636363637e-05,
885
+ "loss": 1.3648,
886
+ "step": 61
887
+ },
888
+ {
889
+ "epoch": 0.8413793103448276,
890
+ "eval_loss": 0.8308294415473938,
891
+ "eval_runtime": 18.376,
892
+ "eval_samples_per_second": 1.088,
893
+ "eval_steps_per_second": 0.544,
894
+ "step": 61
895
+ },
896
+ {
897
+ "epoch": 0.8551724137931035,
898
+ "grad_norm": 1.9967029094696045,
899
+ "learning_rate": 1.590909090909091e-05,
900
+ "loss": 1.4334,
901
+ "step": 62
902
+ },
903
+ {
904
+ "epoch": 0.8551724137931035,
905
+ "eval_loss": 0.8297985792160034,
906
+ "eval_runtime": 18.24,
907
+ "eval_samples_per_second": 1.096,
908
+ "eval_steps_per_second": 0.548,
909
+ "step": 62
910
+ },
911
+ {
912
+ "epoch": 0.8689655172413793,
913
+ "grad_norm": 1.956730842590332,
914
+ "learning_rate": 1.8181818181818182e-05,
915
+ "loss": 1.246,
916
+ "step": 63
917
+ },
918
+ {
919
+ "epoch": 0.8689655172413793,
920
+ "eval_loss": 0.8276138305664062,
921
+ "eval_runtime": 18.1099,
922
+ "eval_samples_per_second": 1.104,
923
+ "eval_steps_per_second": 0.552,
924
+ "step": 63
925
+ },
926
+ {
927
+ "epoch": 0.8827586206896552,
928
+ "grad_norm": 1.8840367794036865,
929
+ "learning_rate": 2.0454545454545457e-05,
930
+ "loss": 1.2346,
931
+ "step": 64
932
+ },
933
+ {
934
+ "epoch": 0.8827586206896552,
935
+ "eval_loss": 0.8268927335739136,
936
+ "eval_runtime": 18.2242,
937
+ "eval_samples_per_second": 1.097,
938
+ "eval_steps_per_second": 0.549,
939
+ "step": 64
940
  }
941
  ],
942
  "logging_steps": 1,
 
956
  "attributes": {}
957
  }
958
  },
959
+ "total_flos": 8.144087573982413e+16,
960
  "train_batch_size": 2,
961
  "trial_name": null,
962
  "trial_params": null