YL95 committed on
Commit
d3c5397
1 Parent(s): ddc96ba

training state at step 75

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9655172413793104,
5
  "eval_steps": 1,
6
- "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1012,6 +1012,81 @@
1012
  "eval_samples_per_second": 1.098,
1013
  "eval_steps_per_second": 0.549,
1014
  "step": 69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1015
  }
1016
  ],
1017
  "logging_steps": 1,
@@ -1031,7 +1106,7 @@
1031
  "attributes": {}
1032
  }
1033
  },
1034
- "total_flos": 8.799498944033587e+16,
1035
  "train_batch_size": 2,
1036
  "trial_name": null,
1037
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0344827586206897,
5
  "eval_steps": 1,
6
+ "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1012
  "eval_samples_per_second": 1.098,
1013
  "eval_steps_per_second": 0.549,
1014
  "step": 69
1015
+ },
1016
+ {
1017
+ "epoch": 0.9655172413793104,
1018
+ "grad_norm": 2.1164538860321045,
1019
+ "learning_rate": 3.409090909090909e-05,
1020
+ "loss": 1.3873,
1021
+ "step": 70
1022
+ },
1023
+ {
1024
+ "epoch": 0.9655172413793104,
1025
+ "eval_loss": 0.7753366231918335,
1026
+ "eval_runtime": 18.5302,
1027
+ "eval_samples_per_second": 1.079,
1028
+ "eval_steps_per_second": 0.54,
1029
+ "step": 70
1030
+ },
1031
+ {
1032
+ "epoch": 0.9793103448275862,
1033
+ "grad_norm": 2.032721996307373,
1034
+ "learning_rate": 3.6363636363636364e-05,
1035
+ "loss": 1.232,
1036
+ "step": 71
1037
+ },
1038
+ {
1039
+ "epoch": 0.9793103448275862,
1040
+ "eval_loss": 0.7628229856491089,
1041
+ "eval_runtime": 18.4062,
1042
+ "eval_samples_per_second": 1.087,
1043
+ "eval_steps_per_second": 0.543,
1044
+ "step": 71
1045
+ },
1046
+ {
1047
+ "epoch": 0.993103448275862,
1048
+ "grad_norm": 2.1039462089538574,
1049
+ "learning_rate": 3.8636363636363636e-05,
1050
+ "loss": 1.2715,
1051
+ "step": 72
1052
+ },
1053
+ {
1054
+ "epoch": 0.993103448275862,
1055
+ "eval_loss": 0.751362681388855,
1056
+ "eval_runtime": 18.2628,
1057
+ "eval_samples_per_second": 1.095,
1058
+ "eval_steps_per_second": 0.548,
1059
+ "step": 72
1060
+ },
1061
+ {
1062
+ "epoch": 1.006896551724138,
1063
+ "grad_norm": 2.1415343284606934,
1064
+ "learning_rate": 4.0909090909090915e-05,
1065
+ "loss": 1.3012,
1066
+ "step": 73
1067
+ },
1068
+ {
1069
+ "epoch": 1.006896551724138,
1070
+ "eval_loss": 0.7407116293907166,
1071
+ "eval_runtime": 18.1993,
1072
+ "eval_samples_per_second": 1.099,
1073
+ "eval_steps_per_second": 0.549,
1074
+ "step": 73
1075
+ },
1076
+ {
1077
+ "epoch": 1.0206896551724138,
1078
+ "grad_norm": 1.9539107084274292,
1079
+ "learning_rate": 4.318181818181819e-05,
1080
+ "loss": 1.1411,
1081
+ "step": 74
1082
+ },
1083
+ {
1084
+ "epoch": 1.0206896551724138,
1085
+ "eval_loss": 0.7367935180664062,
1086
+ "eval_runtime": 18.2237,
1087
+ "eval_samples_per_second": 1.097,
1088
+ "eval_steps_per_second": 0.549,
1089
+ "step": 74
1090
  }
1091
  ],
1092
  "logging_steps": 1,
 
1106
  "attributes": {}
1107
  }
1108
  },
1109
+ "total_flos": 9.437537876803584e+16,
1110
  "train_batch_size": 2,
1111
  "trial_name": null,
1112
  "trial_params": null