training state at step 85
Browse files- trainer_state.json +78 -3
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 1,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1162,6 +1162,81 @@
|
|
1162 |
"eval_samples_per_second": 1.107,
|
1163 |
"eval_steps_per_second": 0.554,
|
1164 |
"step": 79
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1165 |
}
|
1166 |
],
|
1167 |
"logging_steps": 1,
|
@@ -1181,7 +1256,7 @@
|
|
1181 |
"attributes": {}
|
1182 |
}
|
1183 |
},
|
1184 |
-
"total_flos": 1.
|
1185 |
"train_batch_size": 2,
|
1186 |
"trial_name": null,
|
1187 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.1724137931034484,
|
5 |
"eval_steps": 1,
|
6 |
+
"global_step": 85,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1162 |
"eval_samples_per_second": 1.107,
|
1163 |
"eval_steps_per_second": 0.554,
|
1164 |
"step": 79
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"epoch": 1.103448275862069,
|
1168 |
+
"grad_norm": 2.047912836074829,
|
1169 |
+
"learning_rate": 4.997050398198977e-05,
|
1170 |
+
"loss": 1.0507,
|
1171 |
+
"step": 80
|
1172 |
+
},
|
1173 |
+
{
|
1174 |
+
"epoch": 1.103448275862069,
|
1175 |
+
"eval_loss": 0.7239590883255005,
|
1176 |
+
"eval_runtime": 18.4343,
|
1177 |
+
"eval_samples_per_second": 1.085,
|
1178 |
+
"eval_steps_per_second": 0.542,
|
1179 |
+
"step": 80
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"epoch": 1.1172413793103448,
|
1183 |
+
"grad_norm": 2.004422664642334,
|
1184 |
+
"learning_rate": 4.9947570655942796e-05,
|
1185 |
+
"loss": 0.9516,
|
1186 |
+
"step": 81
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 1.1172413793103448,
|
1190 |
+
"eval_loss": 0.7144821882247925,
|
1191 |
+
"eval_runtime": 18.3983,
|
1192 |
+
"eval_samples_per_second": 1.087,
|
1193 |
+
"eval_steps_per_second": 0.544,
|
1194 |
+
"step": 81
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"epoch": 1.1310344827586207,
|
1198 |
+
"grad_norm": 2.013328790664673,
|
1199 |
+
"learning_rate": 4.991809526186424e-05,
|
1200 |
+
"loss": 1.0593,
|
1201 |
+
"step": 82
|
1202 |
+
},
|
1203 |
+
{
|
1204 |
+
"epoch": 1.1310344827586207,
|
1205 |
+
"eval_loss": 0.7059406638145447,
|
1206 |
+
"eval_runtime": 18.2362,
|
1207 |
+
"eval_samples_per_second": 1.097,
|
1208 |
+
"eval_steps_per_second": 0.548,
|
1209 |
+
"step": 82
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"epoch": 1.1448275862068966,
|
1213 |
+
"grad_norm": 2.068134069442749,
|
1214 |
+
"learning_rate": 4.988208552916535e-05,
|
1215 |
+
"loss": 1.1188,
|
1216 |
+
"step": 83
|
1217 |
+
},
|
1218 |
+
{
|
1219 |
+
"epoch": 1.1448275862068966,
|
1220 |
+
"eval_loss": 0.7021835446357727,
|
1221 |
+
"eval_runtime": 18.1868,
|
1222 |
+
"eval_samples_per_second": 1.1,
|
1223 |
+
"eval_steps_per_second": 0.55,
|
1224 |
+
"step": 83
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 1.1586206896551725,
|
1228 |
+
"grad_norm": 2.2628672122955322,
|
1229 |
+
"learning_rate": 4.983955090077444e-05,
|
1230 |
+
"loss": 1.1473,
|
1231 |
+
"step": 84
|
1232 |
+
},
|
1233 |
+
{
|
1234 |
+
"epoch": 1.1586206896551725,
|
1235 |
+
"eval_loss": 0.6942790150642395,
|
1236 |
+
"eval_runtime": 18.1494,
|
1237 |
+
"eval_samples_per_second": 1.102,
|
1238 |
+
"eval_steps_per_second": 0.551,
|
1239 |
+
"step": 84
|
1240 |
}
|
1241 |
],
|
1242 |
"logging_steps": 1,
|
|
|
1256 |
"attributes": {}
|
1257 |
}
|
1258 |
},
|
1259 |
+
"total_flos": 1.074128969822208e+17,
|
1260 |
"train_batch_size": 2,
|
1261 |
"trial_name": null,
|
1262 |
"trial_params": null
|