training state at step 90
Browse files- trainer_state.json +78 -3
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 1,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1237,6 +1237,81 @@
|
|
1237 |
"eval_samples_per_second": 1.102,
|
1238 |
"eval_steps_per_second": 0.551,
|
1239 |
"step": 84
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1240 |
}
|
1241 |
],
|
1242 |
"logging_steps": 1,
|
@@ -1256,7 +1331,7 @@
|
|
1256 |
"attributes": {}
|
1257 |
}
|
1258 |
},
|
1259 |
-
"total_flos": 1.
|
1260 |
"train_batch_size": 2,
|
1261 |
"trial_name": null,
|
1262 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.2413793103448276,
|
5 |
"eval_steps": 1,
|
6 |
+
"global_step": 90,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1237 |
"eval_samples_per_second": 1.102,
|
1238 |
"eval_steps_per_second": 0.551,
|
1239 |
"step": 84
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"epoch": 1.1724137931034484,
|
1243 |
+
"grad_norm": 2.1747775077819824,
|
1244 |
+
"learning_rate": 4.9790502530660635e-05,
|
1245 |
+
"loss": 1.1778,
|
1246 |
+
"step": 85
|
1247 |
+
},
|
1248 |
+
{
|
1249 |
+
"epoch": 1.1724137931034484,
|
1250 |
+
"eval_loss": 0.6942981481552124,
|
1251 |
+
"eval_runtime": 18.8763,
|
1252 |
+
"eval_samples_per_second": 1.06,
|
1253 |
+
"eval_steps_per_second": 0.53,
|
1254 |
+
"step": 85
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"epoch": 1.186206896551724,
|
1258 |
+
"grad_norm": 2.152348041534424,
|
1259 |
+
"learning_rate": 4.9734953280908904e-05,
|
1260 |
+
"loss": 1.331,
|
1261 |
+
"step": 86
|
1262 |
+
},
|
1263 |
+
{
|
1264 |
+
"epoch": 1.186206896551724,
|
1265 |
+
"eval_loss": 0.6978840827941895,
|
1266 |
+
"eval_runtime": 18.349,
|
1267 |
+
"eval_samples_per_second": 1.09,
|
1268 |
+
"eval_steps_per_second": 0.545,
|
1269 |
+
"step": 86
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"epoch": 1.2,
|
1273 |
+
"grad_norm": 2.069314956665039,
|
1274 |
+
"learning_rate": 4.967291771834727e-05,
|
1275 |
+
"loss": 1.1638,
|
1276 |
+
"step": 87
|
1277 |
+
},
|
1278 |
+
{
|
1279 |
+
"epoch": 1.2,
|
1280 |
+
"eval_loss": 0.6983293294906616,
|
1281 |
+
"eval_runtime": 18.1961,
|
1282 |
+
"eval_samples_per_second": 1.099,
|
1283 |
+
"eval_steps_per_second": 0.55,
|
1284 |
+
"step": 87
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 1.2137931034482758,
|
1288 |
+
"grad_norm": 2.037853717803955,
|
1289 |
+
"learning_rate": 4.960441211072686e-05,
|
1290 |
+
"loss": 1.1118,
|
1291 |
+
"step": 88
|
1292 |
+
},
|
1293 |
+
{
|
1294 |
+
"epoch": 1.2137931034482758,
|
1295 |
+
"eval_loss": 0.6962876915931702,
|
1296 |
+
"eval_runtime": 18.1105,
|
1297 |
+
"eval_samples_per_second": 1.104,
|
1298 |
+
"eval_steps_per_second": 0.552,
|
1299 |
+
"step": 88
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"epoch": 1.2275862068965517,
|
1303 |
+
"grad_norm": 1.9961076974868774,
|
1304 |
+
"learning_rate": 4.9529454422455976e-05,
|
1305 |
+
"loss": 1.0972,
|
1306 |
+
"step": 89
|
1307 |
+
},
|
1308 |
+
{
|
1309 |
+
"epoch": 1.2275862068965517,
|
1310 |
+
"eval_loss": 0.6896785497665405,
|
1311 |
+
"eval_runtime": 18.0553,
|
1312 |
+
"eval_samples_per_second": 1.108,
|
1313 |
+
"eval_steps_per_second": 0.554,
|
1314 |
+
"step": 89
|
1315 |
}
|
1316 |
],
|
1317 |
"logging_steps": 1,
|
|
|
1331 |
"attributes": {}
|
1332 |
}
|
1333 |
},
|
1334 |
+
"total_flos": 1.1395900243505971e+17,
|
1335 |
"train_batch_size": 2,
|
1336 |
"trial_name": null,
|
1337 |
"trial_params": null
|