YL95 committed on
Commit
e831c16
1 Parent(s): f44038a

training state at step 90

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1724137931034484,
5
  "eval_steps": 1,
6
- "global_step": 85,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1237,6 +1237,81 @@
1237
  "eval_samples_per_second": 1.102,
1238
  "eval_steps_per_second": 0.551,
1239
  "step": 84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1240
  }
1241
  ],
1242
  "logging_steps": 1,
@@ -1256,7 +1331,7 @@
1256
  "attributes": {}
1257
  }
1258
  },
1259
- "total_flos": 1.074128969822208e+17,
1260
  "train_batch_size": 2,
1261
  "trial_name": null,
1262
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2413793103448276,
5
  "eval_steps": 1,
6
+ "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1237
  "eval_samples_per_second": 1.102,
1238
  "eval_steps_per_second": 0.551,
1239
  "step": 84
1240
+ },
1241
+ {
1242
+ "epoch": 1.1724137931034484,
1243
+ "grad_norm": 2.1747775077819824,
1244
+ "learning_rate": 4.9790502530660635e-05,
1245
+ "loss": 1.1778,
1246
+ "step": 85
1247
+ },
1248
+ {
1249
+ "epoch": 1.1724137931034484,
1250
+ "eval_loss": 0.6942981481552124,
1251
+ "eval_runtime": 18.8763,
1252
+ "eval_samples_per_second": 1.06,
1253
+ "eval_steps_per_second": 0.53,
1254
+ "step": 85
1255
+ },
1256
+ {
1257
+ "epoch": 1.186206896551724,
1258
+ "grad_norm": 2.152348041534424,
1259
+ "learning_rate": 4.9734953280908904e-05,
1260
+ "loss": 1.331,
1261
+ "step": 86
1262
+ },
1263
+ {
1264
+ "epoch": 1.186206896551724,
1265
+ "eval_loss": 0.6978840827941895,
1266
+ "eval_runtime": 18.349,
1267
+ "eval_samples_per_second": 1.09,
1268
+ "eval_steps_per_second": 0.545,
1269
+ "step": 86
1270
+ },
1271
+ {
1272
+ "epoch": 1.2,
1273
+ "grad_norm": 2.069314956665039,
1274
+ "learning_rate": 4.967291771834727e-05,
1275
+ "loss": 1.1638,
1276
+ "step": 87
1277
+ },
1278
+ {
1279
+ "epoch": 1.2,
1280
+ "eval_loss": 0.6983293294906616,
1281
+ "eval_runtime": 18.1961,
1282
+ "eval_samples_per_second": 1.099,
1283
+ "eval_steps_per_second": 0.55,
1284
+ "step": 87
1285
+ },
1286
+ {
1287
+ "epoch": 1.2137931034482758,
1288
+ "grad_norm": 2.037853717803955,
1289
+ "learning_rate": 4.960441211072686e-05,
1290
+ "loss": 1.1118,
1291
+ "step": 88
1292
+ },
1293
+ {
1294
+ "epoch": 1.2137931034482758,
1295
+ "eval_loss": 0.6962876915931702,
1296
+ "eval_runtime": 18.1105,
1297
+ "eval_samples_per_second": 1.104,
1298
+ "eval_steps_per_second": 0.552,
1299
+ "step": 88
1300
+ },
1301
+ {
1302
+ "epoch": 1.2275862068965517,
1303
+ "grad_norm": 1.9961076974868774,
1304
+ "learning_rate": 4.9529454422455976e-05,
1305
+ "loss": 1.0972,
1306
+ "step": 89
1307
+ },
1308
+ {
1309
+ "epoch": 1.2275862068965517,
1310
+ "eval_loss": 0.6896785497665405,
1311
+ "eval_runtime": 18.0553,
1312
+ "eval_samples_per_second": 1.108,
1313
+ "eval_steps_per_second": 0.554,
1314
+ "step": 89
1315
  }
1316
  ],
1317
  "logging_steps": 1,
 
1331
  "attributes": {}
1332
  }
1333
  },
1334
+ "total_flos": 1.1395900243505971e+17,
1335
  "train_batch_size": 2,
1336
  "trial_name": null,
1337
  "trial_params": null