YL95 committed on
Commit
79acaaf
1 Parent(s): db92803

training state at step 95

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2413793103448276,
5
  "eval_steps": 1,
6
- "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1312,6 +1312,81 @@
1312
  "eval_samples_per_second": 1.108,
1313
  "eval_steps_per_second": 0.554,
1314
  "step": 89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1315
  }
1316
  ],
1317
  "logging_steps": 1,
@@ -1331,7 +1406,7 @@
1331
  "attributes": {}
1332
  }
1333
  },
1334
- "total_flos": 1.1395900243505971e+17,
1335
  "train_batch_size": 2,
1336
  "trial_name": null,
1337
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3103448275862069,
5
  "eval_steps": 1,
6
+ "global_step": 95,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1312
  "eval_samples_per_second": 1.108,
1313
  "eval_steps_per_second": 0.554,
1314
  "step": 89
1315
+ },
1316
+ {
1317
+ "epoch": 1.2413793103448276,
1318
+ "grad_norm": 2.28176212310791,
1319
+ "learning_rate": 4.944806430988927e-05,
1320
+ "loss": 1.3304,
1321
+ "step": 90
1322
+ },
1323
+ {
1324
+ "epoch": 1.2413793103448276,
1325
+ "eval_loss": 0.6826642751693726,
1326
+ "eval_runtime": 18.5055,
1327
+ "eval_samples_per_second": 1.081,
1328
+ "eval_steps_per_second": 0.54,
1329
+ "step": 90
1330
+ },
1331
+ {
1332
+ "epoch": 1.2551724137931035,
1333
+ "grad_norm": 1.894646406173706,
1334
+ "learning_rate": 4.936026311617316e-05,
1335
+ "loss": 1.0935,
1336
+ "step": 91
1337
+ },
1338
+ {
1339
+ "epoch": 1.2551724137931035,
1340
+ "eval_loss": 0.678307831287384,
1341
+ "eval_runtime": 18.3532,
1342
+ "eval_samples_per_second": 1.09,
1343
+ "eval_steps_per_second": 0.545,
1344
+ "step": 91
1345
+ },
1346
+ {
1347
+ "epoch": 1.2689655172413792,
1348
+ "grad_norm": 2.0475075244903564,
1349
+ "learning_rate": 4.926607386564898e-05,
1350
+ "loss": 1.2393,
1351
+ "step": 92
1352
+ },
1353
+ {
1354
+ "epoch": 1.2689655172413792,
1355
+ "eval_loss": 0.6765857934951782,
1356
+ "eval_runtime": 18.2689,
1357
+ "eval_samples_per_second": 1.095,
1358
+ "eval_steps_per_second": 0.547,
1359
+ "step": 92
1360
+ },
1361
+ {
1362
+ "epoch": 1.282758620689655,
1363
+ "grad_norm": 2.140949249267578,
1364
+ "learning_rate": 4.916552125781528e-05,
1365
+ "loss": 1.0277,
1366
+ "step": 93
1367
+ },
1368
+ {
1369
+ "epoch": 1.282758620689655,
1370
+ "eval_loss": 0.6735562682151794,
1371
+ "eval_runtime": 18.1407,
1372
+ "eval_samples_per_second": 1.102,
1373
+ "eval_steps_per_second": 0.551,
1374
+ "step": 93
1375
+ },
1376
+ {
1377
+ "epoch": 1.296551724137931,
1378
+ "grad_norm": 2.235147476196289,
1379
+ "learning_rate": 4.9058631660850765e-05,
1380
+ "loss": 1.2081,
1381
+ "step": 94
1382
+ },
1383
+ {
1384
+ "epoch": 1.296551724137931,
1385
+ "eval_loss": 0.6619122624397278,
1386
+ "eval_runtime": 18.2145,
1387
+ "eval_samples_per_second": 1.098,
1388
+ "eval_steps_per_second": 0.549,
1389
+ "step": 94
1390
  }
1391
  ],
1392
  "logging_steps": 1,
 
1406
  "attributes": {}
1407
  }
1408
  },
1409
+ "total_flos": 1.2071987453003366e+17,
1410
  "train_batch_size": 2,
1411
  "trial_name": null,
1412
  "trial_params": null