YL95 committed on
Commit
09d62e2
1 Parent(s): 68bd8b3

training state at step 105

Browse files
Files changed (1) hide show
  1. trainer_state.json +63 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3793103448275863,
5
  "eval_steps": 1,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1462,6 +1462,66 @@
1462
  "eval_samples_per_second": 1.101,
1463
  "eval_steps_per_second": 0.55,
1464
  "step": 99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1465
  }
1466
  ],
1467
  "logging_steps": 1,
@@ -1481,7 +1541,7 @@
1481
  "attributes": {}
1482
  }
1483
  },
1484
- "total_flos": 1.2709753377329971e+17,
1485
  "train_batch_size": 2,
1486
  "trial_name": null,
1487
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4551724137931035,
5
  "eval_steps": 1,
6
+ "global_step": 105,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1462
  "eval_samples_per_second": 1.101,
1463
  "eval_steps_per_second": 0.55,
1464
  "step": 99
1465
+ },
1466
+ {
1467
+ "epoch": 1.4,
1468
+ "grad_norm": 2.0707523822784424,
1469
+ "learning_rate": 2.2727272727272728e-06,
1470
+ "loss": 1.2557,
1471
+ "step": 101
1472
+ },
1473
+ {
1474
+ "epoch": 1.4,
1475
+ "eval_loss": 0.6193124055862427,
1476
+ "eval_runtime": 15.3335,
1477
+ "eval_samples_per_second": 1.304,
1478
+ "eval_steps_per_second": 0.652,
1479
+ "step": 101
1480
+ },
1481
+ {
1482
+ "epoch": 1.4137931034482758,
1483
+ "grad_norm": 2.099379539489746,
1484
+ "learning_rate": 4.5454545454545455e-06,
1485
+ "loss": 1.1953,
1486
+ "step": 102
1487
+ },
1488
+ {
1489
+ "epoch": 1.4137931034482758,
1490
+ "eval_loss": 0.6185603141784668,
1491
+ "eval_runtime": 15.3458,
1492
+ "eval_samples_per_second": 1.303,
1493
+ "eval_steps_per_second": 0.652,
1494
+ "step": 102
1495
+ },
1496
+ {
1497
+ "epoch": 1.4275862068965517,
1498
+ "grad_norm": 1.9917728900909424,
1499
+ "learning_rate": 6.818181818181818e-06,
1500
+ "loss": 1.2443,
1501
+ "step": 103
1502
+ },
1503
+ {
1504
+ "epoch": 1.4275862068965517,
1505
+ "eval_loss": 0.6176949739456177,
1506
+ "eval_runtime": 15.2437,
1507
+ "eval_samples_per_second": 1.312,
1508
+ "eval_steps_per_second": 0.656,
1509
+ "step": 103
1510
+ },
1511
+ {
1512
+ "epoch": 1.4413793103448276,
1513
+ "grad_norm": 1.8723604679107666,
1514
+ "learning_rate": 9.090909090909091e-06,
1515
+ "loss": 1.0864,
1516
+ "step": 104
1517
+ },
1518
+ {
1519
+ "epoch": 1.4413793103448276,
1520
+ "eval_loss": 0.6164005994796753,
1521
+ "eval_runtime": 15.2753,
1522
+ "eval_samples_per_second": 1.309,
1523
+ "eval_steps_per_second": 0.655,
1524
+ "step": 104
1525
  }
1526
  ],
1527
  "logging_steps": 1,
 
1541
  "attributes": {}
1542
  }
1543
  },
1544
+ "total_flos": 1.325273987039232e+17,
1545
  "train_batch_size": 2,
1546
  "trial_name": null,
1547
  "trial_params": null