YL95 committed on
Commit
87325b1
1 Parent(s): ff9a1a8

training state at step 110

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4551724137931035,
5
  "eval_steps": 1,
6
- "global_step": 105,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1522,6 +1522,81 @@
1522
  "eval_samples_per_second": 1.309,
1523
  "eval_steps_per_second": 0.655,
1524
  "step": 104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1525
  }
1526
  ],
1527
  "logging_steps": 1,
@@ -1541,7 +1616,7 @@
1541
  "attributes": {}
1542
  }
1543
  },
1544
- "total_flos": 1.325273987039232e+17,
1545
  "train_batch_size": 2,
1546
  "trial_name": null,
1547
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.524137931034483,
5
  "eval_steps": 1,
6
+ "global_step": 110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1522
  "eval_samples_per_second": 1.309,
1523
  "eval_steps_per_second": 0.655,
1524
  "step": 104
1525
+ },
1526
+ {
1527
+ "epoch": 1.4551724137931035,
1528
+ "grad_norm": 1.98838472366333,
1529
+ "learning_rate": 1.1363636363636365e-05,
1530
+ "loss": 1.2297,
1531
+ "step": 105
1532
+ },
1533
+ {
1534
+ "epoch": 1.4551724137931035,
1535
+ "eval_loss": 0.6155186891555786,
1536
+ "eval_runtime": 15.2373,
1537
+ "eval_samples_per_second": 1.313,
1538
+ "eval_steps_per_second": 0.656,
1539
+ "step": 105
1540
+ },
1541
+ {
1542
+ "epoch": 1.4689655172413794,
1543
+ "grad_norm": 1.8609561920166016,
1544
+ "learning_rate": 1.3636363636363637e-05,
1545
+ "loss": 0.9922,
1546
+ "step": 106
1547
+ },
1548
+ {
1549
+ "epoch": 1.4689655172413794,
1550
+ "eval_loss": 0.6143234372138977,
1551
+ "eval_runtime": 15.4977,
1552
+ "eval_samples_per_second": 1.291,
1553
+ "eval_steps_per_second": 0.645,
1554
+ "step": 106
1555
+ },
1556
+ {
1557
+ "epoch": 1.4827586206896552,
1558
+ "grad_norm": 2.010931968688965,
1559
+ "learning_rate": 1.590909090909091e-05,
1560
+ "loss": 1.1338,
1561
+ "step": 107
1562
+ },
1563
+ {
1564
+ "epoch": 1.4827586206896552,
1565
+ "eval_loss": 0.610894501209259,
1566
+ "eval_runtime": 15.4008,
1567
+ "eval_samples_per_second": 1.299,
1568
+ "eval_steps_per_second": 0.649,
1569
+ "step": 107
1570
+ },
1571
+ {
1572
+ "epoch": 1.4965517241379311,
1573
+ "grad_norm": 1.9721729755401611,
1574
+ "learning_rate": 1.8181818181818182e-05,
1575
+ "loss": 1.0459,
1576
+ "step": 108
1577
+ },
1578
+ {
1579
+ "epoch": 1.4965517241379311,
1580
+ "eval_loss": 0.606325089931488,
1581
+ "eval_runtime": 15.2962,
1582
+ "eval_samples_per_second": 1.308,
1583
+ "eval_steps_per_second": 0.654,
1584
+ "step": 108
1585
+ },
1586
+ {
1587
+ "epoch": 1.510344827586207,
1588
+ "grad_norm": 2.1253089904785156,
1589
+ "learning_rate": 2.0454545454545457e-05,
1590
+ "loss": 1.285,
1591
+ "step": 109
1592
+ },
1593
+ {
1594
+ "epoch": 1.510344827586207,
1595
+ "eval_loss": 0.6013532876968384,
1596
+ "eval_runtime": 15.3278,
1597
+ "eval_samples_per_second": 1.305,
1598
+ "eval_steps_per_second": 0.652,
1599
+ "step": 109
1600
  }
1601
  ],
1602
  "logging_steps": 1,
 
1616
  "attributes": {}
1617
  }
1618
  },
1619
+ "total_flos": 1.3916678204158771e+17,
1620
  "train_batch_size": 2,
1621
  "trial_name": null,
1622
  "trial_params": null