|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2814, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.2162, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 3e-05, |
|
"loss": 2.06, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 1.7632, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3906, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 6e-05, |
|
"loss": 1.0958, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 7e-05, |
|
"loss": 0.8168, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 8e-05, |
|
"loss": 0.6212, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 9e-05, |
|
"loss": 0.5377, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4409, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 9.972609476841367e-05, |
|
"loss": 0.4009, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 0.3598, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.2784, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 0.3299, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 0.3236, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.2714, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 8.715724127386972e-05, |
|
"loss": 0.2654, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 8.345653031794292e-05, |
|
"loss": 0.254, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.2223, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.2671, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 52.5, |
|
"learning_rate": 7.033683215379002e-05, |
|
"loss": 0.2293, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.2235, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 57.5, |
|
"learning_rate": 6.0395584540887963e-05, |
|
"loss": 0.2031, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 5.522642316338268e-05, |
|
"loss": 0.2034, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.189, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 4.477357683661734e-05, |
|
"loss": 0.1788, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 67.5, |
|
"learning_rate": 3.960441545911204e-05, |
|
"loss": 0.1698, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.2309, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 72.5, |
|
"learning_rate": 2.9663167846209998e-05, |
|
"loss": 0.1637, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.1669, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 77.5, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.1786, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.6543469682057106e-05, |
|
"loss": 0.1792, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 82.5, |
|
"learning_rate": 1.2842758726130283e-05, |
|
"loss": 0.1874, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.1805, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 0.196, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 4.322727117869951e-06, |
|
"loss": 0.1615, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 92.5, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.1481, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 1.0926199633097157e-06, |
|
"loss": 0.1633, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 97.5, |
|
"learning_rate": 2.7390523158633554e-07, |
|
"loss": 0.1673, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1748, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 400, |
|
"total_flos": 1.3814551337974377e+23, |
|
"train_loss": 0.4972930908203125, |
|
"train_runtime": 820.6819, |
|
"train_samples_per_second": 4242.326, |
|
"train_steps_per_second": 0.487 |
|
} |
|
], |
|
"max_steps": 400, |
|
"num_train_epochs": 100, |
|
"start_time": 1656513160.5917685, |
|
"total_flos": 1.3814551337974377e+23, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|