|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.99970457902511,
  "global_step": 16920,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47,
      "learning_rate": 0.00029775,
      "loss": 3.3587,
      "step": 400
    },
    {
      "epoch": 0.47,
      "eval_loss": 1.1883399486541748,
      "eval_runtime": 298.8414,
      "eval_samples_per_second": 16.986,
      "eval_steps_per_second": 2.831,
      "eval_wer": 0.8392195865162833,
      "step": 400
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.00029279055690072635,
      "loss": 1.8377,
      "step": 800
    },
    {
      "epoch": 0.95,
      "eval_loss": 0.8830727338790894,
      "eval_runtime": 295.1554,
      "eval_samples_per_second": 17.198,
      "eval_steps_per_second": 2.866,
      "eval_wer": 0.6852318175085172,
      "step": 800
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.00028552663438256653,
      "loss": 1.7118,
      "step": 1200
    },
    {
      "epoch": 1.42,
      "eval_loss": 0.8031275868415833,
      "eval_runtime": 296.1538,
      "eval_samples_per_second": 17.14,
      "eval_steps_per_second": 2.857,
      "eval_wer": 0.6565588166832427,
      "step": 1200
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.00027826271186440676,
      "loss": 1.6741,
      "step": 1600
    },
    {
      "epoch": 1.89,
      "eval_loss": 0.7517648339271545,
      "eval_runtime": 296.1537,
      "eval_samples_per_second": 17.14,
      "eval_steps_per_second": 2.857,
      "eval_wer": 0.6104492456144063,
      "step": 1600
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.00027099878934624694,
      "loss": 1.6163,
      "step": 2000
    },
    {
      "epoch": 2.36,
      "eval_loss": 0.6887586712837219,
      "eval_runtime": 298.4301,
      "eval_samples_per_second": 17.009,
      "eval_steps_per_second": 2.835,
      "eval_wer": 0.5590706138773092,
      "step": 2000
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002637348668280871,
      "loss": 1.5782,
      "step": 2400
    },
    {
      "epoch": 2.84,
      "eval_loss": 0.6580386161804199,
      "eval_runtime": 296.628,
      "eval_samples_per_second": 17.112,
      "eval_steps_per_second": 2.852,
      "eval_wer": 0.5164525890344288,
      "step": 2400
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.0002564709443099273,
      "loss": 1.5548,
      "step": 2800
    },
    {
      "epoch": 3.31,
      "eval_loss": 0.6505530476570129,
      "eval_runtime": 300.5251,
      "eval_samples_per_second": 16.89,
      "eval_steps_per_second": 2.815,
      "eval_wer": 0.5184417123389128,
      "step": 2800
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.00024920702179176754,
      "loss": 1.5249,
      "step": 3200
    },
    {
      "epoch": 3.78,
      "eval_loss": 0.6197642683982849,
      "eval_runtime": 298.2224,
      "eval_samples_per_second": 17.021,
      "eval_steps_per_second": 2.837,
      "eval_wer": 0.5028461391963095,
      "step": 3200
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.00024194309927360772,
      "loss": 1.5078,
      "step": 3600
    },
    {
      "epoch": 4.26,
      "eval_loss": 0.5992246866226196,
      "eval_runtime": 298.0744,
      "eval_samples_per_second": 17.029,
      "eval_steps_per_second": 2.838,
      "eval_wer": 0.4932179359671583,
      "step": 3600
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.0002346791767554479,
      "loss": 1.4836,
      "step": 4000
    },
    {
      "epoch": 4.73,
      "eval_loss": 0.5705241560935974,
      "eval_runtime": 292.5306,
      "eval_samples_per_second": 17.352,
      "eval_steps_per_second": 2.892,
      "eval_wer": 0.46513743995598533,
      "step": 4000
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.0002274152542372881,
      "loss": 1.4505,
      "step": 4400
    },
    {
      "epoch": 5.2,
      "eval_loss": 0.5488837957382202,
      "eval_runtime": 300.7764,
      "eval_samples_per_second": 16.876,
      "eval_steps_per_second": 2.813,
      "eval_wer": 0.4507691982140212,
      "step": 4400
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.00022015133171912832,
      "loss": 1.4481,
      "step": 4800
    },
    {
      "epoch": 5.67,
      "eval_loss": 0.5577110052108765,
      "eval_runtime": 299.1357,
      "eval_samples_per_second": 16.969,
      "eval_steps_per_second": 2.828,
      "eval_wer": 0.45624986774446114,
      "step": 4800
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.0002128874092009685,
      "loss": 1.4136,
      "step": 5200
    },
    {
      "epoch": 6.15,
      "eval_loss": 0.5452219247817993,
      "eval_runtime": 298.6905,
      "eval_samples_per_second": 16.994,
      "eval_steps_per_second": 2.832,
      "eval_wer": 0.4370992657172482,
      "step": 5200
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.00020564164648910412,
      "loss": 1.3861,
      "step": 5600
    },
    {
      "epoch": 6.62,
      "eval_loss": 0.510087788105011,
      "eval_runtime": 298.7447,
      "eval_samples_per_second": 16.991,
      "eval_steps_per_second": 2.832,
      "eval_wer": 0.40865903464037073,
      "step": 5600
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.0001983777239709443,
      "loss": 1.3772,
      "step": 6000
    },
    {
      "epoch": 7.09,
      "eval_loss": 0.49333110451698303,
      "eval_runtime": 298.2699,
      "eval_samples_per_second": 17.018,
      "eval_steps_per_second": 2.836,
      "eval_wer": 0.39513722834712317,
      "step": 6000
    },
    {
      "epoch": 7.56,
      "learning_rate": 0.00019111380145278448,
      "loss": 1.3478,
      "step": 6400
    },
    {
      "epoch": 7.56,
      "eval_loss": 0.4849308133125305,
      "eval_runtime": 299.279,
      "eval_samples_per_second": 16.961,
      "eval_steps_per_second": 2.827,
      "eval_wer": 0.3921747042766151,
      "step": 6400
    },
    {
      "epoch": 8.04,
      "learning_rate": 0.0001838498789346247,
      "loss": 1.3394,
      "step": 6800
    },
    {
      "epoch": 8.04,
      "eval_loss": 0.4805210828781128,
      "eval_runtime": 297.1556,
      "eval_samples_per_second": 17.082,
      "eval_steps_per_second": 2.847,
      "eval_wer": 0.3891698584336712,
      "step": 6800
    },
    {
      "epoch": 8.51,
      "learning_rate": 0.00017658595641646487,
      "loss": 1.3095,
      "step": 7200
    },
    {
      "epoch": 8.51,
      "eval_loss": 0.48388615250587463,
      "eval_runtime": 298.6068,
      "eval_samples_per_second": 16.999,
      "eval_steps_per_second": 2.833,
      "eval_wer": 0.38335061472374465,
      "step": 7200
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.00016932203389830508,
      "loss": 1.306,
      "step": 7600
    },
    {
      "epoch": 8.98,
      "eval_loss": 0.461063027381897,
      "eval_runtime": 296.7161,
      "eval_samples_per_second": 17.107,
      "eval_steps_per_second": 2.851,
      "eval_wer": 0.35865586050743803,
      "step": 7600
    },
    {
      "epoch": 9.46,
      "learning_rate": 0.00016205811138014526,
      "loss": 1.2707,
      "step": 8000
    },
    {
      "epoch": 9.46,
      "eval_loss": 0.4544948935508728,
      "eval_runtime": 298.2639,
      "eval_samples_per_second": 17.018,
      "eval_steps_per_second": 2.836,
      "eval_wer": 0.3730241022494022,
      "step": 8000
    },
    {
      "epoch": 9.93,
      "learning_rate": 0.00015479418886198547,
      "loss": 1.2626,
      "step": 8400
    },
    {
      "epoch": 9.93,
      "eval_loss": 0.4515869617462158,
      "eval_runtime": 299.4234,
      "eval_samples_per_second": 16.953,
      "eval_steps_per_second": 2.825,
      "eval_wer": 0.35241339907315317,
      "step": 8400
    },
    {
      "epoch": 10.4,
      "learning_rate": 0.00014753026634382565,
      "loss": 1.2412,
      "step": 8800
    },
    {
      "epoch": 10.4,
      "eval_loss": 0.4314204454421997,
      "eval_runtime": 294.8808,
      "eval_samples_per_second": 17.214,
      "eval_steps_per_second": 2.869,
      "eval_wer": 0.3310197431068413,
      "step": 8800
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.00014026634382566583,
      "loss": 1.2456,
      "step": 9200
    },
    {
      "epoch": 10.87,
      "eval_loss": 0.4400792419910431,
      "eval_runtime": 296.6883,
      "eval_samples_per_second": 17.109,
      "eval_steps_per_second": 2.851,
      "eval_wer": 0.3458958461180354,
      "step": 9200
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.00013302058111380144,
      "loss": 1.2081,
      "step": 9600
    },
    {
      "epoch": 11.35,
      "eval_loss": 0.4399240016937256,
      "eval_runtime": 295.1566,
      "eval_samples_per_second": 17.198,
      "eval_steps_per_second": 2.866,
      "eval_wer": 0.33563281630234676,
      "step": 9600
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.00012575665859564165,
      "loss": 1.1998,
      "step": 10000
    },
    {
      "epoch": 11.82,
      "eval_loss": 0.41947221755981445,
      "eval_runtime": 296.8576,
      "eval_samples_per_second": 17.099,
      "eval_steps_per_second": 2.85,
      "eval_wer": 0.3215396660812155,
      "step": 10000
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.00011851089588377723,
      "loss": 1.1826,
      "step": 10400
    },
    {
      "epoch": 12.29,
      "eval_loss": 0.4220864474773407,
      "eval_runtime": 296.3734,
      "eval_samples_per_second": 17.127,
      "eval_steps_per_second": 2.855,
      "eval_wer": 0.3177518674482087,
      "step": 10400
    },
    {
      "epoch": 12.77,
      "learning_rate": 0.00011124697336561742,
      "loss": 1.1573,
      "step": 10800
    },
    {
      "epoch": 12.77,
      "eval_loss": 0.4097737967967987,
      "eval_runtime": 297.6541,
      "eval_samples_per_second": 17.053,
      "eval_steps_per_second": 2.842,
      "eval_wer": 0.3084199166261083,
      "step": 10800
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.00010398305084745762,
      "loss": 1.1416,
      "step": 11200
    },
    {
      "epoch": 13.24,
      "eval_loss": 0.4085627496242523,
      "eval_runtime": 296.7019,
      "eval_samples_per_second": 17.108,
      "eval_steps_per_second": 2.851,
      "eval_wer": 0.3119114628520642,
      "step": 11200
    },
    {
      "epoch": 13.71,
      "learning_rate": 9.671912832929781e-05,
      "loss": 1.1174,
      "step": 11600
    },
    {
      "epoch": 13.71,
      "eval_loss": 0.3854358196258545,
      "eval_runtime": 297.2728,
      "eval_samples_per_second": 17.075,
      "eval_steps_per_second": 2.846,
      "eval_wer": 0.2910468290412003,
      "step": 11600
    },
    {
      "epoch": 14.18,
      "learning_rate": 8.947336561743341e-05,
      "loss": 1.1048,
      "step": 12000
    },
    {
      "epoch": 14.18,
      "eval_loss": 0.38590070605278015,
      "eval_runtime": 297.414,
      "eval_samples_per_second": 17.067,
      "eval_steps_per_second": 2.845,
      "eval_wer": 0.282413187464291,
      "step": 12000
    },
    {
      "epoch": 14.66,
      "learning_rate": 8.22094430992736e-05,
      "loss": 1.0748,
      "step": 12400
    },
    {
      "epoch": 14.66,
      "eval_loss": 0.3854171633720398,
      "eval_runtime": 295.6597,
      "eval_samples_per_second": 17.168,
      "eval_steps_per_second": 2.861,
      "eval_wer": 0.275705186533212,
      "step": 12400
    },
    {
      "epoch": 15.13,
      "learning_rate": 7.494552058111379e-05,
      "loss": 1.0697,
      "step": 12800
    },
    {
      "epoch": 15.13,
      "eval_loss": 0.37401217222213745,
      "eval_runtime": 297.4379,
      "eval_samples_per_second": 17.066,
      "eval_steps_per_second": 2.844,
      "eval_wer": 0.2723829273969994,
      "step": 12800
    },
    {
      "epoch": 15.6,
      "learning_rate": 6.7681598062954e-05,
      "loss": 1.0477,
      "step": 13200
    },
    {
      "epoch": 15.6,
      "eval_loss": 0.3693487048149109,
      "eval_runtime": 298.4761,
      "eval_samples_per_second": 17.006,
      "eval_steps_per_second": 2.834,
      "eval_wer": 0.2642994688617559,
      "step": 13200
    },
    {
      "epoch": 16.08,
      "learning_rate": 6.0417675544794184e-05,
      "loss": 1.0356,
      "step": 13600
    },
    {
      "epoch": 16.08,
      "eval_loss": 0.3726678788661957,
      "eval_runtime": 298.583,
      "eval_samples_per_second": 17.0,
      "eval_steps_per_second": 2.833,
      "eval_wer": 0.2561102058954229,
      "step": 13600
    },
    {
      "epoch": 16.55,
      "learning_rate": 5.315375302663438e-05,
      "loss": 1.0083,
      "step": 14000
    },
    {
      "epoch": 16.55,
      "eval_loss": 0.36522331833839417,
      "eval_runtime": 301.7885,
      "eval_samples_per_second": 16.82,
      "eval_steps_per_second": 2.803,
      "eval_wer": 0.25007935332331716,
      "step": 14000
    },
    {
      "epoch": 17.02,
      "learning_rate": 4.5889830508474574e-05,
      "loss": 1.0,
      "step": 14400
    },
    {
      "epoch": 17.02,
      "eval_loss": 0.36411818861961365,
      "eval_runtime": 297.5091,
      "eval_samples_per_second": 17.062,
      "eval_steps_per_second": 2.844,
      "eval_wer": 0.24572021076242673,
      "step": 14400
    },
    {
      "epoch": 17.49,
      "learning_rate": 3.862590799031477e-05,
      "loss": 0.9779,
      "step": 14800
    },
    {
      "epoch": 17.49,
      "eval_loss": 0.35678642988204956,
      "eval_runtime": 295.5669,
      "eval_samples_per_second": 17.174,
      "eval_steps_per_second": 2.862,
      "eval_wer": 0.24089552870474215,
      "step": 14800
    },
    {
      "epoch": 17.97,
      "learning_rate": 3.1361985472154963e-05,
      "loss": 0.9596,
      "step": 15200
    },
    {
      "epoch": 17.97,
      "eval_loss": 0.3557918071746826,
      "eval_runtime": 296.6965,
      "eval_samples_per_second": 17.108,
      "eval_steps_per_second": 2.851,
      "eval_wer": 0.23759443045474746,
      "step": 15200
    },
    {
      "epoch": 18.44,
      "learning_rate": 2.4098062953995155e-05,
      "loss": 0.946,
      "step": 15600
    },
    {
      "epoch": 18.44,
      "eval_loss": 0.35907429456710815,
      "eval_runtime": 295.7242,
      "eval_samples_per_second": 17.165,
      "eval_steps_per_second": 2.861,
      "eval_wer": 0.23114036015828343,
      "step": 15600
    },
    {
      "epoch": 18.91,
      "learning_rate": 1.685230024213075e-05,
      "loss": 0.9389,
      "step": 16000
    },
    {
      "epoch": 18.91,
      "eval_loss": 0.3540380001068115,
      "eval_runtime": 296.3149,
      "eval_samples_per_second": 17.13,
      "eval_steps_per_second": 2.855,
      "eval_wer": 0.22828364051886493,
      "step": 16000
    },
    {
      "epoch": 19.39,
      "learning_rate": 9.588377723970942e-06,
      "loss": 0.9173,
      "step": 16400
    },
    {
      "epoch": 19.39,
      "eval_loss": 0.35515815019607544,
      "eval_runtime": 296.2267,
      "eval_samples_per_second": 17.136,
      "eval_steps_per_second": 2.856,
      "eval_wer": 0.2265061260765601,
      "step": 16400
    },
    {
      "epoch": 19.86,
      "learning_rate": 2.3244552058111378e-06,
      "loss": 0.9122,
      "step": 16800
    },
    {
      "epoch": 19.86,
      "eval_loss": 0.353456050157547,
      "eval_runtime": 298.3571,
      "eval_samples_per_second": 17.013,
      "eval_steps_per_second": 2.836,
      "eval_wer": 0.22502486404130606,
      "step": 16800
    },
    {
      "epoch": 20.0,
      "step": 16920,
      "total_flos": 1.9284410553255697e+20,
      "train_loss": 1.3125287554224614,
      "train_runtime": 67690.7896,
      "train_samples_per_second": 6.0,
      "train_steps_per_second": 0.25
    }
  ],
  "max_steps": 16920,
  "num_train_epochs": 20,
  "total_flos": 1.9284410553255697e+20,
  "trial_name": null,
  "trial_params": null
}