{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 11100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9,
      "learning_rate": 3.3949999999999997e-06,
      "loss": 14.8836,
      "step": 100
    },
    {
      "epoch": 1.8,
      "learning_rate": 6.895e-06,
      "loss": 8.0576,
      "step": 200
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.0394999999999998e-05,
      "loss": 4.7488,
      "step": 300
    },
    {
      "epoch": 3.6,
      "learning_rate": 1.3895e-05,
      "loss": 4.0527,
      "step": 400
    },
    {
      "epoch": 4.5,
      "learning_rate": 1.7395e-05,
      "loss": 3.6278,
      "step": 500
    },
    {
      "epoch": 5.41,
      "learning_rate": 2.0894999999999996e-05,
      "loss": 3.3072,
      "step": 600
    },
    {
      "epoch": 6.31,
      "learning_rate": 2.4394999999999996e-05,
      "loss": 3.2162,
      "step": 700
    },
    {
      "epoch": 7.21,
      "learning_rate": 2.7895e-05,
      "loss": 3.1693,
      "step": 800
    },
    {
      "epoch": 8.11,
      "learning_rate": 3.1395e-05,
      "loss": 3.1292,
      "step": 900
    },
    {
      "epoch": 9.01,
      "learning_rate": 3.4895e-05,
      "loss": 3.064,
      "step": 1000
    },
    {
      "epoch": 9.91,
      "learning_rate": 3.8394999999999994e-05,
      "loss": 3.0327,
      "step": 1100
    },
    {
      "epoch": 10.81,
      "learning_rate": 4.1895e-05,
      "loss": 3.0108,
      "step": 1200
    },
    {
      "epoch": 11.71,
      "learning_rate": 4.5394999999999995e-05,
      "loss": 2.8725,
      "step": 1300
    },
    {
      "epoch": 12.61,
      "learning_rate": 4.8895e-05,
      "loss": 2.3983,
      "step": 1400
    },
    {
      "epoch": 13.51,
      "learning_rate": 5.2395e-05,
      "loss": 1.9654,
      "step": 1500
    },
    {
      "epoch": 14.41,
      "learning_rate": 5.589499999999999e-05,
      "loss": 1.7331,
      "step": 1600
    },
    {
      "epoch": 15.32,
      "learning_rate": 5.9394999999999996e-05,
      "loss": 1.5913,
      "step": 1700
    },
    {
      "epoch": 16.22,
      "learning_rate": 6.289499999999999e-05,
      "loss": 1.4628,
      "step": 1800
    },
    {
      "epoch": 17.12,
      "learning_rate": 6.639499999999999e-05,
      "loss": 1.3249,
      "step": 1900
    },
    {
      "epoch": 18.02,
      "learning_rate": 6.9895e-05,
      "loss": 1.2238,
      "step": 2000
    },
    {
      "epoch": 18.02,
      "eval_loss": 0.3910697400569916,
      "eval_runtime": 92.527,
      "eval_samples_per_second": 17.789,
      "eval_steps_per_second": 17.789,
      "eval_wer": 0.4309626986150055,
      "step": 2000
    },
    {
      "epoch": 18.92,
      "learning_rate": 6.925384615384615e-05,
      "loss": 1.1572,
      "step": 2100
    },
    {
      "epoch": 19.82,
      "learning_rate": 6.848461538461538e-05,
      "loss": 1.1058,
      "step": 2200
    },
    {
      "epoch": 20.72,
      "learning_rate": 6.771538461538461e-05,
      "loss": 1.0696,
      "step": 2300
    },
    {
      "epoch": 21.62,
      "learning_rate": 6.694615384615384e-05,
      "loss": 1.0338,
      "step": 2400
    },
    {
      "epoch": 22.52,
      "learning_rate": 6.617692307692307e-05,
      "loss": 1.0053,
      "step": 2500
    },
    {
      "epoch": 23.42,
      "learning_rate": 6.54076923076923e-05,
      "loss": 0.9785,
      "step": 2600
    },
    {
      "epoch": 24.32,
      "learning_rate": 6.463846153846153e-05,
      "loss": 0.9695,
      "step": 2700
    },
    {
      "epoch": 25.23,
      "learning_rate": 6.386923076923076e-05,
      "loss": 0.945,
      "step": 2800
    },
    {
      "epoch": 26.13,
      "learning_rate": 6.31e-05,
      "loss": 0.9157,
      "step": 2900
    },
    {
      "epoch": 27.03,
      "learning_rate": 6.233076923076922e-05,
      "loss": 0.8971,
      "step": 3000
    },
    {
      "epoch": 27.93,
      "learning_rate": 6.156153846153846e-05,
      "loss": 0.8921,
      "step": 3100
    },
    {
      "epoch": 28.83,
      "learning_rate": 6.079230769230769e-05,
      "loss": 0.8734,
      "step": 3200
    },
    {
      "epoch": 29.73,
      "learning_rate": 6.0023076923076915e-05,
      "loss": 0.8685,
      "step": 3300
    },
    {
      "epoch": 30.63,
      "learning_rate": 5.925384615384615e-05,
      "loss": 0.8628,
      "step": 3400
    },
    {
      "epoch": 31.53,
      "learning_rate": 5.848461538461538e-05,
      "loss": 0.8357,
      "step": 3500
    },
    {
      "epoch": 32.43,
      "learning_rate": 5.771538461538461e-05,
      "loss": 0.828,
      "step": 3600
    },
    {
      "epoch": 33.33,
      "learning_rate": 5.694615384615384e-05,
      "loss": 0.8184,
      "step": 3700
    },
    {
      "epoch": 34.23,
      "learning_rate": 5.617692307692307e-05,
      "loss": 0.8199,
      "step": 3800
    },
    {
      "epoch": 35.14,
      "learning_rate": 5.54076923076923e-05,
      "loss": 0.7896,
      "step": 3900
    },
    {
      "epoch": 36.04,
      "learning_rate": 5.463846153846154e-05,
      "loss": 0.7871,
      "step": 4000
    },
    {
      "epoch": 36.04,
      "eval_loss": 0.20626200735569,
      "eval_runtime": 92.1829,
      "eval_samples_per_second": 17.856,
      "eval_steps_per_second": 17.856,
      "eval_wer": 0.23086073583142153,
      "step": 4000
    },
    {
      "epoch": 36.94,
      "learning_rate": 5.386923076923076e-05,
      "loss": 0.78,
      "step": 4100
    },
    {
      "epoch": 37.84,
      "learning_rate": 5.3099999999999996e-05,
      "loss": 0.7835,
      "step": 4200
    },
    {
      "epoch": 38.74,
      "learning_rate": 5.2330769230769226e-05,
      "loss": 0.7698,
      "step": 4300
    },
    {
      "epoch": 39.64,
      "learning_rate": 5.1561538461538456e-05,
      "loss": 0.7689,
      "step": 4400
    },
    {
      "epoch": 40.54,
      "learning_rate": 5.0792307692307686e-05,
      "loss": 0.7565,
      "step": 4500
    },
    {
      "epoch": 41.44,
      "learning_rate": 5.002307692307692e-05,
      "loss": 0.7382,
      "step": 4600
    },
    {
      "epoch": 42.34,
      "learning_rate": 4.926153846153846e-05,
      "loss": 0.7356,
      "step": 4700
    },
    {
      "epoch": 43.24,
      "learning_rate": 4.849230769230769e-05,
      "loss": 0.7272,
      "step": 4800
    },
    {
      "epoch": 44.14,
      "learning_rate": 4.7723076923076914e-05,
      "loss": 0.727,
      "step": 4900
    },
    {
      "epoch": 45.05,
      "learning_rate": 4.695384615384615e-05,
      "loss": 0.7231,
      "step": 5000
    },
    {
      "epoch": 45.95,
      "learning_rate": 4.618461538461539e-05,
      "loss": 0.7056,
      "step": 5100
    },
    {
      "epoch": 46.85,
      "learning_rate": 4.541538461538461e-05,
      "loss": 0.7219,
      "step": 5200
    },
    {
      "epoch": 47.75,
      "learning_rate": 4.464615384615385e-05,
      "loss": 0.705,
      "step": 5300
    },
    {
      "epoch": 48.65,
      "learning_rate": 4.387692307692307e-05,
      "loss": 0.6876,
      "step": 5400
    },
    {
      "epoch": 49.55,
      "learning_rate": 4.3107692307692306e-05,
      "loss": 0.698,
      "step": 5500
    },
    {
      "epoch": 50.45,
      "learning_rate": 4.2338461538461536e-05,
      "loss": 0.6915,
      "step": 5600
    },
    {
      "epoch": 51.35,
      "learning_rate": 4.1569230769230766e-05,
      "loss": 0.6722,
      "step": 5700
    },
    {
      "epoch": 52.25,
      "learning_rate": 4.0807692307692305e-05,
      "loss": 0.6661,
      "step": 5800
    },
    {
      "epoch": 53.15,
      "learning_rate": 4.0038461538461534e-05,
      "loss": 0.6593,
      "step": 5900
    },
    {
      "epoch": 54.05,
      "learning_rate": 3.9269230769230764e-05,
      "loss": 0.6653,
      "step": 6000
    },
    {
      "epoch": 54.05,
      "eval_loss": 0.1959763765335083,
      "eval_runtime": 92.0136,
      "eval_samples_per_second": 17.889,
      "eval_steps_per_second": 17.889,
      "eval_wer": 0.20910867533350327,
      "step": 6000
    },
    {
      "epoch": 54.95,
      "learning_rate": 3.85e-05,
      "loss": 0.6658,
      "step": 6100
    },
    {
      "epoch": 55.86,
      "learning_rate": 3.7730769230769224e-05,
      "loss": 0.6484,
      "step": 6200
    },
    {
      "epoch": 56.76,
      "learning_rate": 3.696153846153846e-05,
      "loss": 0.6714,
      "step": 6300
    },
    {
      "epoch": 57.66,
      "learning_rate": 3.619230769230769e-05,
      "loss": 0.655,
      "step": 6400
    },
    {
      "epoch": 58.56,
      "learning_rate": 3.542307692307692e-05,
      "loss": 0.6484,
      "step": 6500
    },
    {
      "epoch": 59.46,
      "learning_rate": 3.465384615384615e-05,
      "loss": 0.6407,
      "step": 6600
    },
    {
      "epoch": 60.36,
      "learning_rate": 3.388461538461538e-05,
      "loss": 0.6332,
      "step": 6700
    },
    {
      "epoch": 61.26,
      "learning_rate": 3.311538461538461e-05,
      "loss": 0.6252,
      "step": 6800
    },
    {
      "epoch": 62.16,
      "learning_rate": 3.2346153846153846e-05,
      "loss": 0.6233,
      "step": 6900
    },
    {
      "epoch": 63.06,
      "learning_rate": 3.1576923076923076e-05,
      "loss": 0.6253,
      "step": 7000
    },
    {
      "epoch": 63.96,
      "learning_rate": 3.0807692307692305e-05,
      "loss": 0.6281,
      "step": 7100
    },
    {
      "epoch": 64.86,
      "learning_rate": 3.0038461538461535e-05,
      "loss": 0.6236,
      "step": 7200
    },
    {
      "epoch": 65.77,
      "learning_rate": 2.926923076923077e-05,
      "loss": 0.6042,
      "step": 7300
    },
    {
      "epoch": 66.67,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 0.6226,
      "step": 7400
    },
    {
      "epoch": 67.57,
      "learning_rate": 2.7730769230769228e-05,
      "loss": 0.5901,
      "step": 7500
    },
    {
      "epoch": 68.47,
      "learning_rate": 2.696153846153846e-05,
      "loss": 0.6054,
      "step": 7600
    },
    {
      "epoch": 69.37,
      "learning_rate": 2.619230769230769e-05,
      "loss": 0.5953,
      "step": 7700
    },
    {
      "epoch": 70.27,
      "learning_rate": 2.542307692307692e-05,
      "loss": 0.5935,
      "step": 7800
    },
    {
      "epoch": 71.17,
      "learning_rate": 2.4653846153846154e-05,
      "loss": 0.6045,
      "step": 7900
    },
    {
      "epoch": 72.07,
      "learning_rate": 2.3884615384615384e-05,
      "loss": 0.5861,
      "step": 8000
    },
    {
      "epoch": 72.07,
      "eval_loss": 0.19857272505760193,
      "eval_runtime": 91.2101,
      "eval_samples_per_second": 18.046,
      "eval_steps_per_second": 18.046,
      "eval_wer": 0.200016993797264,
      "step": 8000
    },
    {
      "epoch": 72.97,
      "learning_rate": 2.3115384615384614e-05,
      "loss": 0.5953,
      "step": 8100
    },
    {
      "epoch": 73.87,
      "learning_rate": 2.2346153846153843e-05,
      "loss": 0.5869,
      "step": 8200
    },
    {
      "epoch": 74.77,
      "learning_rate": 2.1576923076923076e-05,
      "loss": 0.5713,
      "step": 8300
    },
    {
      "epoch": 75.68,
      "learning_rate": 2.0807692307692306e-05,
      "loss": 0.5826,
      "step": 8400
    },
    {
      "epoch": 76.58,
      "learning_rate": 2.0038461538461536e-05,
      "loss": 0.5735,
      "step": 8500
    },
    {
      "epoch": 77.48,
      "learning_rate": 1.926923076923077e-05,
      "loss": 0.5692,
      "step": 8600
    },
    {
      "epoch": 78.38,
      "learning_rate": 1.85e-05,
      "loss": 0.5692,
      "step": 8700
    },
    {
      "epoch": 79.28,
      "learning_rate": 1.773076923076923e-05,
      "loss": 0.5567,
      "step": 8800
    },
    {
      "epoch": 80.18,
      "learning_rate": 1.696153846153846e-05,
      "loss": 0.5608,
      "step": 8900
    },
    {
      "epoch": 81.08,
      "learning_rate": 1.6192307692307692e-05,
      "loss": 0.5542,
      "step": 9000
    },
    {
      "epoch": 81.98,
      "learning_rate": 1.542307692307692e-05,
      "loss": 0.5575,
      "step": 9100
    },
    {
      "epoch": 82.88,
      "learning_rate": 1.4653846153846153e-05,
      "loss": 0.5311,
      "step": 9200
    },
    {
      "epoch": 83.78,
      "learning_rate": 1.3884615384615383e-05,
      "loss": 0.543,
      "step": 9300
    },
    {
      "epoch": 84.68,
      "learning_rate": 1.3115384615384614e-05,
      "loss": 0.5605,
      "step": 9400
    },
    {
      "epoch": 85.59,
      "learning_rate": 1.2346153846153846e-05,
      "loss": 0.5422,
      "step": 9500
    },
    {
      "epoch": 86.49,
      "learning_rate": 1.1576923076923076e-05,
      "loss": 0.5459,
      "step": 9600
    },
    {
      "epoch": 87.39,
      "learning_rate": 1.0807692307692307e-05,
      "loss": 0.5396,
      "step": 9700
    },
    {
      "epoch": 88.29,
      "learning_rate": 1.0038461538461537e-05,
      "loss": 0.5311,
      "step": 9800
    },
    {
      "epoch": 89.19,
      "learning_rate": 9.269230769230768e-06,
      "loss": 0.5317,
      "step": 9900
    },
    {
      "epoch": 90.09,
      "learning_rate": 8.5e-06,
      "loss": 0.5283,
      "step": 10000
    },
    {
      "epoch": 90.09,
      "eval_loss": 0.19934897124767303,
      "eval_runtime": 92.517,
      "eval_samples_per_second": 17.791,
      "eval_steps_per_second": 17.791,
      "eval_wer": 0.19092531226102472,
      "step": 10000
    },
    {
      "epoch": 90.99,
      "learning_rate": 7.73076923076923e-06,
      "loss": 0.5289,
      "step": 10100
    },
    {
      "epoch": 91.89,
      "learning_rate": 6.9692307692307684e-06,
      "loss": 0.5346,
      "step": 10200
    },
    {
      "epoch": 92.79,
      "learning_rate": 6.199999999999999e-06,
      "loss": 0.5107,
      "step": 10300
    },
    {
      "epoch": 93.69,
      "learning_rate": 5.4307692307692306e-06,
      "loss": 0.5251,
      "step": 10400
    },
    {
      "epoch": 94.59,
      "learning_rate": 4.661538461538461e-06,
      "loss": 0.5324,
      "step": 10500
    },
    {
      "epoch": 95.5,
      "learning_rate": 3.892307692307692e-06,
      "loss": 0.5216,
      "step": 10600
    },
    {
      "epoch": 96.4,
      "learning_rate": 3.1307692307692306e-06,
      "loss": 0.5225,
      "step": 10700
    },
    {
      "epoch": 97.3,
      "learning_rate": 2.361538461538461e-06,
      "loss": 0.5249,
      "step": 10800
    },
    {
      "epoch": 98.2,
      "learning_rate": 1.5923076923076923e-06,
      "loss": 0.5159,
      "step": 10900
    },
    {
      "epoch": 99.1,
      "learning_rate": 8.23076923076923e-07,
      "loss": 0.5276,
      "step": 11000
    },
    {
      "epoch": 100.0,
      "learning_rate": 5.384615384615384e-08,
      "loss": 0.5284,
      "step": 11100
    },
    {
      "epoch": 100.0,
      "step": 11100,
      "total_flos": 5.003170174161145e+19,
      "train_loss": 1.208113473084596,
      "train_runtime": 18177.1255,
      "train_samples_per_second": 19.398,
      "train_steps_per_second": 0.611
    }
  ],
  "max_steps": 11100,
  "num_train_epochs": 100,
  "total_flos": 5.003170174161145e+19,
  "trial_name": null,
  "trial_params": null
}