{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.9584,
  "global_step": 46800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.07,
      "learning_rate": 2.9880000000000002e-05,
      "loss": 7.2457,
      "step": 500
    },
    {
      "epoch": 2.13,
      "learning_rate": 2.967732181425486e-05,
      "loss": 2.3129,
      "step": 1000
    },
    {
      "epoch": 2.13,
      "eval_cer": 0.05796002383402849,
      "eval_loss": 0.5042470097541809,
      "eval_runtime": 80.7316,
      "eval_samples_per_second": 18.58,
      "eval_steps_per_second": 2.329,
      "eval_wer": 0.270302835569122,
      "step": 1000
    },
    {
      "epoch": 3.2,
      "learning_rate": 2.9353347732181423e-05,
      "loss": 0.389,
      "step": 1500
    },
    {
      "epoch": 4.27,
      "learning_rate": 2.9029373650107992e-05,
      "loss": 0.2251,
      "step": 2000
    },
    {
      "epoch": 4.27,
      "eval_cer": 0.02945398407453551,
      "eval_loss": 0.17824731767177582,
      "eval_runtime": 80.3146,
      "eval_samples_per_second": 18.677,
      "eval_steps_per_second": 2.341,
      "eval_wer": 0.11984898385412483,
      "step": 2000
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.870539956803456e-05,
      "loss": 0.1727,
      "step": 2500
    },
    {
      "epoch": 6.4,
      "learning_rate": 2.8381425485961122e-05,
      "loss": 0.1462,
      "step": 3000
    },
    {
      "epoch": 6.4,
      "eval_cer": 0.02651535669790369,
      "eval_loss": 0.1635013222694397,
      "eval_runtime": 72.2551,
      "eval_samples_per_second": 20.76,
      "eval_steps_per_second": 2.602,
      "eval_wer": 0.10185557072857257,
      "step": 3000
    },
    {
      "epoch": 7.47,
      "learning_rate": 2.805745140388769e-05,
      "loss": 0.128,
      "step": 3500
    },
    {
      "epoch": 8.53,
      "learning_rate": 2.7733477321814256e-05,
      "loss": 0.1162,
      "step": 4000
    },
    {
      "epoch": 8.53,
      "eval_cer": 0.02478197280754022,
      "eval_loss": 0.1619153469800949,
      "eval_runtime": 81.2175,
      "eval_samples_per_second": 18.469,
      "eval_steps_per_second": 2.315,
      "eval_wer": 0.09309984737729939,
      "step": 4000
    },
    {
      "epoch": 9.6,
      "learning_rate": 2.740950323974082e-05,
      "loss": 0.1095,
      "step": 4500
    },
    {
      "epoch": 10.67,
      "learning_rate": 2.7085529157667386e-05,
      "loss": 0.0988,
      "step": 5000
    },
    {
      "epoch": 10.67,
      "eval_cer": 0.024890309300687936,
      "eval_loss": 0.1653970628976822,
      "eval_runtime": 72.7324,
      "eval_samples_per_second": 20.624,
      "eval_steps_per_second": 2.585,
      "eval_wer": 0.09398345248614347,
      "step": 5000
    },
    {
      "epoch": 11.73,
      "learning_rate": 2.676155507559395e-05,
      "loss": 0.0949,
      "step": 5500
    },
    {
      "epoch": 12.8,
      "learning_rate": 2.643758099352052e-05,
      "loss": 0.0904,
      "step": 6000
    },
    {
      "epoch": 12.8,
      "eval_cer": 0.024186122095227778,
      "eval_loss": 0.17023487389087677,
      "eval_runtime": 80.5666,
      "eval_samples_per_second": 18.618,
      "eval_steps_per_second": 2.333,
      "eval_wer": 0.08450477950036148,
      "step": 6000
    },
    {
      "epoch": 13.87,
      "learning_rate": 2.6113606911447086e-05,
      "loss": 0.0859,
      "step": 6500
    },
    {
      "epoch": 14.93,
      "learning_rate": 2.578963282937365e-05,
      "loss": 0.0813,
      "step": 7000
    },
    {
      "epoch": 14.93,
      "eval_cer": 0.02392882292400195,
      "eval_loss": 0.16578005254268646,
      "eval_runtime": 72.3723,
      "eval_samples_per_second": 20.726,
      "eval_steps_per_second": 2.598,
      "eval_wer": 0.08458510723752911,
      "step": 7000
    },
    {
      "epoch": 16.02,
      "learning_rate": 2.5466306695464364e-05,
      "loss": 0.0763,
      "step": 7500
    },
    {
      "epoch": 17.09,
      "learning_rate": 2.514233261339093e-05,
      "loss": 0.074,
      "step": 8000
    },
    {
      "epoch": 17.09,
      "eval_cer": 0.02395590704728888,
      "eval_loss": 0.1762518584728241,
      "eval_runtime": 72.6367,
      "eval_samples_per_second": 20.651,
      "eval_steps_per_second": 2.588,
      "eval_wer": 0.07928347658446462,
      "step": 8000
    },
    {
      "epoch": 18.16,
      "learning_rate": 2.4818358531317497e-05,
      "loss": 0.0714,
      "step": 8500
    },
    {
      "epoch": 19.22,
      "learning_rate": 2.4495032397408207e-05,
      "loss": 0.0692,
      "step": 9000
    },
    {
      "epoch": 19.22,
      "eval_cer": 0.0242538324034451,
      "eval_loss": 0.17680838704109192,
      "eval_runtime": 68.8274,
      "eval_samples_per_second": 21.794,
      "eval_steps_per_second": 2.731,
      "eval_wer": 0.0834605189171821,
      "step": 9000
    },
    {
      "epoch": 20.3,
      "learning_rate": 2.4171058315334776e-05,
      "loss": 0.0658,
      "step": 9500
    },
    {
      "epoch": 21.36,
      "learning_rate": 2.3847084233261337e-05,
      "loss": 0.0652,
      "step": 10000
    },
    {
      "epoch": 21.36,
      "eval_cer": 0.02365798169113266,
      "eval_loss": 0.18117107450962067,
      "eval_runtime": 72.0293,
      "eval_samples_per_second": 20.825,
      "eval_steps_per_second": 2.61,
      "eval_wer": 0.07968511527030284,
      "step": 10000
    },
    {
      "epoch": 22.43,
      "learning_rate": 2.3523110151187906e-05,
      "loss": 0.0625,
      "step": 10500
    },
    {
      "epoch": 23.5,
      "learning_rate": 2.319913606911447e-05,
      "loss": 0.0593,
      "step": 11000
    },
    {
      "epoch": 23.5,
      "eval_cer": 0.022127728725421156,
      "eval_loss": 0.18102437257766724,
      "eval_runtime": 69.723,
      "eval_samples_per_second": 21.514,
      "eval_steps_per_second": 2.696,
      "eval_wer": 0.07502610651457949,
      "step": 11000
    },
    {
      "epoch": 24.56,
      "learning_rate": 2.2875809935205184e-05,
      "loss": 0.0604,
      "step": 11500
    },
    {
      "epoch": 25.63,
      "learning_rate": 2.255183585313175e-05,
      "loss": 0.0547,
      "step": 12000
    },
    {
      "epoch": 25.63,
      "eval_cer": 0.023265261903472185,
      "eval_loss": 0.18346643447875977,
      "eval_runtime": 70.9539,
      "eval_samples_per_second": 21.14,
      "eval_steps_per_second": 2.65,
      "eval_wer": 0.07944413205879991,
      "step": 12000
    },
    {
      "epoch": 26.7,
      "learning_rate": 2.2227861771058315e-05,
      "loss": 0.0565,
      "step": 12500
    },
    {
      "epoch": 27.76,
      "learning_rate": 2.1903887688984883e-05,
      "loss": 0.0514,
      "step": 13000
    },
    {
      "epoch": 27.76,
      "eval_cer": 0.022385027896646984,
      "eval_loss": 0.18281148374080658,
      "eval_runtime": 70.1644,
      "eval_samples_per_second": 21.378,
      "eval_steps_per_second": 2.679,
      "eval_wer": 0.07607036709775886,
      "step": 13000
    },
    {
      "epoch": 28.83,
      "learning_rate": 2.157991360691145e-05,
      "loss": 0.053,
      "step": 13500
    },
    {
      "epoch": 29.9,
      "learning_rate": 2.1255939524838014e-05,
      "loss": 0.0488,
      "step": 14000
    },
    {
      "epoch": 29.9,
      "eval_cer": 0.02242565408157738,
      "eval_loss": 0.18435174226760864,
      "eval_runtime": 69.6075,
      "eval_samples_per_second": 21.549,
      "eval_steps_per_second": 2.701,
      "eval_wer": 0.07663266125793236,
      "step": 14000
    },
    {
      "epoch": 30.96,
      "learning_rate": 2.093196544276458e-05,
      "loss": 0.0514,
      "step": 14500
    },
    {
      "epoch": 32.03,
      "learning_rate": 2.0607991360691144e-05,
      "loss": 0.0478,
      "step": 15000
    },
    {
      "epoch": 32.03,
      "eval_cer": 0.022588158821298953,
      "eval_loss": 0.1909506469964981,
      "eval_runtime": 72.4928,
      "eval_samples_per_second": 20.692,
      "eval_steps_per_second": 2.593,
      "eval_wer": 0.0768736444694353,
      "step": 15000
    },
    {
      "epoch": 33.1,
      "learning_rate": 2.0284017278617713e-05,
      "loss": 0.0489,
      "step": 15500
    },
    {
      "epoch": 34.16,
      "learning_rate": 1.9960043196544274e-05,
      "loss": 0.0459,
      "step": 16000
    },
    {
      "epoch": 34.16,
      "eval_cer": 0.02386111261578463,
      "eval_loss": 0.19651177525520325,
      "eval_runtime": 69.572,
      "eval_samples_per_second": 21.56,
      "eval_steps_per_second": 2.702,
      "eval_wer": 0.08313920796851153,
      "step": 16000
    },
    {
      "epoch": 35.23,
      "learning_rate": 1.9636069114470843e-05,
      "loss": 0.0481,
      "step": 16500
    },
    {
      "epoch": 36.3,
      "learning_rate": 1.9312095032397408e-05,
      "loss": 0.0429,
      "step": 17000
    },
    {
      "epoch": 36.3,
      "eval_cer": 0.021951681924056117,
      "eval_loss": 0.20002886652946472,
      "eval_runtime": 69.2113,
      "eval_samples_per_second": 21.673,
      "eval_steps_per_second": 2.716,
      "eval_wer": 0.07599003936059122,
      "step": 17000
    },
    {
      "epoch": 37.36,
      "learning_rate": 1.898876889848812e-05,
      "loss": 0.0461,
      "step": 17500
    },
    {
      "epoch": 38.43,
      "learning_rate": 1.8664794816414686e-05,
      "loss": 0.0443,
      "step": 18000
    },
    {
      "epoch": 38.43,
      "eval_cer": 0.02277774768430746,
      "eval_loss": 0.20385122299194336,
      "eval_runtime": 72.6401,
      "eval_samples_per_second": 20.65,
      "eval_steps_per_second": 2.588,
      "eval_wer": 0.0774359386296088,
      "step": 18000
    },
    {
      "epoch": 39.5,
      "learning_rate": 1.834082073434125e-05,
      "loss": 0.0411,
      "step": 18500
    },
    {
      "epoch": 40.56,
      "learning_rate": 1.8017494600431965e-05,
      "loss": 0.0398,
      "step": 19000
    },
    {
      "epoch": 40.56,
      "eval_cer": 0.021856887492551866,
      "eval_loss": 0.19806204736232758,
      "eval_runtime": 72.4106,
      "eval_samples_per_second": 20.715,
      "eval_steps_per_second": 2.596,
      "eval_wer": 0.07550807293758535,
      "step": 19000
    },
    {
      "epoch": 41.63,
      "learning_rate": 1.7693520518358533e-05,
      "loss": 0.0431,
      "step": 19500
    },
    {
      "epoch": 42.7,
      "learning_rate": 1.7369546436285095e-05,
      "loss": 0.0408,
      "step": 20000
    },
    {
      "epoch": 42.7,
      "eval_cer": 0.02387465467742809,
      "eval_loss": 0.2053176462650299,
      "eval_runtime": 69.6266,
      "eval_samples_per_second": 21.543,
      "eval_steps_per_second": 2.7,
      "eval_wer": 0.0775965941039441,
      "step": 20000
    },
    {
      "epoch": 43.76,
      "learning_rate": 1.7045572354211664e-05,
      "loss": 0.0399,
      "step": 20500
    },
    {
      "epoch": 44.83,
      "learning_rate": 1.6721598272138232e-05,
      "loss": 0.0406,
      "step": 21000
    },
    {
      "epoch": 44.83,
      "eval_cer": 0.022060018417203835,
      "eval_loss": 0.2049773633480072,
      "eval_runtime": 70.2646,
      "eval_samples_per_second": 21.348,
      "eval_steps_per_second": 2.676,
      "eval_wer": 0.07398184593140011,
      "step": 21000
    },
    {
      "epoch": 45.9,
      "learning_rate": 1.6397624190064794e-05,
      "loss": 0.0381,
      "step": 21500
    },
    {
      "epoch": 46.96,
      "learning_rate": 1.6073650107991363e-05,
      "loss": 0.0383,
      "step": 22000
    },
    {
      "epoch": 46.96,
      "eval_cer": 0.022357943773360055,
      "eval_loss": 0.21280354261398315,
      "eval_runtime": 72.639,
      "eval_samples_per_second": 20.65,
      "eval_steps_per_second": 2.588,
      "eval_wer": 0.07325889629689132,
      "step": 22000
    },
    {
      "epoch": 48.03,
      "learning_rate": 1.5749676025917928e-05,
      "loss": 0.0365,
      "step": 22500
    },
    {
      "epoch": 49.1,
      "learning_rate": 1.5425701943844493e-05,
      "loss": 0.0379,
      "step": 23000
    },
    {
      "epoch": 49.1,
      "eval_cer": 0.021978766047343046,
      "eval_loss": 0.21096549928188324,
      "eval_runtime": 73.1028,
      "eval_samples_per_second": 20.519,
      "eval_steps_per_second": 2.572,
      "eval_wer": 0.07309824082255603,
      "step": 23000
    },
    {
      "epoch": 50.16,
      "learning_rate": 1.5102375809935208e-05,
      "loss": 0.0361,
      "step": 23500
    },
    {
      "epoch": 51.23,
      "learning_rate": 1.4778401727861771e-05,
      "loss": 0.0369,
      "step": 24000
    },
    {
      "epoch": 51.23,
      "eval_cer": 0.021951681924056117,
      "eval_loss": 0.2144922912120819,
      "eval_runtime": 69.8484,
      "eval_samples_per_second": 21.475,
      "eval_steps_per_second": 2.692,
      "eval_wer": 0.07446381235440598,
      "step": 24000
    },
    {
      "epoch": 52.3,
      "learning_rate": 1.4455075593952484e-05,
      "loss": 0.0334,
      "step": 24500
    },
    {
      "epoch": 53.36,
      "learning_rate": 1.4131101511879051e-05,
      "loss": 0.0341,
      "step": 25000
    },
    {
      "epoch": 53.36,
      "eval_cer": 0.022208981095281945,
      "eval_loss": 0.2145662158727646,
      "eval_runtime": 69.6873,
      "eval_samples_per_second": 21.525,
      "eval_steps_per_second": 2.698,
      "eval_wer": 0.07245561892521488,
      "step": 25000
    },
    {
      "epoch": 54.43,
      "learning_rate": 1.3807127429805616e-05,
      "loss": 0.0356,
      "step": 25500
    },
    {
      "epoch": 55.5,
      "learning_rate": 1.3483801295896327e-05,
      "loss": 0.0322,
      "step": 26000
    },
    {
      "epoch": 55.5,
      "eval_cer": 0.021626672444612968,
      "eval_loss": 0.21297892928123474,
      "eval_runtime": 73.9282,
      "eval_samples_per_second": 20.29,
      "eval_steps_per_second": 2.543,
      "eval_wer": 0.07100971965619729,
      "step": 26000
    },
    {
      "epoch": 56.56,
      "learning_rate": 1.3159827213822896e-05,
      "loss": 0.0327,
      "step": 26500
    },
    {
      "epoch": 57.63,
      "learning_rate": 1.2835853131749461e-05,
      "loss": 0.0316,
      "step": 27000
    },
    {
      "epoch": 57.63,
      "eval_cer": 0.02224960728021234,
      "eval_loss": 0.2133886069059372,
      "eval_runtime": 69.9039,
      "eval_samples_per_second": 21.458,
      "eval_steps_per_second": 2.689,
      "eval_wer": 0.0715720138163708,
      "step": 27000
    },
    {
      "epoch": 58.7,
      "learning_rate": 1.2511879049676026e-05,
      "loss": 0.0319,
      "step": 27500
    },
    {
      "epoch": 59.76,
      "learning_rate": 1.218855291576674e-05,
      "loss": 0.0324,
      "step": 28000
    },
    {
      "epoch": 59.76,
      "eval_cer": 0.022236065218568874,
      "eval_loss": 0.21722277998924255,
      "eval_runtime": 69.753,
      "eval_samples_per_second": 21.504,
      "eval_steps_per_second": 2.695,
      "eval_wer": 0.07309824082255603,
      "step": 28000
    },
    {
      "epoch": 60.83,
      "learning_rate": 1.1864578833693305e-05,
      "loss": 0.0296,
      "step": 28500
    },
    {
      "epoch": 61.9,
      "learning_rate": 1.1540604751619871e-05,
      "loss": 0.0315,
      "step": 29000
    },
    {
      "epoch": 61.9,
      "eval_cer": 0.02280483180759439,
      "eval_loss": 0.22069784998893738,
      "eval_runtime": 79.4911,
      "eval_samples_per_second": 18.87,
      "eval_steps_per_second": 2.365,
      "eval_wer": 0.07454414009157362,
      "step": 29000
    },
    {
      "epoch": 62.96,
      "learning_rate": 1.1216630669546437e-05,
      "loss": 0.0315,
      "step": 29500
    },
    {
      "epoch": 64.03,
      "learning_rate": 1.0892656587473002e-05,
      "loss": 0.0294,
      "step": 30000
    },
    {
      "epoch": 64.03,
      "eval_cer": 0.021843345430908403,
      "eval_loss": 0.2183454930782318,
      "eval_runtime": 77.818,
      "eval_samples_per_second": 19.276,
      "eval_steps_per_second": 2.416,
      "eval_wer": 0.07165234155353843,
      "step": 30000
    },
    {
      "epoch": 65.1,
      "learning_rate": 1.0569330453563715e-05,
      "loss": 0.0301,
      "step": 30500
    },
    {
      "epoch": 66.16,
      "learning_rate": 1.024535637149028e-05,
      "loss": 0.028,
      "step": 31000
    },
    {
      "epoch": 66.16,
      "eval_cer": 0.021355831211743677,
      "eval_loss": 0.2184668779373169,
      "eval_runtime": 80.0286,
      "eval_samples_per_second": 18.743,
      "eval_steps_per_second": 2.349,
      "eval_wer": 0.06956382038717969,
      "step": 31000
    },
    {
      "epoch": 67.23,
      "learning_rate": 9.921382289416847e-06,
      "loss": 0.0283,
      "step": 31500
    },
    {
      "epoch": 68.3,
      "learning_rate": 9.597408207343414e-06,
      "loss": 0.0263,
      "step": 32000
    },
    {
      "epoch": 68.3,
      "eval_cer": 0.021504793889821787,
      "eval_loss": 0.21668484807014465,
      "eval_runtime": 70.5125,
      "eval_samples_per_second": 21.273,
      "eval_steps_per_second": 2.666,
      "eval_wer": 0.06964414812434734,
      "step": 32000
    },
    {
      "epoch": 69.36,
      "learning_rate": 9.273434125269979e-06,
      "loss": 0.0286,
      "step": 32500
    },
    {
      "epoch": 70.43,
      "learning_rate": 8.949460043196544e-06,
      "loss": 0.0299,
      "step": 33000
    },
    {
      "epoch": 70.43,
      "eval_cer": 0.021707924814473756,
      "eval_loss": 0.22012607753276825,
      "eval_runtime": 72.8041,
      "eval_samples_per_second": 20.603,
      "eval_steps_per_second": 2.582,
      "eval_wer": 0.07092939191902964,
      "step": 33000
    },
    {
      "epoch": 71.5,
      "learning_rate": 8.62548596112311e-06,
      "loss": 0.0267,
      "step": 33500
    },
    {
      "epoch": 72.56,
      "learning_rate": 8.301511879049676e-06,
      "loss": 0.0273,
      "step": 34000
    },
    {
      "epoch": 72.56,
      "eval_cer": 0.02219543903363848,
      "eval_loss": 0.21641910076141357,
      "eval_runtime": 72.2901,
      "eval_samples_per_second": 20.75,
      "eval_steps_per_second": 2.601,
      "eval_wer": 0.07237529118804724,
      "step": 34000
    },
    {
      "epoch": 73.63,
      "learning_rate": 7.977537796976242e-06,
      "loss": 0.0267,
      "step": 34500
    },
    {
      "epoch": 74.7,
      "learning_rate": 7.653563714902809e-06,
      "loss": 0.0269,
      "step": 35000
    },
    {
      "epoch": 74.7,
      "eval_cer": 0.021951681924056117,
      "eval_loss": 0.22398144006729126,
      "eval_runtime": 69.5423,
      "eval_samples_per_second": 21.57,
      "eval_steps_per_second": 2.703,
      "eval_wer": 0.06932283717567676,
      "step": 35000
    },
    {
      "epoch": 75.85,
      "learning_rate": 7.329589632829374e-06,
      "loss": 0.0265,
      "step": 35500
    },
    {
      "epoch": 76.92,
      "learning_rate": 7.00561555075594e-06,
      "loss": 0.0264,
      "step": 36000
    },
    {
      "epoch": 76.92,
      "eval_cer": 0.021789177184334544,
      "eval_loss": 0.22204121947288513,
      "eval_runtime": 71.976,
      "eval_samples_per_second": 20.84,
      "eval_steps_per_second": 2.612,
      "eval_wer": 0.07036709775885613,
      "step": 36000
    },
    {
      "epoch": 77.99,
      "learning_rate": 6.682289416846653e-06,
      "loss": 0.0257,
      "step": 36500
    },
    {
      "epoch": 79.05,
      "learning_rate": 6.358963282937365e-06,
      "loss": 0.0257,
      "step": 37000
    },
    {
      "epoch": 79.05,
      "eval_cer": 0.02172146687611722,
      "eval_loss": 0.22285164892673492,
      "eval_runtime": 68.7898,
      "eval_samples_per_second": 21.806,
      "eval_steps_per_second": 2.733,
      "eval_wer": 0.0688408707526709,
      "step": 37000
    },
    {
      "epoch": 80.12,
      "learning_rate": 6.034989200863931e-06,
      "loss": 0.0257,
      "step": 37500
    },
    {
      "epoch": 81.19,
      "learning_rate": 5.711015118790497e-06,
      "loss": 0.0251,
      "step": 38000
    },
    {
      "epoch": 81.19,
      "eval_cer": 0.02154542007475218,
      "eval_loss": 0.22632832825183868,
      "eval_runtime": 71.6223,
      "eval_samples_per_second": 20.943,
      "eval_steps_per_second": 2.625,
      "eval_wer": 0.0694031649128444,
      "step": 38000
    },
    {
      "epoch": 82.25,
      "learning_rate": 5.387041036717062e-06,
      "loss": 0.0235,
      "step": 38500
    },
    {
      "epoch": 83.32,
      "learning_rate": 5.063066954643629e-06,
      "loss": 0.0245,
      "step": 39000
    },
    {
      "epoch": 83.32,
      "eval_cer": 0.02101727967065706,
      "eval_loss": 0.22526498138904572,
      "eval_runtime": 70.7229,
      "eval_samples_per_second": 21.21,
      "eval_steps_per_second": 2.658,
      "eval_wer": 0.06731464374648566,
      "step": 39000
    },
    {
      "epoch": 84.39,
      "learning_rate": 4.739740820734342e-06,
      "loss": 0.0252,
      "step": 39500
    },
    {
      "epoch": 85.45,
      "learning_rate": 4.415766738660907e-06,
      "loss": 0.0243,
      "step": 40000
    },
    {
      "epoch": 85.45,
      "eval_cer": 0.02151833595146525,
      "eval_loss": 0.2263830602169037,
      "eval_runtime": 69.4576,
      "eval_samples_per_second": 21.596,
      "eval_steps_per_second": 2.707,
      "eval_wer": 0.06916218170134147,
      "step": 40000
    },
    {
      "epoch": 86.52,
      "learning_rate": 4.09244060475162e-06,
      "loss": 0.0228,
      "step": 40500
    },
    {
      "epoch": 87.59,
      "learning_rate": 3.7684665226781857e-06,
      "loss": 0.0236,
      "step": 41000
    },
    {
      "epoch": 87.59,
      "eval_cer": 0.021667298629543363,
      "eval_loss": 0.22610026597976685,
      "eval_runtime": 69.0446,
      "eval_samples_per_second": 21.725,
      "eval_steps_per_second": 2.723,
      "eval_wer": 0.06892119848983855,
      "step": 41000
    },
    {
      "epoch": 88.65,
      "learning_rate": 3.4444924406047518e-06,
      "loss": 0.0244,
      "step": 41500
    },
    {
      "epoch": 89.72,
      "learning_rate": 3.1205183585313174e-06,
      "loss": 0.0225,
      "step": 42000
    },
    {
      "epoch": 89.72,
      "eval_cer": 0.021179784410378637,
      "eval_loss": 0.22654421627521515,
      "eval_runtime": 72.2716,
      "eval_samples_per_second": 20.755,
      "eval_steps_per_second": 2.601,
      "eval_wer": 0.06803759338099445,
      "step": 42000
    },
    {
      "epoch": 90.79,
      "learning_rate": 2.7965442764578835e-06,
      "loss": 0.0216,
      "step": 42500
    },
    {
      "epoch": 91.85,
      "learning_rate": 2.472570194384449e-06,
      "loss": 0.023,
      "step": 43000
    },
    {
      "epoch": 91.85,
      "eval_cer": 0.021030821732300524,
      "eval_loss": 0.22652284801006317,
      "eval_runtime": 72.2083,
      "eval_samples_per_second": 20.773,
      "eval_steps_per_second": 2.604,
      "eval_wer": 0.06739497148365331,
      "step": 43000
    },
    {
      "epoch": 92.92,
      "learning_rate": 2.149244060475162e-06,
      "loss": 0.0228,
      "step": 43500
    },
    {
      "epoch": 93.99,
      "learning_rate": 1.825917926565875e-06,
      "loss": 0.0217,
      "step": 44000
    },
    {
      "epoch": 93.99,
      "eval_cer": 0.020936027300796273,
      "eval_loss": 0.22653113305568695,
      "eval_runtime": 69.3882,
      "eval_samples_per_second": 21.618,
      "eval_steps_per_second": 2.709,
      "eval_wer": 0.06771628243232389,
      "step": 44000
    },
    {
      "epoch": 95.05,
      "learning_rate": 1.5019438444924408e-06,
      "loss": 0.0203,
      "step": 44500
    },
    {
      "epoch": 96.12,
      "learning_rate": 1.1779697624190064e-06,
      "loss": 0.022,
      "step": 45000
    },
    {
      "epoch": 96.12,
      "eval_cer": 0.021057905855587453,
      "eval_loss": 0.22544603049755096,
      "eval_runtime": 72.3315,
      "eval_samples_per_second": 20.738,
      "eval_steps_per_second": 2.599,
      "eval_wer": 0.06851955980400032,
      "step": 45000
    },
    {
      "epoch": 97.19,
      "learning_rate": 8.539956803455724e-07,
      "loss": 0.0215,
      "step": 45500
    },
    {
      "epoch": 98.25,
      "learning_rate": 5.300215982721382e-07,
      "loss": 0.0219,
      "step": 46000
    },
    {
      "epoch": 98.25,
      "eval_cer": 0.020759980499431233,
      "eval_loss": 0.22618666291236877,
      "eval_runtime": 73.3776,
      "eval_samples_per_second": 20.442,
      "eval_steps_per_second": 2.562,
      "eval_wer": 0.06715398827215037,
      "step": 46000
    },
    {
      "epoch": 99.32,
      "learning_rate": 2.0604751619870412e-07,
      "loss": 0.0204,
      "step": 46500
    },
    {
      "epoch": 99.96,
      "step": 46800,
      "total_flos": 2.6878408769720543e+20,
      "train_loss": 0.005932311532843826,
      "train_runtime": 23991.1474,
      "train_samples_per_second": 62.523,
      "train_steps_per_second": 1.951
    }
  ],
  "max_steps": 46800,
  "num_train_epochs": 100,
  "total_flos": 2.6878408769720543e+20,
  "trial_name": null,
  "trial_params": null
}