{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "global_step": 9400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 2.5e-05,
      "loss": 15.9584,
      "step": 64
    },
    {
      "epoch": 0.14,
      "eval_loss": 14.658380508422852,
      "eval_runtime": 249.5052,
      "eval_samples_per_second": 8.208,
      "eval_wer": 1.0,
      "step": 64
    },
    {
      "epoch": 0.27,
      "learning_rate": 5e-05,
      "loss": 6.3907,
      "step": 128
    },
    {
      "epoch": 0.27,
      "eval_loss": 4.082584857940674,
      "eval_runtime": 247.9481,
      "eval_samples_per_second": 8.26,
      "eval_wer": 1.0,
      "step": 128
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.500000000000001e-05,
      "loss": 3.4412,
      "step": 192
    },
    {
      "epoch": 0.41,
      "eval_loss": 3.1572635173797607,
      "eval_runtime": 251.7843,
      "eval_samples_per_second": 8.134,
      "eval_wer": 1.0,
      "step": 192
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.0001,
      "loss": 3.005,
      "step": 256
    },
    {
      "epoch": 0.54,
      "eval_loss": 3.0201895236968994,
      "eval_runtime": 248.8764,
      "eval_samples_per_second": 8.229,
      "eval_wer": 1.0,
      "step": 256
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.000125,
      "loss": 2.9507,
      "step": 320
    },
    {
      "epoch": 0.68,
      "eval_loss": 3.0097341537475586,
      "eval_runtime": 250.2451,
      "eval_samples_per_second": 8.184,
      "eval_wer": 1.0,
      "step": 320
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00015000000000000001,
      "loss": 2.9285,
      "step": 384
    },
    {
      "epoch": 0.82,
      "eval_loss": 3.0145537853240967,
      "eval_runtime": 249.391,
      "eval_samples_per_second": 8.212,
      "eval_wer": 1.0,
      "step": 384
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.000175,
      "loss": 2.9081,
      "step": 448
    },
    {
      "epoch": 0.95,
      "eval_loss": 2.889194965362549,
      "eval_runtime": 252.4706,
      "eval_samples_per_second": 8.112,
      "eval_wer": 1.0,
      "step": 448
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 2.8141,
      "step": 512
    },
    {
      "epoch": 1.09,
      "eval_loss": 3.1128151416778564,
      "eval_runtime": 249.3837,
      "eval_samples_per_second": 8.212,
      "eval_wer": 0.9997997196074504,
      "step": 512
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.00019855985598559857,
      "loss": 1.9764,
      "step": 576
    },
    {
      "epoch": 1.23,
      "eval_loss": 1.4347355365753174,
      "eval_runtime": 253.6868,
      "eval_samples_per_second": 8.073,
      "eval_wer": 0.9202884037652714,
      "step": 576
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.0001971197119711971,
      "loss": 1.171,
      "step": 640
    },
    {
      "epoch": 1.36,
      "eval_loss": 1.1342754364013672,
      "eval_runtime": 252.7977,
      "eval_samples_per_second": 8.101,
      "eval_wer": 0.8585352827291541,
      "step": 640
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.0001956795679567957,
      "loss": 0.9986,
      "step": 704
    },
    {
      "epoch": 1.5,
      "eval_loss": 1.0098124742507935,
      "eval_runtime": 254.0896,
      "eval_samples_per_second": 8.06,
      "eval_wer": 0.8405768075305428,
      "step": 704
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.00019423942394239426,
      "loss": 0.8642,
      "step": 768
    },
    {
      "epoch": 1.63,
      "eval_loss": 0.9640593528747559,
      "eval_runtime": 254.2048,
      "eval_samples_per_second": 8.056,
      "eval_wer": 0.8503905467654717,
      "step": 768
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0001927992799279928,
      "loss": 0.7951,
      "step": 832
    },
    {
      "epoch": 1.77,
      "eval_loss": 0.8799329996109009,
      "eval_runtime": 255.0842,
      "eval_samples_per_second": 8.029,
      "eval_wer": 0.787903064290006,
      "step": 832
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00019135913591359136,
      "loss": 0.7317,
      "step": 896
    },
    {
      "epoch": 1.91,
      "eval_loss": 0.8695712089538574,
      "eval_runtime": 253.1871,
      "eval_samples_per_second": 8.089,
      "eval_wer": 0.7686093864743975,
      "step": 896
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.00018991899189918995,
      "loss": 0.6835,
      "step": 960
    },
    {
      "epoch": 2.04,
      "eval_loss": 0.8140479326248169,
      "eval_runtime": 253.7263,
      "eval_samples_per_second": 8.072,
      "eval_wer": 0.7338941184324721,
      "step": 960
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.00018847884788478848,
      "loss": 0.6188,
      "step": 1024
    },
    {
      "epoch": 2.18,
      "eval_loss": 0.8309915065765381,
      "eval_runtime": 254.0037,
      "eval_samples_per_second": 8.063,
      "eval_wer": 0.7473129047332933,
      "step": 1024
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00018703870387038705,
      "loss": 0.6116,
      "step": 1088
    },
    {
      "epoch": 2.31,
      "eval_loss": 0.8404783010482788,
      "eval_runtime": 255.6119,
      "eval_samples_per_second": 8.012,
      "eval_wer": 0.7376326857600641,
      "step": 1088
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.0001855985598559856,
      "loss": 0.5654,
      "step": 1152
    },
    {
      "epoch": 2.45,
      "eval_loss": 0.7973554134368896,
      "eval_runtime": 253.93,
      "eval_samples_per_second": 8.065,
      "eval_wer": 0.7304893517591294,
      "step": 1152
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.00018415841584158417,
      "loss": 0.5697,
      "step": 1216
    },
    {
      "epoch": 2.59,
      "eval_loss": 0.7916511297225952,
      "eval_runtime": 255.1499,
      "eval_samples_per_second": 8.027,
      "eval_wer": 0.7420388543961546,
      "step": 1216
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.00018271827182718273,
      "loss": 0.5621,
      "step": 1280
    },
    {
      "epoch": 2.72,
      "eval_loss": 0.773869514465332,
      "eval_runtime": 253.9689,
      "eval_samples_per_second": 8.064,
      "eval_wer": 0.7266840243006877,
      "step": 1280
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.0001812781278127813,
      "loss": 0.5646,
      "step": 1344
    },
    {
      "epoch": 2.86,
      "eval_loss": 0.7937338352203369,
      "eval_runtime": 254.2977,
      "eval_samples_per_second": 8.054,
      "eval_wer": 0.70698978569998,
      "step": 1344
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.00017983798379837983,
      "loss": 0.5266,
      "step": 1408
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.738484263420105,
      "eval_runtime": 251.7487,
      "eval_samples_per_second": 8.135,
      "eval_wer": 0.6933707190066093,
      "step": 1408
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.00017839783978397842,
      "loss": 0.4719,
      "step": 1472
    },
    {
      "epoch": 3.13,
      "eval_loss": 0.7587714195251465,
      "eval_runtime": 253.339,
      "eval_samples_per_second": 8.084,
      "eval_wer": 0.6889645503705187,
      "step": 1472
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.00017695769576957698,
      "loss": 0.4479,
      "step": 1536
    },
    {
      "epoch": 3.27,
      "eval_loss": 0.7555954456329346,
      "eval_runtime": 252.6303,
      "eval_samples_per_second": 8.107,
      "eval_wer": 0.6807530542759864,
      "step": 1536
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.00017551755175517552,
      "loss": 0.4614,
      "step": 1600
    },
    {
      "epoch": 3.4,
      "eval_loss": 0.7171066999435425,
      "eval_runtime": 252.5327,
      "eval_samples_per_second": 8.11,
      "eval_wer": 0.6792843313972895,
      "step": 1600
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.00017407740774077408,
      "loss": 0.4491,
      "step": 1664
    },
    {
      "epoch": 3.54,
      "eval_loss": 0.7290804386138916,
      "eval_runtime": 251.92,
      "eval_samples_per_second": 8.13,
      "eval_wer": 0.6693370719006609,
      "step": 1664
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.00017263726372637264,
      "loss": 0.4355,
      "step": 1728
    },
    {
      "epoch": 3.68,
      "eval_loss": 0.7258065938949585,
      "eval_runtime": 251.8895,
      "eval_samples_per_second": 8.131,
      "eval_wer": 0.6802857333600374,
      "step": 1728
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.0001711971197119712,
      "loss": 0.4217,
      "step": 1792
    },
    {
      "epoch": 3.81,
      "eval_loss": 0.7212890982627869,
      "eval_runtime": 251.4573,
      "eval_samples_per_second": 8.145,
      "eval_wer": 0.674878162761199,
      "step": 1792
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.00016975697569756977,
      "loss": 0.4466,
      "step": 1856
    },
    {
      "epoch": 3.95,
      "eval_loss": 0.7866759300231934,
      "eval_runtime": 252.4271,
      "eval_samples_per_second": 8.113,
      "eval_wer": 0.7039855798117365,
      "step": 1856
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.00016831683168316833,
      "loss": 0.4019,
      "step": 1920
    },
    {
      "epoch": 4.09,
      "eval_loss": 0.724478006362915,
      "eval_runtime": 252.8043,
      "eval_samples_per_second": 8.101,
      "eval_wer": 0.6720074771346551,
      "step": 1920
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.0001668766876687669,
      "loss": 0.3812,
      "step": 1984
    },
    {
      "epoch": 4.22,
      "eval_loss": 0.7306143045425415,
      "eval_runtime": 252.5346,
      "eval_samples_per_second": 8.11,
      "eval_wer": 0.671206355564457,
      "step": 1984
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.00016543654365436546,
      "loss": 0.3662,
      "step": 2048
    },
    {
      "epoch": 4.36,
      "eval_loss": 0.7334802150726318,
      "eval_runtime": 251.9822,
      "eval_samples_per_second": 8.128,
      "eval_wer": 0.6639295013018226,
      "step": 2048
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.000163996399639964,
      "loss": 0.3684,
      "step": 2112
    },
    {
      "epoch": 4.49,
      "eval_loss": 0.7250552177429199,
      "eval_runtime": 251.8402,
      "eval_samples_per_second": 8.132,
      "eval_wer": 0.6481073502904066,
      "step": 2112
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.00016255625562556255,
      "loss": 0.3809,
      "step": 2176
    },
    {
      "epoch": 4.63,
      "eval_loss": 0.7295255064964294,
      "eval_runtime": 251.8173,
      "eval_samples_per_second": 8.133,
      "eval_wer": 0.666533146404967,
      "step": 2176
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.00016111611161116112,
      "loss": 0.3618,
      "step": 2240
    },
    {
      "epoch": 4.77,
      "eval_loss": 0.725806474685669,
      "eval_runtime": 251.7567,
      "eval_samples_per_second": 8.135,
      "eval_wer": 0.6467721476734094,
      "step": 2240
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.00015967596759675968,
      "loss": 0.3896,
      "step": 2304
    },
    {
      "epoch": 4.9,
      "eval_loss": 0.6914154291152954,
      "eval_runtime": 253.233,
      "eval_samples_per_second": 8.087,
      "eval_wer": 0.6378262901395286,
      "step": 2304
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.00015823582358235824,
      "loss": 0.3381,
      "step": 2368
    },
    {
      "epoch": 5.04,
      "eval_loss": 0.7252967357635498,
      "eval_runtime": 251.9208,
      "eval_samples_per_second": 8.13,
      "eval_wer": 0.6556512450764403,
      "step": 2368
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.0001567956795679568,
      "loss": 0.3385,
      "step": 2432
    },
    {
      "epoch": 5.17,
      "eval_loss": 0.739501953125,
      "eval_runtime": 251.6871,
      "eval_samples_per_second": 8.137,
      "eval_wer": 0.6486414313372054,
      "step": 2432
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.00015535553555355537,
      "loss": 0.2991,
      "step": 2496
    },
    {
      "epoch": 5.31,
      "eval_loss": 0.718348503112793,
      "eval_runtime": 252.6947,
      "eval_samples_per_second": 8.105,
      "eval_wer": 0.6476400293744575,
      "step": 2496
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.00015391539153915393,
      "loss": 0.3219,
      "step": 2560
    },
    {
      "epoch": 5.45,
      "eval_loss": 0.6939400434494019,
      "eval_runtime": 252.3364,
      "eval_samples_per_second": 8.116,
      "eval_wer": 0.6454369450564124,
      "step": 2560
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.0001524752475247525,
      "loss": 0.3212,
      "step": 2624
    },
    {
      "epoch": 5.58,
      "eval_loss": 0.723796010017395,
      "eval_runtime": 252.2722,
      "eval_samples_per_second": 8.118,
      "eval_wer": 0.6450363842713132,
      "step": 2624
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.00015103510351035103,
      "loss": 0.3152,
      "step": 2688
    },
    {
      "epoch": 5.72,
      "eval_loss": 0.7430602312088013,
      "eval_runtime": 252.488,
      "eval_samples_per_second": 8.111,
      "eval_wer": 0.6561853261232392,
      "step": 2688
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.0001495949594959496,
      "loss": 0.3359,
      "step": 2752
    },
    {
      "epoch": 5.86,
      "eval_loss": 0.7211728096008301,
      "eval_runtime": 252.04,
      "eval_samples_per_second": 8.126,
      "eval_wer": 0.6389612123639762,
      "step": 2752
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.00014815481548154818,
      "loss": 0.3171,
      "step": 2816
    },
    {
      "epoch": 5.99,
      "eval_loss": 0.707861602306366,
      "eval_runtime": 253.0372,
      "eval_samples_per_second": 8.094,
      "eval_wer": 0.6495093130382535,
      "step": 2816
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.00014671467146714671,
      "loss": 0.2806,
      "step": 2880
    },
    {
      "epoch": 6.13,
      "eval_loss": 0.6907961368560791,
      "eval_runtime": 254.422,
      "eval_samples_per_second": 8.05,
      "eval_wer": 0.6292142332598972,
      "step": 2880
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.00014527452745274528,
      "loss": 0.2765,
      "step": 2944
    },
    {
      "epoch": 6.26,
      "eval_loss": 0.7019878029823303,
      "eval_runtime": 252.9804,
      "eval_samples_per_second": 8.095,
      "eval_wer": 0.6294145136524467,
      "step": 2944
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.00014383438343834384,
      "loss": 0.2854,
      "step": 3008
    },
    {
      "epoch": 6.4,
      "eval_loss": 0.811292290687561,
      "eval_runtime": 253.017,
      "eval_samples_per_second": 8.094,
      "eval_wer": 0.6774818078643434,
      "step": 3008
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.0001423942394239424,
      "loss": 0.2805,
      "step": 3072
    },
    {
      "epoch": 6.54,
      "eval_loss": 0.7887662649154663,
      "eval_runtime": 251.6217,
      "eval_samples_per_second": 8.139,
      "eval_wer": 0.6378930502703786,
      "step": 3072
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.00014095409540954096,
      "loss": 0.2861,
      "step": 3136
    },
    {
      "epoch": 6.67,
      "eval_loss": 0.7010805606842041,
      "eval_runtime": 252.2814,
      "eval_samples_per_second": 8.118,
      "eval_wer": 0.635356165298084,
      "step": 3136
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.00013951395139513953,
      "loss": 0.2903,
      "step": 3200
    },
    {
      "epoch": 6.81,
      "eval_loss": 0.7183523774147034,
      "eval_runtime": 253.427,
      "eval_samples_per_second": 8.081,
      "eval_wer": 0.6266105881567527,
      "step": 3200
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.00013807380738073806,
      "loss": 0.2798,
      "step": 3264
    },
    {
      "epoch": 6.94,
      "eval_loss": 0.7161312699317932,
      "eval_runtime": 253.1259,
      "eval_samples_per_second": 8.091,
      "eval_wer": 0.6217370986047133,
      "step": 3264
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.00013663366336633665,
      "loss": 0.2528,
      "step": 3328
    },
    {
      "epoch": 7.08,
      "eval_loss": 0.7560293674468994,
      "eval_runtime": 252.846,
      "eval_samples_per_second": 8.1,
      "eval_wer": 0.6506442352627011,
      "step": 3328
    },
    {
      "epoch": 7.22,
      "learning_rate": 0.00013519351935193521,
      "loss": 0.2549,
      "step": 3392
    },
    {
      "epoch": 7.22,
      "eval_loss": 0.7602236270904541,
      "eval_runtime": 253.2296,
      "eval_samples_per_second": 8.088,
      "eval_wer": 0.6505774751318513,
      "step": 3392
    },
    {
      "epoch": 7.35,
      "learning_rate": 0.00013375337533753375,
      "loss": 0.2431,
      "step": 3456
    },
    {
      "epoch": 7.35,
      "eval_loss": 0.6875869035720825,
      "eval_runtime": 252.6805,
      "eval_samples_per_second": 8.105,
      "eval_wer": 0.61859937245477,
      "step": 3456
    },
    {
      "epoch": 7.49,
      "learning_rate": 0.0001323132313231323,
      "loss": 0.2506,
      "step": 3520
    },
    {
      "epoch": 7.49,
      "eval_loss": 0.7143360376358032,
      "eval_runtime": 253.2779,
      "eval_samples_per_second": 8.086,
      "eval_wer": 0.6345550437278857,
      "step": 3520
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.00013087308730873087,
      "loss": 0.2497,
      "step": 3584
    },
    {
      "epoch": 7.63,
      "eval_loss": 0.7228794693946838,
      "eval_runtime": 252.2511,
      "eval_samples_per_second": 8.119,
      "eval_wer": 0.6364910875225316,
      "step": 3584
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.00012943294329432944,
      "loss": 0.2492,
      "step": 3648
    },
    {
      "epoch": 7.76,
      "eval_loss": 0.7050414085388184,
      "eval_runtime": 253.5665,
      "eval_samples_per_second": 8.077,
      "eval_wer": 0.6172641698377729,
      "step": 3648
    },
    {
      "epoch": 7.9,
      "learning_rate": 0.000127992799279928,
      "loss": 0.2444,
      "step": 3712
    },
    {
      "epoch": 7.9,
      "eval_loss": 0.7215788960456848,
      "eval_runtime": 252.8539,
      "eval_samples_per_second": 8.1,
      "eval_wer": 0.6314173175779425,
      "step": 3712
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.00012655265526552656,
      "loss": 0.2438,
      "step": 3776
    },
    {
      "epoch": 8.03,
      "eval_loss": 0.7077196836471558,
      "eval_runtime": 253.6898,
      "eval_samples_per_second": 8.073,
      "eval_wer": 0.6182655718005208,
      "step": 3776
    },
    {
      "epoch": 8.17,
      "learning_rate": 0.0001251125112511251,
      "loss": 0.2083,
      "step": 3840
    },
    {
      "epoch": 8.17,
      "eval_loss": 0.7524451613426208,
      "eval_runtime": 255.0333,
      "eval_samples_per_second": 8.03,
      "eval_wer": 0.6258762267174044,
      "step": 3840
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.0001236723672367237,
      "loss": 0.204,
      "step": 3904
    },
    {
      "epoch": 8.31,
      "eval_loss": 0.7626018524169922,
      "eval_runtime": 253.5087,
      "eval_samples_per_second": 8.079,
      "eval_wer": 0.6330863208491888,
      "step": 3904
    },
    {
      "epoch": 8.44,
      "learning_rate": 0.00012223222322232225,
      "loss": 0.2353,
      "step": 3968
    },
    {
      "epoch": 8.44,
      "eval_loss": 0.7177530527114868,
      "eval_runtime": 253.4757,
      "eval_samples_per_second": 8.08,
      "eval_wer": 0.6127244809399827,
      "step": 3968
    },
    {
      "epoch": 8.58,
      "learning_rate": 0.0001207920792079208,
      "loss": 0.2208,
      "step": 4032
    },
    {
      "epoch": 8.58,
      "eval_loss": 0.7555935382843018,
      "eval_runtime": 253.7282,
      "eval_samples_per_second": 8.072,
      "eval_wer": 0.6294812737832967,
      "step": 4032
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.00011935193519351935,
      "loss": 0.2266,
      "step": 4096
    },
    {
      "epoch": 8.71,
      "eval_loss": 0.7341886162757874,
      "eval_runtime": 253.8255,
      "eval_samples_per_second": 8.069,
      "eval_wer": 0.6294812737832967,
      "step": 4096
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.00011791179117911792,
      "loss": 0.2188,
      "step": 4160
    },
    {
      "epoch": 8.85,
      "eval_loss": 0.7594360709190369,
      "eval_runtime": 253.012,
      "eval_samples_per_second": 8.094,
      "eval_wer": 0.6449028640096135,
      "step": 4160
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.00011647164716471647,
      "loss": 0.231,
      "step": 4224
    },
    {
      "epoch": 8.99,
      "eval_loss": 0.6901054382324219,
      "eval_runtime": 253.458,
      "eval_samples_per_second": 8.08,
      "eval_wer": 0.6077842312570932,
      "step": 4224
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.00011503150315031504,
      "loss": 0.2063,
      "step": 4288
    },
    {
      "epoch": 9.12,
      "eval_loss": 0.7627975344657898,
      "eval_runtime": 253.6008,
      "eval_samples_per_second": 8.076,
      "eval_wer": 0.6116563188463849,
      "step": 4288
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.00011359135913591358,
      "loss": 0.1931,
      "step": 4352
    },
    {
      "epoch": 9.26,
      "eval_loss": 0.7425730228424072,
      "eval_runtime": 254.3179,
      "eval_samples_per_second": 8.053,
      "eval_wer": 0.6149943253888778,
      "step": 4352
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.00011215121512151216,
      "loss": 0.2189,
      "step": 4416
    },
    {
      "epoch": 9.4,
      "eval_loss": 0.7322723269462585,
      "eval_runtime": 254.9695,
      "eval_samples_per_second": 8.032,
      "eval_wer": 0.6066493090326457,
      "step": 4416
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.00011071107110711072,
      "loss": 0.1931,
      "step": 4480
    },
    {
      "epoch": 9.53,
      "eval_loss": 0.7253696918487549,
      "eval_runtime": 253.0608,
      "eval_samples_per_second": 8.093,
      "eval_wer": 0.613525602510181,
      "step": 4480
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.00010927092709270927,
      "loss": 0.2017,
      "step": 4544
    },
    {
      "epoch": 9.67,
      "eval_loss": 0.6984794735908508,
      "eval_runtime": 253.842,
      "eval_samples_per_second": 8.068,
      "eval_wer": 0.6072501502102944,
      "step": 4544
    },
    {
      "epoch": 9.8,
      "learning_rate": 0.00010783078307830783,
      "loss": 0.2169,
      "step": 4608
    },
    {
      "epoch": 9.8,
      "eval_loss": 0.698683500289917,
      "eval_runtime": 253.2368,
      "eval_samples_per_second": 8.087,
      "eval_wer": 0.6078509913879431,
      "step": 4608
    },
    {
      "epoch": 9.94,
      "learning_rate": 0.00010639063906390641,
      "loss": 0.1956,
      "step": 4672
    },
    {
      "epoch": 9.94,
      "eval_loss": 0.7127052545547485,
      "eval_runtime": 254.4368,
      "eval_samples_per_second": 8.049,
      "eval_wer": 0.6073836704719942,
      "step": 4672
    },
    {
      "epoch": 10.08,
      "learning_rate": 0.00010495049504950496,
      "loss": 0.1972,
      "step": 4736
    },
    {
      "epoch": 10.08,
      "eval_loss": 0.7555835247039795,
      "eval_runtime": 253.4953,
      "eval_samples_per_second": 8.079,
      "eval_wer": 0.6094532345283397,
      "step": 4736
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.00010351035103510351,
      "loss": 0.1827,
      "step": 4800
    },
    {
      "epoch": 10.21,
      "eval_loss": 0.7492347359657288,
      "eval_runtime": 255.7545,
      "eval_samples_per_second": 8.008,
      "eval_wer": 0.6165298083984244,
      "step": 4800
    },
    {
      "epoch": 10.35,
      "learning_rate": 0.00010207020702070207,
      "loss": 0.1798,
      "step": 4864
    },
    {
      "epoch": 10.35,
      "eval_loss": 0.7403519749641418,
      "eval_runtime": 254.7315,
      "eval_samples_per_second": 8.04,
      "eval_wer": 0.6058481874624474,
      "step": 4864
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.00010063006300630065,
      "loss": 0.1839,
      "step": 4928
    },
    {
      "epoch": 10.49,
      "eval_loss": 0.7588409185409546,
      "eval_runtime": 255.5749,
      "eval_samples_per_second": 8.013,
      "eval_wer": 0.60931971426664,
      "step": 4928
    },
    {
      "epoch": 10.62,
      "learning_rate": 9.91899189918992e-05,
      "loss": 0.1823,
      "step": 4992
    },
    {
      "epoch": 10.62,
      "eval_loss": 0.7923687696456909,
      "eval_runtime": 254.6347,
      "eval_samples_per_second": 8.043,
      "eval_wer": 0.621403297950464,
      "step": 4992
    },
    {
      "epoch": 10.76,
      "learning_rate": 9.774977497749776e-05,
      "loss": 0.1749,
      "step": 5056
    },
    {
      "epoch": 10.76,
      "eval_loss": 0.7493650913238525,
      "eval_runtime": 252.6472,
      "eval_samples_per_second": 8.106,
      "eval_wer": 0.6101875959676881,
      "step": 5056
    },
    {
      "epoch": 10.89,
      "learning_rate": 9.630963096309631e-05,
      "loss": 0.1876,
      "step": 5120
    },
    {
      "epoch": 10.89,
      "eval_loss": 0.7571142911911011,
      "eval_runtime": 253.4609,
      "eval_samples_per_second": 8.08,
      "eval_wer": 0.6097870351825889,
      "step": 5120
    },
    {
      "epoch": 11.03,
      "learning_rate": 9.486948694869487e-05,
      "loss": 0.2027,
      "step": 5184
    },
    {
      "epoch": 11.03,
      "eval_loss": 0.7235008478164673,
      "eval_runtime": 252.9146,
      "eval_samples_per_second": 8.098,
      "eval_wer": 0.6036451031444021,
      "step": 5184
    },
    {
      "epoch": 11.17,
      "learning_rate": 9.342934293429343e-05,
      "loss": 0.1699,
      "step": 5248
    },
    {
      "epoch": 11.17,
      "eval_loss": 0.7639468908309937,
      "eval_runtime": 253.3789,
      "eval_samples_per_second": 8.083,
      "eval_wer": 0.6093864743974898,
      "step": 5248
    },
    {
      "epoch": 11.3,
      "learning_rate": 9.1989198919892e-05,
      "loss": 0.1556,
      "step": 5312
    },
    {
      "epoch": 11.3,
      "eval_loss": 0.7758333683013916,
      "eval_runtime": 255.9522,
      "eval_samples_per_second": 8.001,
      "eval_wer": 0.6128580012016823,
      "step": 5312
    },
    {
      "epoch": 11.44,
      "learning_rate": 9.054905490549054e-05,
      "loss": 0.1679,
      "step": 5376
    },
    {
      "epoch": 11.44,
      "eval_loss": 0.7407320737838745,
      "eval_runtime": 253.1505,
      "eval_samples_per_second": 8.09,
      "eval_wer": 0.5980372521530142,
      "step": 5376
    },
    {
      "epoch": 11.57,
      "learning_rate": 8.910891089108912e-05,
      "loss": 0.1681,
      "step": 5440
    },
    {
      "epoch": 11.57,
      "eval_loss": 0.7515969276428223,
      "eval_runtime": 254.0821,
      "eval_samples_per_second": 8.06,
      "eval_wer": 0.6076507109953936,
      "step": 5440
    },
    {
      "epoch": 11.71,
      "learning_rate": 8.766876687668767e-05,
      "loss": 0.1814,
      "step": 5504
    },
    {
      "epoch": 11.71,
      "eval_loss": 0.7393285036087036,
      "eval_runtime": 255.9386,
      "eval_samples_per_second": 8.002,
      "eval_wer": 0.6063822685092463,
      "step": 5504
    },
    {
      "epoch": 11.85,
      "learning_rate": 8.622862286228623e-05,
      "loss": 0.162,
      "step": 5568
    },
    {
      "epoch": 11.85,
      "eval_loss": 0.7688995003700256,
      "eval_runtime": 255.0601,
      "eval_samples_per_second": 8.029,
      "eval_wer": 0.6063155083783964,
      "step": 5568
    },
    {
      "epoch": 11.98,
      "learning_rate": 8.47884788478848e-05,
      "loss": 0.1835,
      "step": 5632
    },
    {
      "epoch": 11.98,
      "eval_loss": 0.7229721546173096,
      "eval_runtime": 256.1311,
      "eval_samples_per_second": 7.996,
      "eval_wer": 0.6020428600040056,
      "step": 5632
    },
    {
      "epoch": 12.12,
      "learning_rate": 8.334833483348336e-05,
      "loss": 0.1491,
      "step": 5696
    },
    {
      "epoch": 12.12,
      "eval_loss": 0.7780319452285767,
      "eval_runtime": 253.9764,
      "eval_samples_per_second": 8.064,
      "eval_wer": 0.6079845116496428,
      "step": 5696
    },
    {
      "epoch": 12.26,
      "learning_rate": 8.19081908190819e-05,
      "loss": 0.163,
      "step": 5760
    },
    {
      "epoch": 12.26,
      "eval_loss": 0.7481300830841064,
      "eval_runtime": 255.5174,
      "eval_samples_per_second": 8.015,
      "eval_wer": 0.6035115828827025,
      "step": 5760
    },
    {
      "epoch": 12.39,
      "learning_rate": 8.046804680468047e-05,
      "loss": 0.1564,
      "step": 5824
    },
    {
      "epoch": 12.39,
      "eval_loss": 0.7477182149887085,
      "eval_runtime": 253.9361,
      "eval_samples_per_second": 8.065,
      "eval_wer": 0.6073169103411443,
      "step": 5824
    },
    {
      "epoch": 12.53,
      "learning_rate": 7.902790279027903e-05,
      "loss": 0.1651,
      "step": 5888
    },
    {
      "epoch": 12.53,
      "eval_loss": 0.7700252532958984,
      "eval_runtime": 255.811,
      "eval_samples_per_second": 8.006,
      "eval_wer": 0.6119901195006342,
      "step": 5888
    },
    {
      "epoch": 12.66,
      "learning_rate": 7.75877587758776e-05,
      "loss": 0.1406,
      "step": 5952
    },
    {
      "epoch": 12.66,
      "eval_loss": 0.7303926348686218,
      "eval_runtime": 255.3564,
      "eval_samples_per_second": 8.02,
      "eval_wer": 0.6010414580412577,
      "step": 5952
    },
    {
      "epoch": 12.8,
      "learning_rate": 7.614761476147616e-05,
      "loss": 0.1454,
      "step": 6016
    },
    {
      "epoch": 12.8,
      "eval_loss": 0.7804365158081055,
      "eval_runtime": 258.3951,
      "eval_samples_per_second": 7.926,
      "eval_wer": 0.6108551972761866,
      "step": 6016
    },
    {
      "epoch": 12.94,
      "learning_rate": 7.470747074707472e-05,
      "loss": 0.1405,
      "step": 6080
    },
    {
      "epoch": 12.94,
      "eval_loss": 0.7660069465637207,
      "eval_runtime": 258.3136,
      "eval_samples_per_second": 7.928,
      "eval_wer": 0.5965017691434675,
      "step": 6080
    },
    {
      "epoch": 13.07,
      "learning_rate": 7.326732673267327e-05,
      "loss": 0.142,
      "step": 6144
    },
    {
      "epoch": 13.07,
      "eval_loss": 0.7802821397781372,
      "eval_runtime": 255.9638,
      "eval_samples_per_second": 8.001,
      "eval_wer": 0.6066493090326457,
      "step": 6144
    },
    {
      "epoch": 13.21,
      "learning_rate": 7.182718271827183e-05,
      "loss": 0.1423,
      "step": 6208
    },
    {
      "epoch": 13.21,
      "eval_loss": 0.7940844297409058,
      "eval_runtime": 257.753,
      "eval_samples_per_second": 7.946,
      "eval_wer": 0.6061152279858468,
      "step": 6208
    },
    {
      "epoch": 13.34,
      "learning_rate": 7.038703870387039e-05,
      "loss": 0.1494,
      "step": 6272
    },
    {
      "epoch": 13.34,
      "eval_loss": 0.7598519325256348,
      "eval_runtime": 257.9759,
      "eval_samples_per_second": 7.939,
      "eval_wer": 0.6009079377795581,
      "step": 6272
    },
    {
      "epoch": 13.48,
      "learning_rate": 6.894689468946895e-05,
      "loss": 0.1459,
      "step": 6336
    },
    {
      "epoch": 13.48,
      "eval_loss": 0.7658703327178955,
      "eval_runtime": 257.4947,
      "eval_samples_per_second": 7.954,
      "eval_wer": 0.6087188730889913,
      "step": 6336
    },
    {
      "epoch": 13.62,
      "learning_rate": 6.75067506750675e-05,
      "loss": 0.1316,
      "step": 6400
    },
    {
      "epoch": 13.62,
      "eval_loss": 0.7935535311698914,
      "eval_runtime": 256.5247,
      "eval_samples_per_second": 7.984,
      "eval_wer": 0.6047132652379998,
      "step": 6400
    },
    {
      "epoch": 13.75,
      "learning_rate": 6.606660666066608e-05,
      "loss": 0.1428,
      "step": 6464
    },
    {
      "epoch": 13.75,
      "eval_loss": 0.782112181186676,
      "eval_runtime": 259.1672,
      "eval_samples_per_second": 7.902,
      "eval_wer": 0.5998397756859604,
      "step": 6464
    },
    {
      "epoch": 13.89,
      "learning_rate": 6.462646264626463e-05,
      "loss": 0.1391,
      "step": 6528
    },
    {
      "epoch": 13.89,
      "eval_loss": 0.7535277605056763,
      "eval_runtime": 257.3279,
      "eval_samples_per_second": 7.959,
      "eval_wer": 0.5976366913679151,
      "step": 6528
    },
    {
      "epoch": 14.03,
      "learning_rate": 6.318631863186318e-05,
      "loss": 0.138,
      "step": 6592
    },
    {
      "epoch": 14.03,
      "eval_loss": 0.7846142649650574,
      "eval_runtime": 257.0571,
      "eval_samples_per_second": 7.967,
      "eval_wer": 0.5956338874424194,
      "step": 6592
    },
    {
      "epoch": 14.16,
      "learning_rate": 6.174617461746175e-05,
      "loss": 0.1299,
      "step": 6656
    },
    {
      "epoch": 14.16,
      "eval_loss": 0.7645628452301025,
      "eval_runtime": 259.0072,
      "eval_samples_per_second": 7.907,
      "eval_wer": 0.5987716135923626,
      "step": 6656
    },
    {
      "epoch": 14.3,
      "learning_rate": 6.03060306030603e-05,
      "loss": 0.1287,
      "step": 6720
    },
    {
      "epoch": 14.3,
      "eval_loss": 0.8099244832992554,
      "eval_runtime": 258.7408,
      "eval_samples_per_second": 7.915,
      "eval_wer": 0.612056879631484,
      "step": 6720
    },
    {
      "epoch": 14.43,
      "learning_rate": 5.886588658865887e-05,
      "loss": 0.1288,
      "step": 6784
    },
    {
      "epoch": 14.43,
      "eval_loss": 0.8042709231376648,
      "eval_runtime": 258.8574,
      "eval_samples_per_second": 7.912,
      "eval_wer": 0.6015755390880566,
      "step": 6784
    },
    {
      "epoch": 14.57,
      "learning_rate": 5.742574257425742e-05,
      "loss": 0.131,
      "step": 6848
    },
    {
      "epoch": 14.57,
      "eval_loss": 0.7604876756668091,
      "eval_runtime": 256.5152,
      "eval_samples_per_second": 7.984,
      "eval_wer": 0.5951665665264704,
      "step": 6848
    },
    {
      "epoch": 14.71,
      "learning_rate": 5.598559855985599e-05,
      "loss": 0.1353,
      "step": 6912
    },
    {
      "epoch": 14.71,
      "eval_loss": 0.784662127494812,
      "eval_runtime": 256.9735,
      "eval_samples_per_second": 7.97,
      "eval_wer": 0.59997329594766,
      "step": 6912
    },
    {
      "epoch": 14.84,
      "learning_rate": 5.4545454545454546e-05,
      "loss": 0.133,
      "step": 6976
    },
    {
      "epoch": 14.84,
      "eval_loss": 0.8255736827850342,
      "eval_runtime": 256.2102,
      "eval_samples_per_second": 7.993,
      "eval_wer": 0.6010414580412577,
      "step": 6976
    },
    {
      "epoch": 14.98,
      "learning_rate": 5.310531053105311e-05,
      "loss": 0.1351,
      "step": 7040
    },
    {
      "epoch": 14.98,
      "eval_loss": 0.7962599992752075,
      "eval_runtime": 256.4813,
      "eval_samples_per_second": 7.985,
      "eval_wer": 0.5983710528072635,
      "step": 7040
    },
    {
      "epoch": 15.11,
      "learning_rate": 5.1665166516651664e-05,
      "loss": 0.1156,
      "step": 7104
    },
    {
      "epoch": 15.11,
      "eval_loss": 0.8220678567886353,
      "eval_runtime": 255.8959,
      "eval_samples_per_second": 8.003,
      "eval_wer": 0.6015087789572068,
      "step": 7104
    },
    {
      "epoch": 15.25,
      "learning_rate": 5.022502250225023e-05,
      "loss": 0.1171,
      "step": 7168
    },
    {
      "epoch": 15.25,
      "eval_loss": 0.7746037840843201,
      "eval_runtime": 255.6563,
      "eval_samples_per_second": 8.011,
      "eval_wer": 0.592562921423326,
      "step": 7168
    },
    {
      "epoch": 15.39,
      "learning_rate": 4.878487848784879e-05,
      "loss": 0.1213,
      "step": 7232
    },
    {
      "epoch": 15.39,
      "eval_loss": 0.8010141253471375,
      "eval_runtime": 256.8217,
      "eval_samples_per_second": 7.974,
      "eval_wer": 0.6013084985646572,
      "step": 7232
    },
    {
      "epoch": 15.52,
      "learning_rate": 4.734473447344735e-05,
      "loss": 0.1175,
      "step": 7296
    },
    {
      "epoch": 15.52,
      "eval_loss": 0.8121392726898193,
      "eval_runtime": 255.5954,
      "eval_samples_per_second": 8.013,
      "eval_wer": 0.6077174711262434,
      "step": 7296
    },
    {
      "epoch": 15.66,
      "learning_rate": 4.59045904590459e-05,
      "loss": 0.12,
      "step": 7360
    },
    {
      "epoch": 15.66,
      "eval_loss": 0.7914840579032898,
      "eval_runtime": 256.4734,
      "eval_samples_per_second": 7.985,
      "eval_wer": 0.5952333266573202,
      "step": 7360
    },
    {
      "epoch": 15.8,
      "learning_rate": 4.4464446444644464e-05,
      "loss": 0.1147,
      "step": 7424
    },
    {
      "epoch": 15.8,
      "eval_loss": 0.7968710660934448,
      "eval_runtime": 255.805,
      "eval_samples_per_second": 8.006,
      "eval_wer": 0.5996394952934108,
      "step": 7424
    },
    {
      "epoch": 15.93,
      "learning_rate": 4.3024302430243026e-05,
      "loss": 0.1217,
      "step": 7488
    },
    {
      "epoch": 15.93,
      "eval_loss": 0.8240338563919067,
      "eval_runtime": 254.7962,
      "eval_samples_per_second": 8.038,
      "eval_wer": 0.6034448227518526,
      "step": 7488
    },
    {
      "epoch": 16.07,
      "learning_rate": 4.158415841584158e-05,
      "loss": 0.1088,
      "step": 7552
    },
    {
      "epoch": 16.07,
      "eval_loss": 0.8148845434188843,
      "eval_runtime": 255.3742,
      "eval_samples_per_second": 8.02,
      "eval_wer": 0.6006408972561587,
      "step": 7552
    },
    {
      "epoch": 16.2,
      "learning_rate": 4.0144014401440144e-05,
      "loss": 0.1086,
      "step": 7616
    },
    {
      "epoch": 16.2,
      "eval_loss": 0.8434123992919922,
      "eval_runtime": 256.199,
      "eval_samples_per_second": 7.994,
      "eval_wer": 0.6020428600040056,
      "step": 7616
    },
    {
      "epoch": 16.34,
      "learning_rate": 3.870387038703871e-05,
      "loss": 0.122,
      "step": 7680
    },
    {
      "epoch": 16.34,
      "eval_loss": 0.8277792930603027,
      "eval_runtime": 255.246,
      "eval_samples_per_second": 8.024,
      "eval_wer": 0.5944989652179719,
      "step": 7680
    },
    {
      "epoch": 16.48,
      "learning_rate": 3.726372637263726e-05,
      "loss": 0.109,
      "step": 7744
    },
    {
      "epoch": 16.48,
      "eval_loss": 0.8119443655014038,
      "eval_runtime": 255.9109,
      "eval_samples_per_second": 8.003,
      "eval_wer": 0.5994392149008612,
      "step": 7744
    },
    {
      "epoch": 16.61,
      "learning_rate": 3.5823582358235825e-05,
      "loss": 0.1159,
      "step": 7808
    },
    {
      "epoch": 16.61,
      "eval_loss": 0.8458046913146973,
      "eval_runtime": 255.3949,
      "eval_samples_per_second": 8.019,
      "eval_wer": 0.6047800253688497,
      "step": 7808
    },
    {
      "epoch": 16.75,
      "learning_rate": 3.438343834383439e-05,
      "loss": 0.0992,
      "step": 7872
    },
    {
      "epoch": 16.75,
      "eval_loss": 0.8495743870735168,
      "eval_runtime": 255.6285,
      "eval_samples_per_second": 8.012,
      "eval_wer": 0.6048467854996996,
      "step": 7872
    },
    {
      "epoch": 16.89,
      "learning_rate": 3.294329432943294e-05,
      "loss": 0.1078,
      "step": 7936
    },
    {
      "epoch": 16.89,
      "eval_loss": 0.8393277525901794,
      "eval_runtime": 255.6117,
      "eval_samples_per_second": 8.012,
      "eval_wer": 0.5990386541157621,
      "step": 7936
    },
    {
      "epoch": 17.02,
      "learning_rate": 3.1503150315031506e-05,
      "loss": 0.1096,
      "step": 8000
    },
    {
      "epoch": 17.02,
      "eval_loss": 0.8454439640045166,
      "eval_runtime": 255.5979,
      "eval_samples_per_second": 8.013,
      "eval_wer": 0.5995059750317111,
      "step": 8000
    },
    {
      "epoch": 17.16,
      "learning_rate": 3.0063006300630065e-05,
      "loss": 0.0986,
      "step": 8064
    },
    {
      "epoch": 17.16,
      "eval_loss": 0.8286423683166504,
      "eval_runtime": 257.011,
      "eval_samples_per_second": 7.969,
      "eval_wer": 0.6011082181721076,
      "step": 8064
    },
    {
      "epoch": 17.29,
      "learning_rate": 2.8622862286228624e-05,
      "loss": 0.1154,
      "step": 8128
    },
    {
      "epoch": 17.29,
      "eval_loss": 0.8368263244628906,
      "eval_runtime": 258.2275,
      "eval_samples_per_second": 7.931,
      "eval_wer": 0.5988383737232125,
      "step": 8128
    },
    {
      "epoch": 17.43,
      "learning_rate": 2.7182718271827183e-05,
      "loss": 0.0927,
      "step": 8192
    },
    {
      "epoch": 17.43,
      "eval_loss": 0.861258864402771,
      "eval_runtime": 257.1554,
      "eval_samples_per_second": 7.964,
      "eval_wer": 0.6089191534815408,
      "step": 8192
    },
    {
      "epoch": 17.57,
      "learning_rate": 2.5742574257425746e-05,
      "loss": 0.1056,
      "step": 8256
    },
    {
      "epoch": 17.57,
      "eval_loss": 0.8197841644287109,
      "eval_runtime": 256.4042,
      "eval_samples_per_second": 7.987,
      "eval_wer": 0.592562921423326,
      "step": 8256
    },
    {
      "epoch": 17.7,
      "learning_rate": 2.4302430243024305e-05,
      "loss": 0.1112,
      "step": 8320
    },
    {
      "epoch": 17.7,
      "eval_loss": 0.8646882772445679,
      "eval_runtime": 257.1754,
      "eval_samples_per_second": 7.963,
      "eval_wer": 0.5978369717604647,
      "step": 8320
    },
    {
      "epoch": 17.84,
      "learning_rate": 2.2862286228622864e-05,
      "loss": 0.0962,
      "step": 8384
    },
    {
      "epoch": 17.84,
      "eval_loss": 0.8387396335601807,
      "eval_runtime": 255.7406,
      "eval_samples_per_second": 8.008,
      "eval_wer": 0.5963014887509179,
      "step": 8384
    },
    {
      "epoch": 17.97,
      "learning_rate": 2.1422142214221423e-05,
      "loss": 0.0973,
      "step": 8448
    },
    {
      "epoch": 17.97,
      "eval_loss": 0.8584703207015991,
      "eval_runtime": 256.391,
      "eval_samples_per_second": 7.988,
      "eval_wer": 0.5963682488817678,
      "step": 8448
    },
    {
      "epoch": 18.11,
      "learning_rate": 1.9981998199819982e-05,
      "loss": 0.1013,
      "step": 8512
    },
    {
      "epoch": 18.11,
      "eval_loss": 0.8741297721862793,
      "eval_runtime": 257.1245,
      "eval_samples_per_second": 7.965,
      "eval_wer": 0.6001735763402096,
      "step": 8512
    },
    {
      "epoch": 18.25,
      "learning_rate": 1.854185418541854e-05,
      "loss": 0.1037,
      "step": 8576
    },
    {
      "epoch": 18.25,
      "eval_loss": 0.8630872368812561,
      "eval_runtime": 257.6611,
      "eval_samples_per_second": 7.948,
      "eval_wer": 0.5943654449562721,
      "step": 8576
    },
    {
      "epoch": 18.38,
      "learning_rate": 1.71017101710171e-05,
      "loss": 0.0947,
      "step": 8640
    },
    {
      "epoch": 18.38,
      "eval_loss": 0.86741703748703,
      "eval_runtime": 257.8968,
      "eval_samples_per_second": 7.941,
      "eval_wer": 0.596768809666867,
      "step": 8640
    },
    {
      "epoch": 18.52,
      "learning_rate": 1.5661566156615663e-05,
      "loss": 0.0884,
      "step": 8704
    },
    {
      "epoch": 18.52,
      "eval_loss": 0.8764230608940125,
      "eval_runtime": 256.705,
      "eval_samples_per_second": 7.978,
      "eval_wer": 0.5943654449562721,
      "step": 8704
    },
    {
      "epoch": 18.66,
      "learning_rate": 1.4221422142214222e-05,
      "loss": 0.0997,
      "step": 8768
    },
    {
      "epoch": 18.66,
      "eval_loss": 0.8616538047790527,
      "eval_runtime": 256.2987,
      "eval_samples_per_second": 7.991,
      "eval_wer": 0.593030242339275,
      "step": 8768
    },
    {
      "epoch": 18.79,
      "learning_rate": 1.2781278127812781e-05,
      "loss": 0.1066,
      "step": 8832
    },
    {
      "epoch": 18.79,
      "eval_loss": 0.8461120128631592,
      "eval_runtime": 255.9062,
      "eval_samples_per_second": 8.003,
      "eval_wer": 0.5889578743574337,
      "step": 8832
    },
    {
      "epoch": 18.93,
      "learning_rate": 1.1341134113411342e-05,
      "loss": 0.0952,
      "step": 8896
    },
    {
      "epoch": 18.93,
      "eval_loss": 0.8545425534248352,
      "eval_runtime": 254.9463,
      "eval_samples_per_second": 8.033,
      "eval_wer": 0.5970358501902664,
      "step": 8896
    },
    {
      "epoch": 19.06,
      "learning_rate": 9.900990099009901e-06,
      "loss": 0.1094,
      "step": 8960
    },
    {
      "epoch": 19.06,
      "eval_loss": 0.8487824201583862,
      "eval_runtime": 255.9015,
      "eval_samples_per_second": 8.003,
      "eval_wer": 0.5928967220775753,
      "step": 8960
    },
    {
      "epoch": 19.2,
      "learning_rate": 8.46084608460846e-06,
      "loss": 0.1023,
      "step": 9024
    },
    {
      "epoch": 19.2,
      "eval_loss": 0.8481884002685547,
      "eval_runtime": 255.378,
      "eval_samples_per_second": 8.019,
      "eval_wer": 0.5924961612924762,
      "step": 9024
    },
    {
      "epoch": 19.34,
      "learning_rate": 7.02070207020702e-06,
      "loss": 0.0896,
      "step": 9088
    },
    {
      "epoch": 19.34,
      "eval_loss": 0.8508026003837585,
      "eval_runtime": 258.1213,
      "eval_samples_per_second": 7.934,
      "eval_wer": 0.5942319246945724,
      "step": 9088
    },
    {
      "epoch": 19.47,
      "learning_rate": 5.58055805580558e-06,
      "loss": 0.1055,
      "step": 9152
    },
    {
      "epoch": 19.47,
      "eval_loss": 0.8545140027999878,
      "eval_runtime": 255.3836,
      "eval_samples_per_second": 8.019,
      "eval_wer": 0.5957674077041191,
      "step": 9152
    },
    {
      "epoch": 19.61,
      "learning_rate": 4.14041404140414e-06,
      "loss": 0.1102,
      "step": 9216
    },
    {
      "epoch": 19.61,
      "eval_loss": 0.8519406318664551,
      "eval_runtime": 255.7976,
      "eval_samples_per_second": 8.006,
      "eval_wer": 0.5952333266573202,
      "step": 9216
    },
    {
      "epoch": 19.74,
      "learning_rate": 2.7002700270027004e-06,
      "loss": 0.1021,
      "step": 9280
    },
    {
      "epoch": 19.74,
      "eval_loss": 0.8542845249176025,
      "eval_runtime": 255.9288,
      "eval_samples_per_second": 8.002,
      "eval_wer": 0.5940984044328727,
      "step": 9280
    },
    {
      "epoch": 19.88,
      "learning_rate": 1.2601260126012601e-06,
      "loss": 0.0934,
      "step": 9344
    },
    {
      "epoch": 19.88,
      "eval_loss": 0.8573585748672485,
      "eval_runtime": 256.1564,
      "eval_samples_per_second": 7.995,
      "eval_wer": 0.5932305227318245,
      "step": 9344
    },
    {
      "epoch": 20.0,
      "step": 9400,
      "total_flos": 5.3055511592602894e+19,
      "train_runtime": 80232.1889,
      "train_samples_per_second": 0.117
    }
  ],
  "max_steps": 9400,
  "num_train_epochs": 20,
  "total_flos": 5.3055511592602894e+19,
  "trial_name": null,
  "trial_params": null
}