|
{ |
|
"best_metric": 3.3327760696411133, |
|
"best_model_checkpoint": "nrshoudi/hubert-xlarge-ll60k_arabic/checkpoint-1637", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 32740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.987782529016494e-05, |
|
"loss": 13.7253, |
|
"step": 1637 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 3.3327760696411133, |
|
"eval_per": 1.0, |
|
"eval_runtime": 121.8139, |
|
"eval_samples_per_second": 6.674, |
|
"eval_steps_per_second": 3.341, |
|
"eval_wer": 1.0, |
|
"step": 1637 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.987782529016494e-05, |
|
"loss": 3.3354, |
|
"step": 3274 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 3.2846710681915283, |
|
"eval_per": 1.0, |
|
"eval_runtime": 123.7338, |
|
"eval_samples_per_second": 6.571, |
|
"eval_steps_per_second": 3.289, |
|
"eval_wer": 1.0, |
|
"step": 3274 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.44580194122039e-05, |
|
"loss": 3.304, |
|
"step": 4911 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 3.2374837398529053, |
|
"eval_per": 1.0, |
|
"eval_runtime": 124.396, |
|
"eval_samples_per_second": 6.536, |
|
"eval_steps_per_second": 3.272, |
|
"eval_wer": 1.0, |
|
"step": 4911 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.89058575985882e-05, |
|
"loss": 3.2655, |
|
"step": 6548 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 3.2142696380615234, |
|
"eval_per": 1.0, |
|
"eval_runtime": 124.9199, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 3.258, |
|
"eval_wer": 1.0, |
|
"step": 6548 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.335030204303264e-05, |
|
"loss": 3.2242, |
|
"step": 8185 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 3.1873738765716553, |
|
"eval_per": 1.0, |
|
"eval_runtime": 125.5279, |
|
"eval_samples_per_second": 6.477, |
|
"eval_steps_per_second": 3.242, |
|
"eval_wer": 1.0, |
|
"step": 8185 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.779814022941696e-05, |
|
"loss": 3.1556, |
|
"step": 9822 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 3.0734314918518066, |
|
"eval_per": 1.0, |
|
"eval_runtime": 126.5196, |
|
"eval_samples_per_second": 6.426, |
|
"eval_steps_per_second": 3.217, |
|
"eval_wer": 1.0, |
|
"step": 9822 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.22425846738614e-05, |
|
"loss": 3.0485, |
|
"step": 11459 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.9547784328460693, |
|
"eval_per": 1.0, |
|
"eval_runtime": 127.0062, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 3.205, |
|
"eval_wer": 1.0, |
|
"step": 11459 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.668702911830585e-05, |
|
"loss": 2.935, |
|
"step": 13096 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.83778977394104, |
|
"eval_per": 0.9171063789691888, |
|
"eval_runtime": 127.5426, |
|
"eval_samples_per_second": 6.374, |
|
"eval_steps_per_second": 3.191, |
|
"eval_wer": 0.9043441743269096, |
|
"step": 13096 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 6.113826104663001e-05, |
|
"loss": 2.8158, |
|
"step": 14733 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.702333450317383, |
|
"eval_per": 0.9086733251672477, |
|
"eval_runtime": 127.7194, |
|
"eval_samples_per_second": 6.366, |
|
"eval_steps_per_second": 3.187, |
|
"eval_wer": 0.8971116606784565, |
|
"step": 14733 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.558270549107446e-05, |
|
"loss": 2.7185, |
|
"step": 16370 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.598202705383301, |
|
"eval_per": 0.8996749269763498, |
|
"eval_runtime": 127.815, |
|
"eval_samples_per_second": 6.361, |
|
"eval_steps_per_second": 3.184, |
|
"eval_wer": 0.8864728664084737, |
|
"step": 16370 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 5.00271499355189e-05, |
|
"loss": 2.6406, |
|
"step": 18007 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.5032498836517334, |
|
"eval_per": 0.857085649674927, |
|
"eval_runtime": 128.216, |
|
"eval_samples_per_second": 6.341, |
|
"eval_steps_per_second": 3.174, |
|
"eval_wer": 0.8452708693014792, |
|
"step": 18007 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.4478381863843074e-05, |
|
"loss": 2.5721, |
|
"step": 19644 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.4384822845458984, |
|
"eval_per": 0.8291011024215584, |
|
"eval_runtime": 128.477, |
|
"eval_samples_per_second": 6.328, |
|
"eval_steps_per_second": 3.168, |
|
"eval_wer": 0.8179273015724885, |
|
"step": 19644 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.892622005022738e-05, |
|
"loss": 2.5136, |
|
"step": 21281 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.381009578704834, |
|
"eval_per": 0.8097145010835768, |
|
"eval_runtime": 128.3969, |
|
"eval_samples_per_second": 6.332, |
|
"eval_steps_per_second": 3.17, |
|
"eval_wer": 0.7993560729783957, |
|
"step": 21281 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.337405823661169e-05, |
|
"loss": 2.457, |
|
"step": 22918 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.304086208343506, |
|
"eval_per": 0.7896212192593989, |
|
"eval_runtime": 128.5082, |
|
"eval_samples_per_second": 6.326, |
|
"eval_steps_per_second": 3.167, |
|
"eval_wer": 0.779151696141104, |
|
"step": 22918 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.7818502681056137e-05, |
|
"loss": 2.4104, |
|
"step": 24555 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.246638536453247, |
|
"eval_per": 0.7694808254028078, |
|
"eval_runtime": 128.3201, |
|
"eval_samples_per_second": 6.336, |
|
"eval_steps_per_second": 3.172, |
|
"eval_wer": 0.7587606737903038, |
|
"step": 24555 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.2262947125500577e-05, |
|
"loss": 2.3711, |
|
"step": 26192 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.2065765857696533, |
|
"eval_per": 0.7417318383115048, |
|
"eval_runtime": 128.1923, |
|
"eval_samples_per_second": 6.342, |
|
"eval_steps_per_second": 3.175, |
|
"eval_wer": 0.7321170267369698, |
|
"step": 26192 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.6714179053824745e-05, |
|
"loss": 2.3365, |
|
"step": 27829 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.1757304668426514, |
|
"eval_per": 0.7071987185527183, |
|
"eval_runtime": 128.7988, |
|
"eval_samples_per_second": 6.312, |
|
"eval_steps_per_second": 3.16, |
|
"eval_wer": 0.7002006439270216, |
|
"step": 27829 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.1158623498269193e-05, |
|
"loss": 2.3109, |
|
"step": 29466 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 2.141193389892578, |
|
"eval_per": 0.6828182417789503, |
|
"eval_runtime": 128.9306, |
|
"eval_samples_per_second": 6.306, |
|
"eval_steps_per_second": 3.157, |
|
"eval_wer": 0.6771499230087257, |
|
"step": 29466 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 5.603067942713636e-06, |
|
"loss": 2.2857, |
|
"step": 31103 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.1265432834625244, |
|
"eval_per": 0.6784603787807406, |
|
"eval_runtime": 129.2068, |
|
"eval_samples_per_second": 6.292, |
|
"eval_steps_per_second": 3.15, |
|
"eval_wer": 0.6722038169007513, |
|
"step": 31103 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5.090612909794339e-08, |
|
"loss": 2.2757, |
|
"step": 32740 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 2.113328695297241, |
|
"eval_per": 0.670616225383963, |
|
"eval_runtime": 128.9922, |
|
"eval_samples_per_second": 6.303, |
|
"eval_steps_per_second": 3.155, |
|
"eval_wer": 0.6645980122252811, |
|
"step": 32740 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 32740, |
|
"total_flos": 3.2217850292594e+19, |
|
"train_loss": 3.285075871508953, |
|
"train_runtime": 18041.7674, |
|
"train_samples_per_second": 3.628, |
|
"train_steps_per_second": 1.815 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 32740, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 3.2217850292594e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|