|
{ |
|
"best_metric": 0.6425132155418396, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-toigen-combined-model/checkpoint-800", |
|
"epoch": 6.6059171597633135, |
|
"eval_steps": 200, |
|
"global_step": 1400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11834319526627218, |
|
"grad_norm": 120.00074768066406, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 13.7413, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.23668639053254437, |
|
"grad_norm": 93.74044799804688, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 10.9596, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.35502958579881655, |
|
"grad_norm": 83.35017395019531, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 8.1662, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47337278106508873, |
|
"grad_norm": 69.67986297607422, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 6.1015, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"grad_norm": 65.54053497314453, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 4.9343, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7100591715976331, |
|
"grad_norm": 51.996883392333984, |
|
"learning_rate": 2.9e-06, |
|
"loss": 4.21, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8284023668639053, |
|
"grad_norm": 52.11565017700195, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 4.4511, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9467455621301775, |
|
"grad_norm": 70.60668182373047, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 3.9586, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9467455621301775, |
|
"eval_loss": 0.8733280897140503, |
|
"eval_runtime": 258.2817, |
|
"eval_samples_per_second": 1.518, |
|
"eval_steps_per_second": 0.759, |
|
"eval_wer": 0.5994351612144034, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0615384615384615, |
|
"grad_norm": 54.279884338378906, |
|
"learning_rate": 4.4e-06, |
|
"loss": 3.2145, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.1798816568047337, |
|
"grad_norm": 50.64502716064453, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 3.1628, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.298224852071006, |
|
"grad_norm": 62.229068756103516, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 2.9053, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.4165680473372781, |
|
"grad_norm": 48.52084732055664, |
|
"learning_rate": 5.9e-06, |
|
"loss": 2.8052, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.5349112426035503, |
|
"grad_norm": 45.87370300292969, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 2.7131, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.6532544378698226, |
|
"grad_norm": 60.52415084838867, |
|
"learning_rate": 6.9e-06, |
|
"loss": 2.7482, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.7715976331360945, |
|
"grad_norm": 51.26884078979492, |
|
"learning_rate": 7.4e-06, |
|
"loss": 2.8582, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.8899408284023669, |
|
"grad_norm": 51.80022430419922, |
|
"learning_rate": 7.9e-06, |
|
"loss": 2.4999, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.8899408284023669, |
|
"eval_loss": 0.6725602149963379, |
|
"eval_runtime": 255.9417, |
|
"eval_samples_per_second": 1.532, |
|
"eval_steps_per_second": 0.766, |
|
"eval_wer": 0.46481525064721113, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0047337278106507, |
|
"grad_norm": 35.06719207763672, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 2.1474, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.123076923076923, |
|
"grad_norm": 40.39971160888672, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 1.4558, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.2414201183431954, |
|
"grad_norm": 37.043128967285156, |
|
"learning_rate": 9.4e-06, |
|
"loss": 1.6915, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.3597633136094673, |
|
"grad_norm": 38.598167419433594, |
|
"learning_rate": 9.9e-06, |
|
"loss": 1.7167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4781065088757397, |
|
"grad_norm": 46.40306091308594, |
|
"learning_rate": 9.965694682675816e-06, |
|
"loss": 1.6127, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.596449704142012, |
|
"grad_norm": 32.87551498413086, |
|
"learning_rate": 9.922813036020584e-06, |
|
"loss": 1.6845, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.714792899408284, |
|
"grad_norm": 19.153915405273438, |
|
"learning_rate": 9.879931389365352e-06, |
|
"loss": 1.4961, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.8331360946745563, |
|
"grad_norm": 28.79606819152832, |
|
"learning_rate": 9.837049742710121e-06, |
|
"loss": 1.7047, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.8331360946745563, |
|
"eval_loss": 0.6522520184516907, |
|
"eval_runtime": 254.5133, |
|
"eval_samples_per_second": 1.54, |
|
"eval_steps_per_second": 0.77, |
|
"eval_wer": 0.45846081430924923, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.9514792899408286, |
|
"grad_norm": 39.831016540527344, |
|
"learning_rate": 9.794168096054889e-06, |
|
"loss": 1.8359, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.0662721893491125, |
|
"grad_norm": 32.2681999206543, |
|
"learning_rate": 9.751286449399657e-06, |
|
"loss": 1.1873, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.184615384615385, |
|
"grad_norm": 23.113107681274414, |
|
"learning_rate": 9.708404802744426e-06, |
|
"loss": 0.8456, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.3029585798816568, |
|
"grad_norm": 24.971742630004883, |
|
"learning_rate": 9.665523156089196e-06, |
|
"loss": 0.7569, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.421301775147929, |
|
"grad_norm": 18.853492736816406, |
|
"learning_rate": 9.622641509433963e-06, |
|
"loss": 0.9036, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 3.5396449704142015, |
|
"grad_norm": 23.712282180786133, |
|
"learning_rate": 9.579759862778731e-06, |
|
"loss": 0.7134, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.6579881656804734, |
|
"grad_norm": 38.5533332824707, |
|
"learning_rate": 9.536878216123499e-06, |
|
"loss": 0.9859, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 3.7763313609467457, |
|
"grad_norm": 38.32489776611328, |
|
"learning_rate": 9.493996569468268e-06, |
|
"loss": 0.9573, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.7763313609467457, |
|
"eval_loss": 0.6425132155418396, |
|
"eval_runtime": 261.773, |
|
"eval_samples_per_second": 1.497, |
|
"eval_steps_per_second": 0.749, |
|
"eval_wer": 0.4497528830313015, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.8946745562130176, |
|
"grad_norm": 22.206430435180664, |
|
"learning_rate": 9.451114922813038e-06, |
|
"loss": 0.876, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 4.0094674556213015, |
|
"grad_norm": 17.389280319213867, |
|
"learning_rate": 9.408233276157806e-06, |
|
"loss": 0.8495, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.127810650887574, |
|
"grad_norm": 15.826939582824707, |
|
"learning_rate": 9.365351629502573e-06, |
|
"loss": 0.3702, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 4.246153846153846, |
|
"grad_norm": 19.050931930541992, |
|
"learning_rate": 9.322469982847341e-06, |
|
"loss": 0.4092, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.364497041420118, |
|
"grad_norm": 18.2451114654541, |
|
"learning_rate": 9.27958833619211e-06, |
|
"loss": 0.3743, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 4.482840236686391, |
|
"grad_norm": 12.22165298461914, |
|
"learning_rate": 9.236706689536878e-06, |
|
"loss": 0.4114, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.601183431952663, |
|
"grad_norm": 14.346109390258789, |
|
"learning_rate": 9.193825042881648e-06, |
|
"loss": 0.3816, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 4.719526627218935, |
|
"grad_norm": 56.37358474731445, |
|
"learning_rate": 9.150943396226416e-06, |
|
"loss": 0.4029, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.719526627218935, |
|
"eval_loss": 0.6656792163848877, |
|
"eval_runtime": 253.5608, |
|
"eval_samples_per_second": 1.546, |
|
"eval_steps_per_second": 0.773, |
|
"eval_wer": 0.404330430689574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.8378698224852075, |
|
"grad_norm": 23.6176700592041, |
|
"learning_rate": 9.108061749571185e-06, |
|
"loss": 0.4506, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 4.956213017751479, |
|
"grad_norm": 23.03661346435547, |
|
"learning_rate": 9.065180102915953e-06, |
|
"loss": 0.4928, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.071005917159764, |
|
"grad_norm": 25.316198348999023, |
|
"learning_rate": 9.02229845626072e-06, |
|
"loss": 0.2739, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 5.189349112426036, |
|
"grad_norm": 6.165097713470459, |
|
"learning_rate": 8.97941680960549e-06, |
|
"loss": 0.1914, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.3076923076923075, |
|
"grad_norm": 20.19268226623535, |
|
"learning_rate": 8.936535162950258e-06, |
|
"loss": 0.221, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 5.42603550295858, |
|
"grad_norm": 12.469457626342773, |
|
"learning_rate": 8.893653516295027e-06, |
|
"loss": 0.199, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.544378698224852, |
|
"grad_norm": 14.28165054321289, |
|
"learning_rate": 8.850771869639795e-06, |
|
"loss": 0.2235, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 5.662721893491124, |
|
"grad_norm": 15.683744430541992, |
|
"learning_rate": 8.807890222984563e-06, |
|
"loss": 0.2311, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.662721893491124, |
|
"eval_loss": 0.6909866333007812, |
|
"eval_runtime": 253.3291, |
|
"eval_samples_per_second": 1.547, |
|
"eval_steps_per_second": 0.774, |
|
"eval_wer": 0.41868674982348786, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.781065088757396, |
|
"grad_norm": 11.54914665222168, |
|
"learning_rate": 8.765008576329332e-06, |
|
"loss": 0.2098, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 5.899408284023669, |
|
"grad_norm": 18.604154586791992, |
|
"learning_rate": 8.722126929674101e-06, |
|
"loss": 0.1964, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.014201183431953, |
|
"grad_norm": 13.384803771972656, |
|
"learning_rate": 8.67924528301887e-06, |
|
"loss": 0.2031, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 6.132544378698225, |
|
"grad_norm": 9.78126049041748, |
|
"learning_rate": 8.636363636363637e-06, |
|
"loss": 0.1747, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.250887573964497, |
|
"grad_norm": 8.569636344909668, |
|
"learning_rate": 8.593481989708405e-06, |
|
"loss": 0.1078, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 6.36923076923077, |
|
"grad_norm": 16.902490615844727, |
|
"learning_rate": 8.550600343053174e-06, |
|
"loss": 0.1253, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.487573964497042, |
|
"grad_norm": 15.703929901123047, |
|
"learning_rate": 8.507718696397942e-06, |
|
"loss": 0.1213, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 6.6059171597633135, |
|
"grad_norm": 10.21462631225586, |
|
"learning_rate": 8.464837049742711e-06, |
|
"loss": 0.1545, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.6059171597633135, |
|
"eval_loss": 0.720788836479187, |
|
"eval_runtime": 248.7794, |
|
"eval_samples_per_second": 1.576, |
|
"eval_steps_per_second": 0.788, |
|
"eval_wer": 0.38644386914568135, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.6059171597633135, |
|
"step": 1400, |
|
"total_flos": 1.138790955810816e+19, |
|
"train_loss": 1.9424426797458103, |
|
"train_runtime": 4520.3552, |
|
"train_samples_per_second": 11.209, |
|
"train_steps_per_second": 1.4 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 6330, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.138790955810816e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|