{ "best_metric": 0.6425132155418396, "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-toigen-combined-model/checkpoint-800", "epoch": 6.6059171597633135, "eval_steps": 200, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11834319526627218, "grad_norm": 120.00074768066406, "learning_rate": 4.0000000000000003e-07, "loss": 13.7413, "step": 25 }, { "epoch": 0.23668639053254437, "grad_norm": 93.74044799804688, "learning_rate": 9.000000000000001e-07, "loss": 10.9596, "step": 50 }, { "epoch": 0.35502958579881655, "grad_norm": 83.35017395019531, "learning_rate": 1.4000000000000001e-06, "loss": 8.1662, "step": 75 }, { "epoch": 0.47337278106508873, "grad_norm": 69.67986297607422, "learning_rate": 1.9000000000000002e-06, "loss": 6.1015, "step": 100 }, { "epoch": 0.591715976331361, "grad_norm": 65.54053497314453, "learning_rate": 2.4000000000000003e-06, "loss": 4.9343, "step": 125 }, { "epoch": 0.7100591715976331, "grad_norm": 51.996883392333984, "learning_rate": 2.9e-06, "loss": 4.21, "step": 150 }, { "epoch": 0.8284023668639053, "grad_norm": 52.11565017700195, "learning_rate": 3.4000000000000005e-06, "loss": 4.4511, "step": 175 }, { "epoch": 0.9467455621301775, "grad_norm": 70.60668182373047, "learning_rate": 3.900000000000001e-06, "loss": 3.9586, "step": 200 }, { "epoch": 0.9467455621301775, "eval_loss": 0.8733280897140503, "eval_runtime": 258.2817, "eval_samples_per_second": 1.518, "eval_steps_per_second": 0.759, "eval_wer": 0.5994351612144034, "step": 200 }, { "epoch": 1.0615384615384615, "grad_norm": 54.279884338378906, "learning_rate": 4.4e-06, "loss": 3.2145, "step": 225 }, { "epoch": 1.1798816568047337, "grad_norm": 50.64502716064453, "learning_rate": 4.9000000000000005e-06, "loss": 3.1628, "step": 250 }, { "epoch": 1.298224852071006, "grad_norm": 62.229068756103516, "learning_rate": 5.400000000000001e-06, "loss": 2.9053, "step": 275 }, { "epoch": 1.4165680473372781, "grad_norm": 48.52084732055664, "learning_rate": 5.9e-06, "loss": 2.8052, "step": 300 }, { "epoch": 1.5349112426035503, "grad_norm": 45.87370300292969, "learning_rate": 6.4000000000000006e-06, "loss": 2.7131, "step": 325 }, { "epoch": 1.6532544378698226, "grad_norm": 60.52415084838867, "learning_rate": 6.9e-06, "loss": 2.7482, "step": 350 }, { "epoch": 1.7715976331360945, "grad_norm": 51.26884078979492, "learning_rate": 7.4e-06, "loss": 2.8582, "step": 375 }, { "epoch": 1.8899408284023669, "grad_norm": 51.80022430419922, "learning_rate": 7.9e-06, "loss": 2.4999, "step": 400 }, { "epoch": 1.8899408284023669, "eval_loss": 0.6725602149963379, "eval_runtime": 255.9417, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.766, "eval_wer": 0.46481525064721113, "step": 400 }, { "epoch": 2.0047337278106507, "grad_norm": 35.06719207763672, "learning_rate": 8.400000000000001e-06, "loss": 2.1474, "step": 425 }, { "epoch": 2.123076923076923, "grad_norm": 40.39971160888672, "learning_rate": 8.900000000000001e-06, "loss": 1.4558, "step": 450 }, { "epoch": 2.2414201183431954, "grad_norm": 37.043128967285156, "learning_rate": 9.4e-06, "loss": 1.6915, "step": 475 }, { "epoch": 2.3597633136094673, "grad_norm": 38.598167419433594, "learning_rate": 9.9e-06, "loss": 1.7167, "step": 500 }, { "epoch": 2.4781065088757397, "grad_norm": 46.40306091308594, "learning_rate": 9.965694682675816e-06, "loss": 1.6127, "step": 525 }, { "epoch": 2.596449704142012, "grad_norm": 32.87551498413086, "learning_rate": 9.922813036020584e-06, 
"loss": 1.6845, "step": 550 }, { "epoch": 2.714792899408284, "grad_norm": 19.153915405273438, "learning_rate": 9.879931389365352e-06, "loss": 1.4961, "step": 575 }, { "epoch": 2.8331360946745563, "grad_norm": 28.79606819152832, "learning_rate": 9.837049742710121e-06, "loss": 1.7047, "step": 600 }, { "epoch": 2.8331360946745563, "eval_loss": 0.6522520184516907, "eval_runtime": 254.5133, "eval_samples_per_second": 1.54, "eval_steps_per_second": 0.77, "eval_wer": 0.45846081430924923, "step": 600 }, { "epoch": 2.9514792899408286, "grad_norm": 39.831016540527344, "learning_rate": 9.794168096054889e-06, "loss": 1.8359, "step": 625 }, { "epoch": 3.0662721893491125, "grad_norm": 32.2681999206543, "learning_rate": 9.751286449399657e-06, "loss": 1.1873, "step": 650 }, { "epoch": 3.184615384615385, "grad_norm": 23.113107681274414, "learning_rate": 9.708404802744426e-06, "loss": 0.8456, "step": 675 }, { "epoch": 3.3029585798816568, "grad_norm": 24.971742630004883, "learning_rate": 9.665523156089196e-06, "loss": 0.7569, "step": 700 }, { "epoch": 3.421301775147929, "grad_norm": 18.853492736816406, "learning_rate": 9.622641509433963e-06, "loss": 0.9036, "step": 725 }, { "epoch": 3.5396449704142015, "grad_norm": 23.712282180786133, "learning_rate": 9.579759862778731e-06, "loss": 0.7134, "step": 750 }, { "epoch": 3.6579881656804734, "grad_norm": 38.5533332824707, "learning_rate": 9.536878216123499e-06, "loss": 0.9859, "step": 775 }, { "epoch": 3.7763313609467457, "grad_norm": 38.32489776611328, "learning_rate": 9.493996569468268e-06, "loss": 0.9573, "step": 800 }, { "epoch": 3.7763313609467457, "eval_loss": 0.6425132155418396, "eval_runtime": 261.773, "eval_samples_per_second": 1.497, "eval_steps_per_second": 0.749, "eval_wer": 0.4497528830313015, "step": 800 }, { "epoch": 3.8946745562130176, "grad_norm": 22.206430435180664, "learning_rate": 9.451114922813038e-06, "loss": 0.876, "step": 825 }, { "epoch": 4.0094674556213015, "grad_norm": 17.389280319213867, "learning_rate": 9.408233276157806e-06, "loss": 0.8495, "step": 850 }, { "epoch": 4.127810650887574, "grad_norm": 15.826939582824707, "learning_rate": 9.365351629502573e-06, "loss": 0.3702, "step": 875 }, { "epoch": 4.246153846153846, "grad_norm": 19.050931930541992, "learning_rate": 9.322469982847341e-06, "loss": 0.4092, "step": 900 }, { "epoch": 4.364497041420118, "grad_norm": 18.2451114654541, "learning_rate": 9.27958833619211e-06, "loss": 0.3743, "step": 925 }, { "epoch": 4.482840236686391, "grad_norm": 12.22165298461914, "learning_rate": 9.236706689536878e-06, "loss": 0.4114, "step": 950 }, { "epoch": 4.601183431952663, "grad_norm": 14.346109390258789, "learning_rate": 9.193825042881648e-06, "loss": 0.3816, "step": 975 }, { "epoch": 4.719526627218935, "grad_norm": 56.37358474731445, "learning_rate": 9.150943396226416e-06, "loss": 0.4029, "step": 1000 }, { "epoch": 4.719526627218935, "eval_loss": 0.6656792163848877, "eval_runtime": 253.5608, "eval_samples_per_second": 1.546, "eval_steps_per_second": 0.773, "eval_wer": 0.404330430689574, "step": 1000 }, { "epoch": 4.8378698224852075, "grad_norm": 23.6176700592041, "learning_rate": 9.108061749571185e-06, "loss": 0.4506, "step": 1025 }, { "epoch": 4.956213017751479, "grad_norm": 23.03661346435547, "learning_rate": 9.065180102915953e-06, "loss": 0.4928, "step": 1050 }, { "epoch": 5.071005917159764, "grad_norm": 25.316198348999023, "learning_rate": 9.02229845626072e-06, "loss": 0.2739, "step": 1075 }, { "epoch": 5.189349112426036, "grad_norm": 6.165097713470459, "learning_rate": 8.97941680960549e-06, 
"loss": 0.1914, "step": 1100 }, { "epoch": 5.3076923076923075, "grad_norm": 20.19268226623535, "learning_rate": 8.936535162950258e-06, "loss": 0.221, "step": 1125 }, { "epoch": 5.42603550295858, "grad_norm": 12.469457626342773, "learning_rate": 8.893653516295027e-06, "loss": 0.199, "step": 1150 }, { "epoch": 5.544378698224852, "grad_norm": 14.28165054321289, "learning_rate": 8.850771869639795e-06, "loss": 0.2235, "step": 1175 }, { "epoch": 5.662721893491124, "grad_norm": 15.683744430541992, "learning_rate": 8.807890222984563e-06, "loss": 0.2311, "step": 1200 }, { "epoch": 5.662721893491124, "eval_loss": 0.6909866333007812, "eval_runtime": 253.3291, "eval_samples_per_second": 1.547, "eval_steps_per_second": 0.774, "eval_wer": 0.41868674982348786, "step": 1200 }, { "epoch": 5.781065088757396, "grad_norm": 11.54914665222168, "learning_rate": 8.765008576329332e-06, "loss": 0.2098, "step": 1225 }, { "epoch": 5.899408284023669, "grad_norm": 18.604154586791992, "learning_rate": 8.722126929674101e-06, "loss": 0.1964, "step": 1250 }, { "epoch": 6.014201183431953, "grad_norm": 13.384803771972656, "learning_rate": 8.67924528301887e-06, "loss": 0.2031, "step": 1275 }, { "epoch": 6.132544378698225, "grad_norm": 9.78126049041748, "learning_rate": 8.636363636363637e-06, "loss": 0.1747, "step": 1300 }, { "epoch": 6.250887573964497, "grad_norm": 8.569636344909668, "learning_rate": 8.593481989708405e-06, "loss": 0.1078, "step": 1325 }, { "epoch": 6.36923076923077, "grad_norm": 16.902490615844727, "learning_rate": 8.550600343053174e-06, "loss": 0.1253, "step": 1350 }, { "epoch": 6.487573964497042, "grad_norm": 15.703929901123047, "learning_rate": 8.507718696397942e-06, "loss": 0.1213, "step": 1375 }, { "epoch": 6.6059171597633135, "grad_norm": 10.21462631225586, "learning_rate": 8.464837049742711e-06, "loss": 0.1545, "step": 1400 }, { "epoch": 6.6059171597633135, "eval_loss": 0.720788836479187, "eval_runtime": 248.7794, "eval_samples_per_second": 1.576, "eval_steps_per_second": 0.788, "eval_wer": 0.38644386914568135, "step": 1400 }, { "epoch": 6.6059171597633135, "step": 1400, "total_flos": 1.138790955810816e+19, "train_loss": 1.9424426797458103, "train_runtime": 4520.3552, "train_samples_per_second": 11.209, "train_steps_per_second": 1.4 } ], "logging_steps": 25, "max_steps": 6330, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.138790955810816e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }