{ "best_metric": 0.5404770374298096, "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-female-model/checkpoint-1200", "epoch": 4.225616921269095, "eval_steps": 200, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05875440658049354, "grad_norm": 73.04696655273438, "learning_rate": 4.2000000000000006e-07, "loss": 5.3434, "step": 25 }, { "epoch": 0.11750881316098707, "grad_norm": null, "learning_rate": 9.000000000000001e-07, "loss": 4.1395, "step": 50 }, { "epoch": 0.1762632197414806, "grad_norm": 34.922523498535156, "learning_rate": 1.4000000000000001e-06, "loss": 3.1888, "step": 75 }, { "epoch": 0.23501762632197415, "grad_norm": 34.749629974365234, "learning_rate": 1.9000000000000002e-06, "loss": 2.3154, "step": 100 }, { "epoch": 0.2937720329024677, "grad_norm": 33.44667053222656, "learning_rate": 2.4000000000000003e-06, "loss": 1.691, "step": 125 }, { "epoch": 0.3525264394829612, "grad_norm": 38.09309768676758, "learning_rate": 2.9e-06, "loss": 1.5853, "step": 150 }, { "epoch": 0.4112808460634548, "grad_norm": 31.004915237426758, "learning_rate": 3.4000000000000005e-06, "loss": 1.522, "step": 175 }, { "epoch": 0.4700352526439483, "grad_norm": 31.333637237548828, "learning_rate": 3.900000000000001e-06, "loss": 1.4291, "step": 200 }, { "epoch": 0.4700352526439483, "eval_loss": 0.8216601610183716, "eval_runtime": 224.1212, "eval_samples_per_second": 2.579, "eval_steps_per_second": 0.647, "eval_wer": 0.49672320740169623, "step": 200 }, { "epoch": 0.5287896592244419, "grad_norm": 34.124698638916016, "learning_rate": 4.4e-06, "loss": 1.2954, "step": 225 }, { "epoch": 0.5875440658049353, "grad_norm": 38.50987243652344, "learning_rate": 4.9000000000000005e-06, "loss": 1.2541, "step": 250 }, { "epoch": 0.6462984723854289, "grad_norm": 38.04581069946289, "learning_rate": 5.400000000000001e-06, "loss": 1.1264, "step": 275 }, { "epoch": 
0.7050528789659224, "grad_norm": 28.410049438476562, "learning_rate": 5.9e-06, "loss": 1.1311, "step": 300 }, { "epoch": 0.763807285546416, "grad_norm": 30.25857925415039, "learning_rate": 6.4000000000000006e-06, "loss": 1.0838, "step": 325 }, { "epoch": 0.8225616921269095, "grad_norm": 31.186445236206055, "learning_rate": 6.9e-06, "loss": 1.1503, "step": 350 }, { "epoch": 0.881316098707403, "grad_norm": 24.111587524414062, "learning_rate": 7.4e-06, "loss": 1.0892, "step": 375 }, { "epoch": 0.9400705052878966, "grad_norm": 22.854475021362305, "learning_rate": 7.9e-06, "loss": 0.9176, "step": 400 }, { "epoch": 0.9400705052878966, "eval_loss": 0.634784460067749, "eval_runtime": 226.4974, "eval_samples_per_second": 2.552, "eval_steps_per_second": 0.64, "eval_wer": 0.42656129529683884, "step": 400 }, { "epoch": 0.9988249118683902, "grad_norm": 36.47032928466797, "learning_rate": 8.400000000000001e-06, "loss": 1.0234, "step": 425 }, { "epoch": 1.0564042303172738, "grad_norm": 21.01776123046875, "learning_rate": 8.900000000000001e-06, "loss": 0.5964, "step": 450 }, { "epoch": 1.1151586368977673, "grad_norm": 17.60657501220703, "learning_rate": 9.4e-06, "loss": 0.7129, "step": 475 }, { "epoch": 1.1739130434782608, "grad_norm": 19.744020462036133, "learning_rate": 9.9e-06, "loss": 0.5569, "step": 500 }, { "epoch": 1.2326674500587544, "grad_norm": 15.34373664855957, "learning_rate": 9.955555555555556e-06, "loss": 0.5868, "step": 525 }, { "epoch": 1.291421856639248, "grad_norm": 26.74762535095215, "learning_rate": 9.9e-06, "loss": 0.6199, "step": 550 }, { "epoch": 1.3501762632197414, "grad_norm": 13.160558700561523, "learning_rate": 9.844444444444446e-06, "loss": 0.557, "step": 575 }, { "epoch": 1.408930669800235, "grad_norm": 14.749505043029785, "learning_rate": 9.78888888888889e-06, "loss": 0.5492, "step": 600 }, { "epoch": 1.408930669800235, "eval_loss": 0.5867528319358826, "eval_runtime": 230.3509, "eval_samples_per_second": 2.509, "eval_steps_per_second": 0.629, 
"eval_wer": 0.40940632228218965, "step": 600 }, { "epoch": 1.4676850763807285, "grad_norm": 16.107746124267578, "learning_rate": 9.733333333333334e-06, "loss": 0.5442, "step": 625 }, { "epoch": 1.526439482961222, "grad_norm": 29.692367553710938, "learning_rate": 9.677777777777778e-06, "loss": 0.6473, "step": 650 }, { "epoch": 1.5851938895417157, "grad_norm": 18.0280704498291, "learning_rate": 9.622222222222222e-06, "loss": 0.5739, "step": 675 }, { "epoch": 1.6439482961222092, "grad_norm": 27.483768463134766, "learning_rate": 9.566666666666668e-06, "loss": 0.6705, "step": 700 }, { "epoch": 1.7027027027027026, "grad_norm": 20.081012725830078, "learning_rate": 9.511111111111112e-06, "loss": 0.6338, "step": 725 }, { "epoch": 1.7614571092831963, "grad_norm": 22.985151290893555, "learning_rate": 9.455555555555557e-06, "loss": 0.6409, "step": 750 }, { "epoch": 1.8202115158636898, "grad_norm": 19.641082763671875, "learning_rate": 9.4e-06, "loss": 0.6777, "step": 775 }, { "epoch": 1.8789659224441833, "grad_norm": 27.92637825012207, "learning_rate": 9.344444444444446e-06, "loss": 0.6243, "step": 800 }, { "epoch": 1.8789659224441833, "eval_loss": 0.5535122752189636, "eval_runtime": 218.1215, "eval_samples_per_second": 2.65, "eval_steps_per_second": 0.665, "eval_wer": 0.3274865073245952, "step": 800 }, { "epoch": 1.937720329024677, "grad_norm": 24.763235092163086, "learning_rate": 9.28888888888889e-06, "loss": 0.5296, "step": 825 }, { "epoch": 1.9964747356051704, "grad_norm": 22.559097290039062, "learning_rate": 9.233333333333334e-06, "loss": 0.6109, "step": 850 }, { "epoch": 2.054054054054054, "grad_norm": 9.163862228393555, "learning_rate": 9.17777777777778e-06, "loss": 0.2217, "step": 875 }, { "epoch": 2.1128084606345476, "grad_norm": 14.249441146850586, "learning_rate": 9.122222222222223e-06, "loss": 0.2233, "step": 900 }, { "epoch": 2.1715628672150413, "grad_norm": 12.58755111694336, "learning_rate": 9.066666666666667e-06, "loss": 0.2103, "step": 925 }, { "epoch": 
2.2303172737955346, "grad_norm": 12.163936614990234, "learning_rate": 9.011111111111111e-06, "loss": 0.2184, "step": 950 }, { "epoch": 2.2890716803760283, "grad_norm": 24.493022918701172, "learning_rate": 8.955555555555555e-06, "loss": 0.2056, "step": 975 }, { "epoch": 2.3478260869565215, "grad_norm": 16.613021850585938, "learning_rate": 8.900000000000001e-06, "loss": 0.2196, "step": 1000 }, { "epoch": 2.3478260869565215, "eval_loss": 0.5643105506896973, "eval_runtime": 222.0189, "eval_samples_per_second": 2.603, "eval_steps_per_second": 0.653, "eval_wer": 0.35774865073245954, "step": 1000 }, { "epoch": 2.406580493537015, "grad_norm": 9.612153053283691, "learning_rate": 8.844444444444445e-06, "loss": 0.2273, "step": 1025 }, { "epoch": 2.465334900117509, "grad_norm": 8.410961151123047, "learning_rate": 8.788888888888891e-06, "loss": 0.2034, "step": 1050 }, { "epoch": 2.524089306698002, "grad_norm": 21.08755874633789, "learning_rate": 8.733333333333333e-06, "loss": 0.2239, "step": 1075 }, { "epoch": 2.582843713278496, "grad_norm": 14.133148193359375, "learning_rate": 8.677777777777779e-06, "loss": 0.2086, "step": 1100 }, { "epoch": 2.6415981198589895, "grad_norm": 6.469407558441162, "learning_rate": 8.622222222222223e-06, "loss": 0.2349, "step": 1125 }, { "epoch": 2.7003525264394828, "grad_norm": 12.72140121459961, "learning_rate": 8.566666666666667e-06, "loss": 0.2101, "step": 1150 }, { "epoch": 2.7591069330199764, "grad_norm": 7.142697334289551, "learning_rate": 8.511111111111113e-06, "loss": 0.2615, "step": 1175 }, { "epoch": 2.81786133960047, "grad_norm": 11.419723510742188, "learning_rate": 8.455555555555555e-06, "loss": 0.2211, "step": 1200 }, { "epoch": 2.81786133960047, "eval_loss": 0.5404770374298096, "eval_runtime": 217.6495, "eval_samples_per_second": 2.656, "eval_steps_per_second": 0.666, "eval_wer": 0.33982266769468006, "step": 1200 }, { "epoch": 2.8766157461809634, "grad_norm": 8.013970375061035, "learning_rate": 8.400000000000001e-06, "loss": 0.2023, 
"step": 1225 }, { "epoch": 2.935370152761457, "grad_norm": 11.708134651184082, "learning_rate": 8.344444444444445e-06, "loss": 0.2406, "step": 1250 }, { "epoch": 2.9941245593419508, "grad_norm": 13.683923721313477, "learning_rate": 8.288888888888889e-06, "loss": 0.2345, "step": 1275 }, { "epoch": 3.0517038777908345, "grad_norm": 4.092015743255615, "learning_rate": 8.233333333333335e-06, "loss": 0.0736, "step": 1300 }, { "epoch": 3.1104582843713278, "grad_norm": 7.464056015014648, "learning_rate": 8.177777777777779e-06, "loss": 0.0939, "step": 1325 }, { "epoch": 3.1692126909518215, "grad_norm": 6.3624677658081055, "learning_rate": 8.122222222222223e-06, "loss": 0.0829, "step": 1350 }, { "epoch": 3.227967097532315, "grad_norm": 9.64356803894043, "learning_rate": 8.066666666666667e-06, "loss": 0.0939, "step": 1375 }, { "epoch": 3.2867215041128084, "grad_norm": 9.63764476776123, "learning_rate": 8.011111111111113e-06, "loss": 0.0999, "step": 1400 }, { "epoch": 3.2867215041128084, "eval_loss": 0.5825645923614502, "eval_runtime": 218.2885, "eval_samples_per_second": 2.648, "eval_steps_per_second": 0.664, "eval_wer": 0.3282575173477255, "step": 1400 }, { "epoch": 3.345475910693302, "grad_norm": 6.401581764221191, "learning_rate": 7.955555555555557e-06, "loss": 0.0829, "step": 1425 }, { "epoch": 3.4042303172737958, "grad_norm": 6.341639041900635, "learning_rate": 7.9e-06, "loss": 0.0914, "step": 1450 }, { "epoch": 3.462984723854289, "grad_norm": 5.883626937866211, "learning_rate": 7.844444444444446e-06, "loss": 0.1057, "step": 1475 }, { "epoch": 3.5217391304347827, "grad_norm": 10.137381553649902, "learning_rate": 7.788888888888889e-06, "loss": 0.0948, "step": 1500 }, { "epoch": 3.5804935370152764, "grad_norm": 3.2198357582092285, "learning_rate": 7.733333333333334e-06, "loss": 0.0916, "step": 1525 }, { "epoch": 3.6392479435957696, "grad_norm": 4.958520412445068, "learning_rate": 7.677777777777778e-06, "loss": 0.0847, "step": 1550 }, { "epoch": 3.6980023501762633, 
"grad_norm": 7.396123886108398, "learning_rate": 7.622222222222223e-06, "loss": 0.1091, "step": 1575 }, { "epoch": 3.756756756756757, "grad_norm": 8.138400077819824, "learning_rate": 7.566666666666667e-06, "loss": 0.1111, "step": 1600 }, { "epoch": 3.756756756756757, "eval_loss": 0.5536873936653137, "eval_runtime": 217.469, "eval_samples_per_second": 2.658, "eval_steps_per_second": 0.667, "eval_wer": 0.3276792598303778, "step": 1600 }, { "epoch": 3.8155111633372503, "grad_norm": 7.745422840118408, "learning_rate": 7.511111111111111e-06, "loss": 0.1031, "step": 1625 }, { "epoch": 3.874265569917744, "grad_norm": 7.68823766708374, "learning_rate": 7.455555555555556e-06, "loss": 0.0919, "step": 1650 }, { "epoch": 3.933019976498237, "grad_norm": 4.057219505310059, "learning_rate": 7.4e-06, "loss": 0.1069, "step": 1675 }, { "epoch": 3.991774383078731, "grad_norm": 7.178942680358887, "learning_rate": 7.344444444444445e-06, "loss": 0.0951, "step": 1700 }, { "epoch": 4.049353701527615, "grad_norm": 5.237444877624512, "learning_rate": 7.28888888888889e-06, "loss": 0.0495, "step": 1725 }, { "epoch": 4.108108108108108, "grad_norm": 7.6568684577941895, "learning_rate": 7.233333333333334e-06, "loss": 0.0546, "step": 1750 }, { "epoch": 4.166862514688602, "grad_norm": 5.4397149085998535, "learning_rate": 7.177777777777778e-06, "loss": 0.0521, "step": 1775 }, { "epoch": 4.225616921269095, "grad_norm": 7.218142032623291, "learning_rate": 7.122222222222222e-06, "loss": 0.0423, "step": 1800 }, { "epoch": 4.225616921269095, "eval_loss": 0.6012063026428223, "eval_runtime": 217.9147, "eval_samples_per_second": 2.652, "eval_steps_per_second": 0.665, "eval_wer": 0.31881264456437935, "step": 1800 }, { "epoch": 4.225616921269095, "step": 1800, "total_flos": 1.468038098976768e+19, "train_loss": 0.6555014891094632, "train_runtime": 4980.8387, "train_samples_per_second": 8.031, "train_steps_per_second": 1.004 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, 
"num_train_epochs": 12, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.468038098976768e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }