{
  "best_metric": 26.439685364873398,
  "best_model_checkpoint": "./whisper-ft-2/checkpoint-5000",
  "epoch": 1.0,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 26.441381454467773,
      "learning_rate": 6.875e-06,
      "loss": 5.6669,
      "step": 25
    },
    {
      "epoch": 0.01,
      "grad_norm": 16.109895706176758,
      "learning_rate": 9.969806763285025e-06,
      "loss": 3.5576,
      "step": 50
    },
    {
      "epoch": 0.01,
      "grad_norm": 17.341266632080078,
      "learning_rate": 9.919484702093398e-06,
      "loss": 2.3173,
      "step": 75
    },
    {
      "epoch": 0.02,
      "grad_norm": 14.84139347076416,
      "learning_rate": 9.869162640901772e-06,
      "loss": 1.732,
      "step": 100
    },
    {
      "epoch": 0.03,
      "grad_norm": 13.36408519744873,
      "learning_rate": 9.818840579710146e-06,
      "loss": 1.4347,
      "step": 125
    },
    {
      "epoch": 0.03,
      "grad_norm": 15.8803071975708,
      "learning_rate": 9.768518518518519e-06,
      "loss": 1.2313,
      "step": 150
    },
    {
      "epoch": 0.04,
      "grad_norm": 17.588510513305664,
      "learning_rate": 9.718196457326893e-06,
      "loss": 1.0921,
      "step": 175
    },
    {
      "epoch": 0.04,
      "grad_norm": 14.752327919006348,
      "learning_rate": 9.667874396135266e-06,
      "loss": 0.9493,
      "step": 200
    },
    {
      "epoch": 0.04,
      "grad_norm": 13.243648529052734,
      "learning_rate": 9.61755233494364e-06,
      "loss": 0.8546,
      "step": 225
    },
    {
      "epoch": 0.05,
      "grad_norm": 15.082148551940918,
      "learning_rate": 9.567230273752013e-06,
      "loss": 0.8029,
      "step": 250
    },
    {
      "epoch": 0.06,
      "grad_norm": 7.932609558105469,
      "learning_rate": 9.516908212560388e-06,
      "loss": 0.7795,
      "step": 275
    },
    {
      "epoch": 0.06,
      "grad_norm": 14.98747444152832,
      "learning_rate": 9.46658615136876e-06,
      "loss": 0.7094,
      "step": 300
    },
    {
      "epoch": 0.07,
      "grad_norm": 10.299836158752441,
      "learning_rate": 9.416264090177135e-06,
      "loss": 0.6991,
      "step": 325
    },
    {
      "epoch": 0.07,
      "grad_norm": 9.431194305419922,
      "learning_rate": 9.36594202898551e-06,
      "loss": 0.6727,
      "step": 350
    },
    {
      "epoch": 0.07,
      "grad_norm": 12.089103698730469,
      "learning_rate": 9.315619967793882e-06,
      "loss": 0.6613,
      "step": 375
    },
    {
      "epoch": 0.08,
      "grad_norm": 11.374141693115234,
      "learning_rate": 9.265297906602255e-06,
      "loss": 0.6281,
      "step": 400
    },
    {
      "epoch": 0.09,
      "grad_norm": 10.616567611694336,
      "learning_rate": 9.214975845410629e-06,
      "loss": 0.5975,
      "step": 425
    },
    {
      "epoch": 0.09,
      "grad_norm": 12.249051094055176,
      "learning_rate": 9.164653784219002e-06,
      "loss": 0.5734,
      "step": 450
    },
    {
      "epoch": 0.1,
      "grad_norm": 12.755301475524902,
      "learning_rate": 9.114331723027376e-06,
      "loss": 0.525,
      "step": 475
    },
    {
      "epoch": 0.1,
      "grad_norm": 8.402365684509277,
      "learning_rate": 9.06400966183575e-06,
      "loss": 0.5021,
      "step": 500
    },
    {
      "epoch": 0.1,
      "grad_norm": 8.66573429107666,
      "learning_rate": 9.013687600644123e-06,
      "loss": 0.5239,
      "step": 525
    },
    {
      "epoch": 0.11,
      "grad_norm": 11.849873542785645,
      "learning_rate": 8.963365539452496e-06,
      "loss": 0.4548,
      "step": 550
    },
    {
      "epoch": 0.12,
      "grad_norm": 8.986794471740723,
      "learning_rate": 8.91304347826087e-06,
      "loss": 0.4634,
      "step": 575
    },
    {
      "epoch": 0.12,
      "grad_norm": 18.423568725585938,
      "learning_rate": 8.862721417069245e-06,
      "loss": 0.4796,
      "step": 600
    },
    {
      "epoch": 0.12,
      "grad_norm": 7.8966217041015625,
      "learning_rate": 8.812399355877618e-06,
      "loss": 0.4142,
      "step": 625
    },
    {
      "epoch": 0.13,
      "grad_norm": 8.645583152770996,
      "learning_rate": 8.76207729468599e-06,
      "loss": 0.438,
      "step": 650
    },
    {
      "epoch": 0.14,
      "grad_norm": 8.235858917236328,
      "learning_rate": 8.711755233494365e-06,
      "loss": 0.448,
      "step": 675
    },
    {
      "epoch": 0.14,
      "grad_norm": 10.634264945983887,
      "learning_rate": 8.661433172302737e-06,
      "loss": 0.454,
      "step": 700
    },
    {
      "epoch": 0.14,
      "grad_norm": 7.304662704467773,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.441,
      "step": 725
    },
    {
      "epoch": 0.15,
      "grad_norm": 7.09723424911499,
      "learning_rate": 8.560789049919486e-06,
      "loss": 0.4172,
      "step": 750
    },
    {
      "epoch": 0.15,
      "grad_norm": 11.544793128967285,
      "learning_rate": 8.510466988727859e-06,
      "loss": 0.4305,
      "step": 775
    },
    {
      "epoch": 0.16,
      "grad_norm": 8.155645370483398,
      "learning_rate": 8.460144927536232e-06,
      "loss": 0.4273,
      "step": 800
    },
    {
      "epoch": 0.17,
      "grad_norm": 7.164479732513428,
      "learning_rate": 8.409822866344606e-06,
      "loss": 0.4339,
      "step": 825
    },
    {
      "epoch": 0.17,
      "grad_norm": 8.852354049682617,
      "learning_rate": 8.35950080515298e-06,
      "loss": 0.4595,
      "step": 850
    },
    {
      "epoch": 0.17,
      "grad_norm": 9.475948333740234,
      "learning_rate": 8.309178743961353e-06,
      "loss": 0.4104,
      "step": 875
    },
    {
      "epoch": 0.18,
      "grad_norm": 9.111871719360352,
      "learning_rate": 8.258856682769728e-06,
      "loss": 0.4573,
      "step": 900
    },
    {
      "epoch": 0.18,
      "grad_norm": 8.254072189331055,
      "learning_rate": 8.2085346215781e-06,
      "loss": 0.5042,
      "step": 925
    },
    {
      "epoch": 0.19,
      "grad_norm": 9.376124382019043,
      "learning_rate": 8.158212560386473e-06,
      "loss": 0.488,
      "step": 950
    },
    {
      "epoch": 0.2,
      "grad_norm": 8.461128234863281,
      "learning_rate": 8.107890499194848e-06,
      "loss": 0.4621,
      "step": 975
    },
    {
      "epoch": 0.2,
      "grad_norm": 8.678207397460938,
      "learning_rate": 8.057568438003222e-06,
      "loss": 0.4632,
      "step": 1000
    },
    {
      "epoch": 0.2,
      "eval_loss": 0.5091106295585632,
      "eval_runtime": 245.5352,
      "eval_samples_per_second": 4.073,
      "eval_steps_per_second": 0.509,
      "eval_wer": 38.22701957857266,
      "step": 1000
    },
    {
      "epoch": 0.2,
      "grad_norm": 8.345236778259277,
      "learning_rate": 8.007246376811595e-06,
      "loss": 0.4577,
      "step": 1025
    },
    {
      "epoch": 0.21,
      "grad_norm": 8.78304386138916,
      "learning_rate": 7.956924315619969e-06,
      "loss": 0.4378,
      "step": 1050
    },
    {
      "epoch": 0.21,
      "grad_norm": 9.15285873413086,
      "learning_rate": 7.906602254428342e-06,
      "loss": 0.4534,
      "step": 1075
    },
    {
      "epoch": 0.22,
      "grad_norm": 10.154984474182129,
      "learning_rate": 7.856280193236716e-06,
      "loss": 0.4185,
      "step": 1100
    },
    {
      "epoch": 0.23,
      "grad_norm": 8.451964378356934,
      "learning_rate": 7.805958132045089e-06,
      "loss": 0.4098,
      "step": 1125
    },
    {
      "epoch": 0.23,
      "grad_norm": 7.336400508880615,
      "learning_rate": 7.755636070853463e-06,
      "loss": 0.3994,
      "step": 1150
    },
    {
      "epoch": 0.23,
      "grad_norm": 8.46390151977539,
      "learning_rate": 7.705314009661836e-06,
      "loss": 0.4028,
      "step": 1175
    },
    {
      "epoch": 0.24,
      "grad_norm": 10.172577857971191,
      "learning_rate": 7.654991948470209e-06,
      "loss": 0.3936,
      "step": 1200
    },
    {
      "epoch": 0.24,
      "grad_norm": 10.022027015686035,
      "learning_rate": 7.604669887278584e-06,
      "loss": 0.4025,
      "step": 1225
    },
    {
      "epoch": 0.25,
      "grad_norm": 10.936142921447754,
      "learning_rate": 7.5543478260869576e-06,
      "loss": 0.3701,
      "step": 1250
    },
    {
      "epoch": 0.26,
      "grad_norm": 8.6853609085083,
      "learning_rate": 7.504025764895331e-06,
      "loss": 0.3529,
      "step": 1275
    },
    {
      "epoch": 0.26,
      "grad_norm": 8.691036224365234,
      "learning_rate": 7.453703703703704e-06,
      "loss": 0.3525,
      "step": 1300
    },
    {
      "epoch": 0.27,
      "grad_norm": 7.8596978187561035,
      "learning_rate": 7.403381642512077e-06,
      "loss": 0.3481,
      "step": 1325
    },
    {
      "epoch": 0.27,
      "grad_norm": 6.271942615509033,
      "learning_rate": 7.353059581320452e-06,
      "loss": 0.3225,
      "step": 1350
    },
    {
      "epoch": 0.28,
      "grad_norm": 7.836112022399902,
      "learning_rate": 7.302737520128825e-06,
      "loss": 0.3821,
      "step": 1375
    },
    {
      "epoch": 0.28,
      "grad_norm": 9.443171501159668,
      "learning_rate": 7.252415458937199e-06,
      "loss": 0.4297,
      "step": 1400
    },
    {
      "epoch": 0.28,
      "grad_norm": 8.32925033569336,
      "learning_rate": 7.202093397745572e-06,
      "loss": 0.4633,
      "step": 1425
    },
    {
      "epoch": 0.29,
      "grad_norm": 10.218457221984863,
      "learning_rate": 7.151771336553945e-06,
      "loss": 0.4489,
      "step": 1450
    },
    {
      "epoch": 0.29,
      "grad_norm": 8.612972259521484,
      "learning_rate": 7.10144927536232e-06,
      "loss": 0.4022,
      "step": 1475
    },
    {
      "epoch": 0.3,
      "grad_norm": 9.206517219543457,
      "learning_rate": 7.051127214170693e-06,
      "loss": 0.3668,
      "step": 1500
    },
    {
      "epoch": 0.3,
      "grad_norm": 10.358244895935059,
      "learning_rate": 7.000805152979067e-06,
      "loss": 0.4162,
      "step": 1525
    },
    {
      "epoch": 0.31,
      "grad_norm": 12.02837085723877,
      "learning_rate": 6.95048309178744e-06,
      "loss": 0.436,
      "step": 1550
    },
    {
      "epoch": 0.32,
      "grad_norm": 10.476570129394531,
      "learning_rate": 6.900161030595813e-06,
      "loss": 0.3599,
      "step": 1575
    },
    {
      "epoch": 0.32,
      "grad_norm": 8.006072044372559,
      "learning_rate": 6.8498389694041875e-06,
      "loss": 0.3669,
      "step": 1600
    },
    {
      "epoch": 0.33,
      "grad_norm": 16.798221588134766,
      "learning_rate": 6.799516908212561e-06,
      "loss": 0.3632,
      "step": 1625
    },
    {
      "epoch": 0.33,
      "grad_norm": 8.768495559692383,
      "learning_rate": 6.749194847020935e-06,
      "loss": 0.3225,
      "step": 1650
    },
    {
      "epoch": 0.34,
      "grad_norm": 10.018780708312988,
      "learning_rate": 6.698872785829308e-06,
      "loss": 0.325,
      "step": 1675
    },
    {
      "epoch": 0.34,
      "grad_norm": 10.418109893798828,
      "learning_rate": 6.648550724637681e-06,
      "loss": 0.3321,
      "step": 1700
    },
    {
      "epoch": 0.34,
      "grad_norm": 6.952179908752441,
      "learning_rate": 6.598228663446056e-06,
      "loss": 0.3242,
      "step": 1725
    },
    {
      "epoch": 0.35,
      "grad_norm": 10.024521827697754,
      "learning_rate": 6.547906602254429e-06,
      "loss": 0.3291,
      "step": 1750
    },
    {
      "epoch": 0.35,
      "grad_norm": 7.440296173095703,
      "learning_rate": 6.497584541062802e-06,
      "loss": 0.329,
      "step": 1775
    },
    {
      "epoch": 0.36,
      "grad_norm": 8.184727668762207,
      "learning_rate": 6.447262479871176e-06,
      "loss": 0.3228,
      "step": 1800
    },
    {
      "epoch": 0.36,
      "grad_norm": 8.101869583129883,
      "learning_rate": 6.3969404186795495e-06,
      "loss": 0.3222,
      "step": 1825
    },
    {
      "epoch": 0.37,
      "grad_norm": 12.450462341308594,
      "learning_rate": 6.346618357487924e-06,
      "loss": 0.35,
      "step": 1850
    },
    {
      "epoch": 0.38,
      "grad_norm": 13.891828536987305,
      "learning_rate": 6.296296296296297e-06,
      "loss": 0.3659,
      "step": 1875
    },
    {
      "epoch": 0.38,
      "grad_norm": 9.327531814575195,
      "learning_rate": 6.24597423510467e-06,
      "loss": 0.3609,
      "step": 1900
    },
    {
      "epoch": 0.39,
      "grad_norm": 7.067734241485596,
      "learning_rate": 6.195652173913044e-06,
      "loss": 0.3558,
      "step": 1925
    },
    {
      "epoch": 0.39,
      "grad_norm": 11.90934944152832,
      "learning_rate": 6.145330112721417e-06,
      "loss": 0.3312,
      "step": 1950
    },
    {
      "epoch": 0.4,
      "grad_norm": 8.565199851989746,
      "learning_rate": 6.095008051529792e-06,
      "loss": 0.3347,
      "step": 1975
    },
    {
      "epoch": 0.4,
      "grad_norm": 6.947161674499512,
      "learning_rate": 6.044685990338165e-06,
      "loss": 0.2988,
      "step": 2000
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.42369821667671204,
      "eval_runtime": 243.0792,
      "eval_samples_per_second": 4.114,
      "eval_steps_per_second": 0.514,
      "eval_wer": 32.68071424470345,
      "step": 2000
    },
    {
      "epoch": 0.41,
      "grad_norm": 10.25965404510498,
      "learning_rate": 5.994363929146538e-06,
      "loss": 0.3133,
      "step": 2025
    },
    {
      "epoch": 0.41,
      "grad_norm": 12.943211555480957,
      "learning_rate": 5.944041867954912e-06,
      "loss": 0.3662,
      "step": 2050
    },
    {
      "epoch": 0.41,
      "grad_norm": 8.808210372924805,
      "learning_rate": 5.893719806763285e-06,
      "loss": 0.3933,
      "step": 2075
    },
    {
      "epoch": 0.42,
      "grad_norm": 8.858198165893555,
      "learning_rate": 5.84339774557166e-06,
      "loss": 0.4115,
      "step": 2100
    },
    {
      "epoch": 0.42,
      "grad_norm": 8.931889533996582,
      "learning_rate": 5.793075684380033e-06,
      "loss": 0.3585,
      "step": 2125
    },
    {
      "epoch": 0.43,
      "grad_norm": 9.787882804870605,
      "learning_rate": 5.742753623188406e-06,
      "loss": 0.351,
      "step": 2150
    },
    {
      "epoch": 0.43,
      "grad_norm": 8.339137077331543,
      "learning_rate": 5.692431561996779e-06,
      "loss": 0.3425,
      "step": 2175
    },
    {
      "epoch": 0.44,
      "grad_norm": 7.447598457336426,
      "learning_rate": 5.642109500805153e-06,
      "loss": 0.3567,
      "step": 2200
    },
    {
      "epoch": 0.45,
      "grad_norm": 7.393462181091309,
      "learning_rate": 5.591787439613527e-06,
      "loss": 0.3852,
      "step": 2225
    },
    {
      "epoch": 0.45,
      "grad_norm": 9.472132682800293,
      "learning_rate": 5.541465378421901e-06,
      "loss": 0.3978,
      "step": 2250
    },
    {
      "epoch": 0.46,
      "grad_norm": 9.781373023986816,
      "learning_rate": 5.4911433172302745e-06,
      "loss": 0.3917,
      "step": 2275
    },
    {
      "epoch": 0.46,
      "grad_norm": 10.88880443572998,
      "learning_rate": 5.440821256038647e-06,
      "loss": 0.4059,
      "step": 2300
    },
    {
      "epoch": 0.47,
      "grad_norm": 10.626157760620117,
      "learning_rate": 5.390499194847021e-06,
      "loss": 0.3995,
      "step": 2325
    },
    {
      "epoch": 0.47,
      "grad_norm": 10.283743858337402,
      "learning_rate": 5.340177133655395e-06,
      "loss": 0.4168,
      "step": 2350
    },
    {
      "epoch": 0.47,
      "grad_norm": 9.085090637207031,
      "learning_rate": 5.289855072463769e-06,
      "loss": 0.3932,
      "step": 2375
    },
    {
      "epoch": 0.48,
      "grad_norm": 9.050111770629883,
      "learning_rate": 5.239533011272142e-06,
      "loss": 0.386,
      "step": 2400
    },
    {
      "epoch": 0.48,
      "grad_norm": 7.001188278198242,
      "learning_rate": 5.189210950080515e-06,
      "loss": 0.3305,
      "step": 2425
    },
    {
      "epoch": 0.49,
      "grad_norm": 8.82314395904541,
      "learning_rate": 5.138888888888889e-06,
      "loss": 0.318,
      "step": 2450
    },
    {
      "epoch": 0.49,
      "grad_norm": 8.965357780456543,
      "learning_rate": 5.088566827697263e-06,
      "loss": 0.3568,
      "step": 2475
    },
    {
      "epoch": 0.5,
      "grad_norm": 8.673226356506348,
      "learning_rate": 5.038244766505637e-06,
      "loss": 0.3693,
      "step": 2500
    },
    {
      "epoch": 0.51,
      "grad_norm": 9.710456848144531,
      "learning_rate": 4.98792270531401e-06,
      "loss": 0.4114,
      "step": 2525
    },
    {
      "epoch": 0.51,
      "grad_norm": 11.537069320678711,
      "learning_rate": 4.937600644122384e-06,
      "loss": 0.4652,
      "step": 2550
    },
    {
      "epoch": 0.52,
      "grad_norm": 11.736916542053223,
      "learning_rate": 4.887278582930757e-06,
      "loss": 0.4348,
      "step": 2575
    },
    {
      "epoch": 0.52,
      "grad_norm": 8.418719291687012,
      "learning_rate": 4.836956521739131e-06,
      "loss": 0.3498,
      "step": 2600
    },
    {
      "epoch": 0.53,
      "grad_norm": 8.387438774108887,
      "learning_rate": 4.786634460547504e-06,
      "loss": 0.33,
      "step": 2625
    },
    {
      "epoch": 0.53,
      "grad_norm": 7.15954065322876,
      "learning_rate": 4.736312399355878e-06,
      "loss": 0.3454,
      "step": 2650
    },
    {
      "epoch": 0.54,
      "grad_norm": 7.480624675750732,
      "learning_rate": 4.6859903381642516e-06,
      "loss": 0.3317,
      "step": 2675
    },
    {
      "epoch": 0.54,
      "grad_norm": 7.614017963409424,
      "learning_rate": 4.635668276972625e-06,
      "loss": 0.3202,
      "step": 2700
    },
    {
      "epoch": 0.55,
      "grad_norm": 9.156942367553711,
      "learning_rate": 4.585346215780999e-06,
      "loss": 0.2789,
      "step": 2725
    },
    {
      "epoch": 0.55,
      "grad_norm": 10.527360916137695,
      "learning_rate": 4.535024154589372e-06,
      "loss": 0.2546,
      "step": 2750
    },
    {
      "epoch": 0.56,
      "grad_norm": 7.232975959777832,
      "learning_rate": 4.484702093397746e-06,
      "loss": 0.2724,
      "step": 2775
    },
    {
      "epoch": 0.56,
      "grad_norm": 6.251858711242676,
      "learning_rate": 4.434380032206119e-06,
      "loss": 0.2791,
      "step": 2800
    },
    {
      "epoch": 0.56,
      "grad_norm": 8.357397079467773,
      "learning_rate": 4.384057971014493e-06,
      "loss": 0.307,
      "step": 2825
    },
    {
      "epoch": 0.57,
      "grad_norm": 8.629735946655273,
      "learning_rate": 4.3337359098228665e-06,
      "loss": 0.3721,
      "step": 2850
    },
    {
      "epoch": 0.57,
      "grad_norm": 8.04019832611084,
      "learning_rate": 4.28341384863124e-06,
      "loss": 0.3302,
      "step": 2875
    },
    {
      "epoch": 0.58,
      "grad_norm": 10.451481819152832,
      "learning_rate": 4.233091787439614e-06,
      "loss": 0.3338,
      "step": 2900
    },
    {
      "epoch": 0.58,
      "grad_norm": 7.338428020477295,
      "learning_rate": 4.182769726247988e-06,
      "loss": 0.3381,
      "step": 2925
    },
    {
      "epoch": 0.59,
      "grad_norm": 6.68162727355957,
      "learning_rate": 4.132447665056361e-06,
      "loss": 0.2993,
      "step": 2950
    },
    {
      "epoch": 0.59,
      "grad_norm": 7.178064823150635,
      "learning_rate": 4.082125603864734e-06,
      "loss": 0.2728,
      "step": 2975
    },
    {
      "epoch": 0.6,
      "grad_norm": 6.970361709594727,
      "learning_rate": 4.031803542673109e-06,
      "loss": 0.2715,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "eval_loss": 0.3627614378929138,
      "eval_runtime": 242.2219,
      "eval_samples_per_second": 4.128,
      "eval_steps_per_second": 0.516,
      "eval_wer": 28.190848022047426,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "grad_norm": 7.680355072021484,
      "learning_rate": 3.9814814814814814e-06,
      "loss": 0.2641,
      "step": 3025
    },
    {
      "epoch": 0.61,
      "grad_norm": 7.352539539337158,
      "learning_rate": 3.931159420289856e-06,
      "loss": 0.2947,
      "step": 3050
    },
    {
      "epoch": 0.61,
      "grad_norm": 7.088489055633545,
      "learning_rate": 3.880837359098229e-06,
      "loss": 0.3204,
      "step": 3075
    },
    {
      "epoch": 0.62,
      "grad_norm": 9.058192253112793,
      "learning_rate": 3.830515297906602e-06,
      "loss": 0.3085,
      "step": 3100
    },
    {
      "epoch": 0.62,
      "grad_norm": 10.828897476196289,
      "learning_rate": 3.780193236714976e-06,
      "loss": 0.2772,
      "step": 3125
    },
    {
      "epoch": 0.63,
      "grad_norm": 7.811698913574219,
      "learning_rate": 3.7298711755233497e-06,
      "loss": 0.2449,
      "step": 3150
    },
    {
      "epoch": 0.64,
      "grad_norm": 7.802531719207764,
      "learning_rate": 3.6795491143317237e-06,
      "loss": 0.2515,
      "step": 3175
    },
    {
      "epoch": 0.64,
      "grad_norm": 7.107741832733154,
      "learning_rate": 3.629227053140097e-06,
      "loss": 0.2964,
      "step": 3200
    },
    {
      "epoch": 0.65,
      "grad_norm": 7.613478183746338,
      "learning_rate": 3.5789049919484704e-06,
      "loss": 0.2901,
      "step": 3225
    },
    {
      "epoch": 0.65,
      "grad_norm": 6.808854103088379,
      "learning_rate": 3.5285829307568444e-06,
      "loss": 0.2764,
      "step": 3250
    },
    {
      "epoch": 0.66,
      "grad_norm": 8.42264461517334,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 0.2439,
      "step": 3275
    },
    {
      "epoch": 0.66,
      "grad_norm": 6.853939533233643,
      "learning_rate": 3.4279388083735915e-06,
      "loss": 0.263,
      "step": 3300
    },
    {
      "epoch": 0.67,
      "grad_norm": 7.323888301849365,
      "learning_rate": 3.377616747181965e-06,
      "loss": 0.3071,
      "step": 3325
    },
    {
      "epoch": 0.67,
      "grad_norm": 8.018033027648926,
      "learning_rate": 3.327294685990338e-06,
      "loss": 0.3161,
      "step": 3350
    },
    {
      "epoch": 0.68,
      "grad_norm": 6.292661666870117,
      "learning_rate": 3.276972624798712e-06,
      "loss": 0.2889,
      "step": 3375
    },
    {
      "epoch": 0.68,
      "grad_norm": 7.282548904418945,
      "learning_rate": 3.2266505636070853e-06,
      "loss": 0.2909,
      "step": 3400
    },
    {
      "epoch": 0.69,
      "grad_norm": 9.771224975585938,
      "learning_rate": 3.1763285024154593e-06,
      "loss": 0.2804,
      "step": 3425
    },
    {
      "epoch": 0.69,
      "grad_norm": 6.97499418258667,
      "learning_rate": 3.126006441223833e-06,
      "loss": 0.3079,
      "step": 3450
    },
    {
      "epoch": 0.69,
      "grad_norm": 6.536733627319336,
      "learning_rate": 3.075684380032206e-06,
      "loss": 0.2727,
      "step": 3475
    },
    {
      "epoch": 0.7,
      "grad_norm": 9.328429222106934,
      "learning_rate": 3.02536231884058e-06,
      "loss": 0.2927,
      "step": 3500
    },
    {
      "epoch": 0.7,
      "grad_norm": 10.615299224853516,
      "learning_rate": 2.9750402576489536e-06,
      "loss": 0.2957,
      "step": 3525
    },
    {
      "epoch": 0.71,
      "grad_norm": 8.456421852111816,
      "learning_rate": 2.9247181964573276e-06,
      "loss": 0.3089,
      "step": 3550
    },
    {
      "epoch": 0.71,
      "grad_norm": 7.571265697479248,
      "learning_rate": 2.8743961352657007e-06,
      "loss": 0.3166,
      "step": 3575
    },
    {
      "epoch": 0.72,
      "grad_norm": 7.515317440032959,
      "learning_rate": 2.8240740740740743e-06,
      "loss": 0.3027,
      "step": 3600
    },
    {
      "epoch": 0.72,
      "grad_norm": 9.137214660644531,
      "learning_rate": 2.773752012882448e-06,
      "loss": 0.2972,
      "step": 3625
    },
    {
      "epoch": 0.73,
      "grad_norm": 8.118337631225586,
      "learning_rate": 2.7234299516908214e-06,
      "loss": 0.3027,
      "step": 3650
    },
    {
      "epoch": 0.73,
      "grad_norm": 11.383882522583008,
      "learning_rate": 2.6731078904991954e-06,
      "loss": 0.3401,
      "step": 3675
    },
    {
      "epoch": 0.74,
      "grad_norm": 8.050200462341309,
      "learning_rate": 2.6227858293075685e-06,
      "loss": 0.3462,
      "step": 3700
    },
    {
      "epoch": 0.74,
      "grad_norm": 8.983549118041992,
      "learning_rate": 2.572463768115942e-06,
      "loss": 0.3346,
      "step": 3725
    },
    {
      "epoch": 0.75,
      "grad_norm": 5.890308380126953,
      "learning_rate": 2.522141706924316e-06,
      "loss": 0.3202,
      "step": 3750
    },
    {
      "epoch": 0.76,
      "grad_norm": 7.097532272338867,
      "learning_rate": 2.4718196457326892e-06,
      "loss": 0.2893,
      "step": 3775
    },
    {
      "epoch": 0.76,
      "grad_norm": 8.50672721862793,
      "learning_rate": 2.4214975845410628e-06,
      "loss": 0.2999,
      "step": 3800
    },
    {
      "epoch": 0.77,
      "grad_norm": 6.510353088378906,
      "learning_rate": 2.3711755233494368e-06,
      "loss": 0.3054,
      "step": 3825
    },
    {
      "epoch": 0.77,
      "grad_norm": 7.819746971130371,
      "learning_rate": 2.3208534621578103e-06,
      "loss": 0.2801,
      "step": 3850
    },
    {
      "epoch": 0.78,
      "grad_norm": 9.897961616516113,
      "learning_rate": 2.270531400966184e-06,
      "loss": 0.2668,
      "step": 3875
    },
    {
      "epoch": 0.78,
      "grad_norm": 6.692742347717285,
      "learning_rate": 2.220209339774557e-06,
      "loss": 0.2632,
      "step": 3900
    },
    {
      "epoch": 0.79,
      "grad_norm": 6.9810404777526855,
      "learning_rate": 2.169887278582931e-06,
      "loss": 0.24,
      "step": 3925
    },
    {
      "epoch": 0.79,
      "grad_norm": 6.765547275543213,
      "learning_rate": 2.1195652173913046e-06,
      "loss": 0.2831,
      "step": 3950
    },
    {
      "epoch": 0.8,
      "grad_norm": 8.033112525939941,
      "learning_rate": 2.069243156199678e-06,
      "loss": 0.316,
      "step": 3975
    },
    {
      "epoch": 0.8,
      "grad_norm": 9.17114543914795,
      "learning_rate": 2.0189210950080517e-06,
      "loss": 0.3315,
      "step": 4000
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.3504044711589813,
      "eval_runtime": 243.0184,
      "eval_samples_per_second": 4.115,
      "eval_steps_per_second": 0.514,
      "eval_wer": 28.288453809496467,
      "step": 4000
    },
    {
      "epoch": 0.81,
      "grad_norm": 8.472304344177246,
      "learning_rate": 1.9685990338164253e-06,
      "loss": 0.3486,
      "step": 4025
    },
    {
      "epoch": 0.81,
      "grad_norm": 8.651259422302246,
      "learning_rate": 1.918276972624799e-06,
      "loss": 0.3579,
      "step": 4050
    },
    {
      "epoch": 0.81,
      "grad_norm": 7.6784772872924805,
      "learning_rate": 1.8679549114331724e-06,
      "loss": 0.34,
      "step": 4075
    },
    {
      "epoch": 0.82,
      "grad_norm": 8.391368865966797,
      "learning_rate": 1.817632850241546e-06,
      "loss": 0.3065,
      "step": 4100
    },
    {
      "epoch": 0.82,
      "grad_norm": 6.728703022003174,
      "learning_rate": 1.7673107890499197e-06,
      "loss": 0.2958,
      "step": 4125
    },
    {
      "epoch": 0.83,
      "grad_norm": 6.842586517333984,
      "learning_rate": 1.7169887278582933e-06,
      "loss": 0.2566,
      "step": 4150
    },
    {
      "epoch": 0.83,
      "grad_norm": 6.050861835479736,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.2687,
      "step": 4175
    },
    {
      "epoch": 0.84,
      "grad_norm": 9.184779167175293,
      "learning_rate": 1.6163446054750404e-06,
      "loss": 0.2652,
      "step": 4200
    },
    {
      "epoch": 0.84,
      "grad_norm": 7.263152122497559,
      "learning_rate": 1.566022544283414e-06,
      "loss": 0.2604,
      "step": 4225
    },
    {
      "epoch": 0.85,
      "grad_norm": 7.312118053436279,
      "learning_rate": 1.5157004830917876e-06,
      "loss": 0.2352,
      "step": 4250
    },
    {
      "epoch": 0.85,
      "grad_norm": 10.993212699890137,
      "learning_rate": 1.4653784219001613e-06,
      "loss": 0.253,
      "step": 4275
    },
    {
      "epoch": 0.86,
      "grad_norm": 5.131811618804932,
      "learning_rate": 1.4150563607085347e-06,
      "loss": 0.2723,
      "step": 4300
    },
    {
      "epoch": 0.86,
      "grad_norm": 7.325843334197998,
      "learning_rate": 1.3647342995169083e-06,
      "loss": 0.2579,
      "step": 4325
    },
    {
      "epoch": 0.87,
      "grad_norm": 7.593961715698242,
      "learning_rate": 1.3144122383252818e-06,
      "loss": 0.2666,
      "step": 4350
    },
    {
      "epoch": 0.88,
      "grad_norm": 8.811367988586426,
      "learning_rate": 1.2640901771336556e-06,
      "loss": 0.2799,
      "step": 4375
    },
    {
      "epoch": 0.88,
      "grad_norm": 10.823134422302246,
      "learning_rate": 1.2137681159420292e-06,
      "loss": 0.2999,
      "step": 4400
    },
    {
      "epoch": 0.89,
      "grad_norm": 7.269660949707031,
      "learning_rate": 1.1634460547504027e-06,
      "loss": 0.3029,
      "step": 4425
    },
    {
      "epoch": 0.89,
      "grad_norm": 9.235569953918457,
      "learning_rate": 1.1131239935587763e-06,
      "loss": 0.3232,
      "step": 4450
    },
    {
      "epoch": 0.9,
      "grad_norm": 8.534402847290039,
      "learning_rate": 1.0628019323671499e-06,
      "loss": 0.3101,
      "step": 4475
    },
    {
      "epoch": 0.9,
      "grad_norm": 9.410504341125488,
      "learning_rate": 1.0124798711755234e-06,
      "loss": 0.3178,
      "step": 4500
    },
    {
      "epoch": 0.91,
      "grad_norm": 8.4865083694458,
      "learning_rate": 9.62157809983897e-07,
      "loss": 0.2748,
      "step": 4525
    },
    {
      "epoch": 0.91,
      "grad_norm": 7.169732093811035,
      "learning_rate": 9.118357487922707e-07,
      "loss": 0.281,
      "step": 4550
    },
    {
      "epoch": 0.92,
      "grad_norm": 7.270644187927246,
      "learning_rate": 8.615136876006441e-07,
      "loss": 0.2714,
      "step": 4575
    },
    {
      "epoch": 0.92,
      "grad_norm": 6.573908805847168,
      "learning_rate": 8.111916264090178e-07,
      "loss": 0.278,
      "step": 4600
    },
    {
      "epoch": 0.93,
      "grad_norm": 9.489112854003906,
      "learning_rate": 7.608695652173914e-07,
      "loss": 0.2941,
      "step": 4625
    },
    {
      "epoch": 0.93,
      "grad_norm": 6.89200496673584,
      "learning_rate": 7.105475040257649e-07,
      "loss": 0.2792,
      "step": 4650
    },
    {
      "epoch": 0.94,
      "grad_norm": 8.81523323059082,
      "learning_rate": 6.602254428341386e-07,
      "loss": 0.2586,
      "step": 4675
    },
    {
      "epoch": 0.94,
      "grad_norm": 6.394697666168213,
      "learning_rate": 6.099033816425121e-07,
      "loss": 0.3147,
      "step": 4700
    },
    {
      "epoch": 0.94,
      "grad_norm": 7.134545803070068,
      "learning_rate": 5.595813204508857e-07,
      "loss": 0.3337,
      "step": 4725
    },
    {
      "epoch": 0.95,
      "grad_norm": 8.418858528137207,
      "learning_rate": 5.092592592592593e-07,
      "loss": 0.3001,
      "step": 4750
    },
    {
      "epoch": 0.95,
      "grad_norm": 7.931731224060059,
      "learning_rate": 4.5893719806763294e-07,
      "loss": 0.31,
      "step": 4775
    },
    {
      "epoch": 0.96,
      "grad_norm": 8.287907600402832,
      "learning_rate": 4.086151368760065e-07,
      "loss": 0.2927,
      "step": 4800
    },
    {
      "epoch": 0.96,
      "grad_norm": 6.868620872497559,
      "learning_rate": 3.5829307568438007e-07,
      "loss": 0.2962,
      "step": 4825
    },
    {
      "epoch": 0.97,
      "grad_norm": 8.824197769165039,
      "learning_rate": 3.079710144927537e-07,
      "loss": 0.2618,
      "step": 4850
    },
    {
      "epoch": 0.97,
      "grad_norm": 7.290480136871338,
      "learning_rate": 2.5764895330112725e-07,
      "loss": 0.248,
      "step": 4875
    },
    {
      "epoch": 0.98,
      "grad_norm": 5.285947799682617,
      "learning_rate": 2.073268921095008e-07,
      "loss": 0.2223,
      "step": 4900
    },
    {
      "epoch": 0.98,
      "grad_norm": 6.8819169998168945,
      "learning_rate": 1.570048309178744e-07,
      "loss": 0.2055,
      "step": 4925
    },
    {
      "epoch": 0.99,
      "grad_norm": 6.588069915771484,
      "learning_rate": 1.0668276972624801e-07,
      "loss": 0.2279,
      "step": 4950
    },
    {
      "epoch": 0.99,
      "grad_norm": 11.339116096496582,
      "learning_rate": 5.6360708534621584e-08,
      "loss": 0.2176,
      "step": 4975
    },
    {
      "epoch": 1.0,
      "grad_norm": 6.7692036628723145,
      "learning_rate": 6.0386473429951695e-09,
      "loss": 0.2065,
      "step": 5000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.3357778787612915,
      "eval_runtime": 244.8142,
      "eval_samples_per_second": 4.085,
      "eval_steps_per_second": 0.511,
      "eval_wer": 26.439685364873398,
      "step": 5000
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 1000,
  "total_flos": 1.9695108096e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}