{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.8866971373586723,
  "eval_steps": 4000,
  "global_step": 48000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0060139523694972335,
      "grad_norm": 2.5106329917907715,
      "learning_rate": 4.99007697859033e-05,
      "loss": 3.082,
      "step": 100
    },
    {
      "epoch": 0.012027904738994467,
      "grad_norm": 2.184039831161499,
      "learning_rate": 4.9800537246411676e-05,
      "loss": 1.8328,
      "step": 200
    },
    {
      "epoch": 0.0180418571084917,
      "grad_norm": 2.2055342197418213,
      "learning_rate": 4.970030470692006e-05,
      "loss": 1.6836,
      "step": 300
    },
    {
      "epoch": 0.024055809477988934,
      "grad_norm": 2.0695838928222656,
      "learning_rate": 4.960007216742843e-05,
      "loss": 1.6098,
      "step": 400
    },
    {
      "epoch": 0.03006976184748617,
      "grad_norm": 1.9105968475341797,
      "learning_rate": 4.949983962793682e-05,
      "loss": 1.516,
      "step": 500
    },
    {
      "epoch": 0.0360837142169834,
      "grad_norm": 1.882120132446289,
      "learning_rate": 4.9399607088445195e-05,
      "loss": 1.4876,
      "step": 600
    },
    {
      "epoch": 0.042097666586480634,
      "grad_norm": 1.937444806098938,
      "learning_rate": 4.9299374548953576e-05,
      "loss": 1.4493,
      "step": 700
    },
    {
      "epoch": 0.04811161895597787,
      "grad_norm": 2.1085917949676514,
      "learning_rate": 4.919914200946195e-05,
      "loss": 1.4128,
      "step": 800
    },
    {
      "epoch": 0.0541255713254751,
      "grad_norm": 1.9272162914276123,
      "learning_rate": 4.909890946997033e-05,
      "loss": 1.4105,
      "step": 900
    },
    {
      "epoch": 0.06013952369497234,
      "grad_norm": 1.818624496459961,
      "learning_rate": 4.899867693047871e-05,
      "loss": 1.3912,
      "step": 1000
    },
    {
      "epoch": 0.06615347606446957,
      "grad_norm": 1.644021987915039,
      "learning_rate": 4.8898444390987094e-05,
      "loss": 1.3397,
      "step": 1100
    },
    {
      "epoch": 0.0721674284339668,
      "grad_norm": 1.7671422958374023,
      "learning_rate": 4.8798211851495476e-05,
      "loss": 1.3873,
      "step": 1200
    },
    {
      "epoch": 0.07818138080346404,
      "grad_norm": 2.032087564468384,
      "learning_rate": 4.869797931200385e-05,
      "loss": 1.3112,
      "step": 1300
    },
    {
      "epoch": 0.08419533317296127,
      "grad_norm": 1.7175960540771484,
      "learning_rate": 4.859774677251223e-05,
      "loss": 1.3089,
      "step": 1400
    },
    {
      "epoch": 0.09020928554245851,
      "grad_norm": 1.7282887697219849,
      "learning_rate": 4.8497514233020606e-05,
      "loss": 1.2983,
      "step": 1500
    },
    {
      "epoch": 0.09622323791195574,
      "grad_norm": 1.836075782775879,
      "learning_rate": 4.839728169352899e-05,
      "loss": 1.3092,
      "step": 1600
    },
    {
      "epoch": 0.10223719028145298,
      "grad_norm": 1.931219220161438,
      "learning_rate": 4.829704915403737e-05,
      "loss": 1.2528,
      "step": 1700
    },
    {
      "epoch": 0.1082511426509502,
      "grad_norm": 1.9112814664840698,
      "learning_rate": 4.819681661454575e-05,
      "loss": 1.25,
      "step": 1800
    },
    {
      "epoch": 0.11426509502044743,
      "grad_norm": 2.1694140434265137,
      "learning_rate": 4.809658407505413e-05,
      "loss": 1.253,
      "step": 1900
    },
    {
      "epoch": 0.12027904738994467,
      "grad_norm": 1.9220771789550781,
      "learning_rate": 4.7996351535562506e-05,
      "loss": 1.2278,
      "step": 2000
    },
    {
      "epoch": 0.12629299975944192,
      "grad_norm": 1.7120180130004883,
      "learning_rate": 4.789611899607089e-05,
      "loss": 1.2483,
      "step": 2100
    },
    {
      "epoch": 0.13230695212893914,
      "grad_norm": 1.917104959487915,
      "learning_rate": 4.779588645657926e-05,
      "loss": 1.2384,
      "step": 2200
    },
    {
      "epoch": 0.13832090449843637,
      "grad_norm": 2.0323758125305176,
      "learning_rate": 4.7695653917087644e-05,
      "loss": 1.2292,
      "step": 2300
    },
    {
      "epoch": 0.1443348568679336,
      "grad_norm": 1.779888391494751,
      "learning_rate": 4.7595421377596025e-05,
      "loss": 1.2258,
      "step": 2400
    },
    {
      "epoch": 0.15034880923743085,
      "grad_norm": 1.7667876482009888,
      "learning_rate": 4.7495188838104406e-05,
      "loss": 1.1875,
      "step": 2500
    },
    {
      "epoch": 0.15636276160692808,
      "grad_norm": 1.8692760467529297,
      "learning_rate": 4.739495629861279e-05,
      "loss": 1.2082,
      "step": 2600
    },
    {
      "epoch": 0.1623767139764253,
      "grad_norm": 1.8578811883926392,
      "learning_rate": 4.729472375912116e-05,
      "loss": 1.1939,
      "step": 2700
    },
    {
      "epoch": 0.16839066634592254,
      "grad_norm": 1.8867884874343872,
      "learning_rate": 4.7194491219629543e-05,
      "loss": 1.2108,
      "step": 2800
    },
    {
      "epoch": 0.17440461871541976,
      "grad_norm": 1.72930109500885,
      "learning_rate": 4.709425868013792e-05,
      "loss": 1.1943,
      "step": 2900
    },
    {
      "epoch": 0.18041857108491702,
      "grad_norm": 1.7612700462341309,
      "learning_rate": 4.69940261406463e-05,
      "loss": 1.1818,
      "step": 3000
    },
    {
      "epoch": 0.18643252345441425,
      "grad_norm": 1.6063175201416016,
      "learning_rate": 4.689379360115468e-05,
      "loss": 1.1926,
      "step": 3100
    },
    {
      "epoch": 0.19244647582391147,
      "grad_norm": 1.7698125839233398,
      "learning_rate": 4.679356106166306e-05,
      "loss": 1.2029,
      "step": 3200
    },
    {
      "epoch": 0.1984604281934087,
      "grad_norm": 1.6680766344070435,
      "learning_rate": 4.6693328522171443e-05,
      "loss": 1.185,
      "step": 3300
    },
    {
      "epoch": 0.20447438056290596,
      "grad_norm": 1.7791478633880615,
      "learning_rate": 4.659309598267982e-05,
      "loss": 1.1663,
      "step": 3400
    },
    {
      "epoch": 0.21048833293240318,
      "grad_norm": 1.6491518020629883,
      "learning_rate": 4.64928634431882e-05,
      "loss": 1.1492,
      "step": 3500
    },
    {
      "epoch": 0.2165022853019004,
      "grad_norm": 1.6312377452850342,
      "learning_rate": 4.6392630903696574e-05,
      "loss": 1.1696,
      "step": 3600
    },
    {
      "epoch": 0.22251623767139764,
      "grad_norm": 1.7078893184661865,
      "learning_rate": 4.6292398364204955e-05,
      "loss": 1.139,
      "step": 3700
    },
    {
      "epoch": 0.22853019004089486,
      "grad_norm": 2.0885114669799805,
      "learning_rate": 4.619216582471334e-05,
      "loss": 1.1399,
      "step": 3800
    },
    {
      "epoch": 0.23454414241039212,
      "grad_norm": 1.8433787822723389,
      "learning_rate": 4.609193328522172e-05,
      "loss": 1.1376,
      "step": 3900
    },
    {
      "epoch": 0.24055809477988935,
      "grad_norm": 1.527872085571289,
      "learning_rate": 4.59917007457301e-05,
      "loss": 1.1094,
      "step": 4000
    },
    {
      "epoch": 0.24055809477988935,
      "eval_cer": 0.7748897683360889,
      "eval_loss": 1.0861139297485352,
      "eval_runtime": 5003.1551,
      "eval_samples_per_second": 3.326,
      "eval_steps_per_second": 0.416,
      "step": 4000
    },
    {
      "epoch": 0.24657204714938658,
      "grad_norm": 1.4162925481796265,
      "learning_rate": 4.5891468206238474e-05,
      "loss": 1.1353,
      "step": 4100
    },
    {
      "epoch": 0.25258599951888383,
      "grad_norm": 1.5863301753997803,
      "learning_rate": 4.5791235666746855e-05,
      "loss": 1.1257,
      "step": 4200
    },
    {
      "epoch": 0.25859995188838103,
      "grad_norm": 1.6928209066390991,
      "learning_rate": 4.569100312725523e-05,
      "loss": 1.1086,
      "step": 4300
    },
    {
      "epoch": 0.2646139042578783,
      "grad_norm": 1.2880722284317017,
      "learning_rate": 4.559077058776362e-05,
      "loss": 1.119,
      "step": 4400
    },
    {
      "epoch": 0.2706278566273755,
      "grad_norm": 1.6384778022766113,
      "learning_rate": 4.549053804827199e-05,
      "loss": 1.1474,
      "step": 4500
    },
    {
      "epoch": 0.27664180899687274,
      "grad_norm": 1.7176462411880493,
      "learning_rate": 4.5390305508780374e-05,
      "loss": 1.0925,
      "step": 4600
    },
    {
      "epoch": 0.28265576136637,
      "grad_norm": 1.6025973558425903,
      "learning_rate": 4.529007296928875e-05,
      "loss": 1.112,
      "step": 4700
    },
    {
      "epoch": 0.2886697137358672,
      "grad_norm": 1.7415896654129028,
      "learning_rate": 4.518984042979713e-05,
      "loss": 1.1218,
      "step": 4800
    },
    {
      "epoch": 0.29468366610536445,
      "grad_norm": 1.310402750968933,
      "learning_rate": 4.508960789030551e-05,
      "loss": 1.0879,
      "step": 4900
    },
    {
      "epoch": 0.3006976184748617,
      "grad_norm": 1.7458600997924805,
      "learning_rate": 4.498937535081389e-05,
      "loss": 1.1088,
      "step": 5000
    },
    {
      "epoch": 0.3067115708443589,
      "grad_norm": 1.8199197053909302,
      "learning_rate": 4.4889142811322274e-05,
      "loss": 1.0987,
      "step": 5100
    },
    {
      "epoch": 0.31272552321385616,
      "grad_norm": 1.5931302309036255,
      "learning_rate": 4.478891027183065e-05,
      "loss": 1.0802,
      "step": 5200
    },
    {
      "epoch": 0.31873947558335336,
      "grad_norm": 1.5596023797988892,
      "learning_rate": 4.468867773233903e-05,
      "loss": 1.1009,
      "step": 5300
    },
    {
      "epoch": 0.3247534279528506,
      "grad_norm": 1.7147523164749146,
      "learning_rate": 4.4588445192847404e-05,
      "loss": 1.1144,
      "step": 5400
    },
    {
      "epoch": 0.33076738032234787,
      "grad_norm": 1.7172225713729858,
      "learning_rate": 4.4488212653355786e-05,
      "loss": 1.0928,
      "step": 5500
    },
    {
      "epoch": 0.33678133269184507,
      "grad_norm": 1.5675067901611328,
      "learning_rate": 4.438798011386417e-05,
      "loss": 1.0421,
      "step": 5600
    },
    {
      "epoch": 0.3427952850613423,
      "grad_norm": 1.755364179611206,
      "learning_rate": 4.428774757437255e-05,
      "loss": 1.0897,
      "step": 5700
    },
    {
      "epoch": 0.3488092374308395,
      "grad_norm": 1.5678260326385498,
      "learning_rate": 4.418751503488093e-05,
      "loss": 1.0703,
      "step": 5800
    },
    {
      "epoch": 0.3548231898003368,
      "grad_norm": 1.8641386032104492,
      "learning_rate": 4.4087282495389304e-05,
      "loss": 1.07,
      "step": 5900
    },
    {
      "epoch": 0.36083714216983404,
      "grad_norm": 1.571837306022644,
      "learning_rate": 4.3987049955897686e-05,
      "loss": 1.0493,
      "step": 6000
    },
    {
      "epoch": 0.36685109453933124,
      "grad_norm": 1.5046846866607666,
      "learning_rate": 4.388681741640606e-05,
      "loss": 1.0935,
      "step": 6100
    },
    {
      "epoch": 0.3728650469088285,
      "grad_norm": 1.88533353805542,
      "learning_rate": 4.378658487691444e-05,
      "loss": 1.0801,
      "step": 6200
    },
    {
      "epoch": 0.3788789992783257,
      "grad_norm": 1.5626581907272339,
      "learning_rate": 4.368635233742282e-05,
      "loss": 1.0442,
      "step": 6300
    },
    {
      "epoch": 0.38489295164782295,
      "grad_norm": 1.4634822607040405,
      "learning_rate": 4.3586119797931204e-05,
      "loss": 1.0552,
      "step": 6400
    },
    {
      "epoch": 0.3909069040173202,
      "grad_norm": 1.8044239282608032,
      "learning_rate": 4.3485887258439586e-05,
      "loss": 1.035,
      "step": 6500
    },
    {
      "epoch": 0.3969208563868174,
      "grad_norm": 1.6868014335632324,
      "learning_rate": 4.338565471894796e-05,
      "loss": 1.0497,
      "step": 6600
    },
    {
      "epoch": 0.40293480875631466,
      "grad_norm": 1.6949145793914795,
      "learning_rate": 4.328542217945634e-05,
      "loss": 1.0369,
      "step": 6700
    },
    {
      "epoch": 0.4089487611258119,
      "grad_norm": 1.7868014574050903,
      "learning_rate": 4.3186191965359634e-05,
      "loss": 1.0385,
      "step": 6800
    },
    {
      "epoch": 0.4149627134953091,
      "grad_norm": 1.550017237663269,
      "learning_rate": 4.3085959425868015e-05,
      "loss": 1.0326,
      "step": 6900
    },
    {
      "epoch": 0.42097666586480637,
      "grad_norm": 1.431362509727478,
      "learning_rate": 4.298572688637639e-05,
      "loss": 1.0154,
      "step": 7000
    },
    {
      "epoch": 0.42699061823430356,
      "grad_norm": 1.946357011795044,
      "learning_rate": 4.288549434688478e-05,
      "loss": 1.0532,
      "step": 7100
    },
    {
      "epoch": 0.4330045706038008,
      "grad_norm": 1.6147353649139404,
      "learning_rate": 4.278526180739316e-05,
      "loss": 1.0633,
      "step": 7200
    },
    {
      "epoch": 0.4390185229732981,
      "grad_norm": 1.655043363571167,
      "learning_rate": 4.2685029267901534e-05,
      "loss": 1.0558,
      "step": 7300
    },
    {
      "epoch": 0.4450324753427953,
      "grad_norm": 1.6090620756149292,
      "learning_rate": 4.2584796728409915e-05,
      "loss": 1.0289,
      "step": 7400
    },
    {
      "epoch": 0.45104642771229253,
      "grad_norm": 1.8263063430786133,
      "learning_rate": 4.248456418891829e-05,
      "loss": 1.0275,
      "step": 7500
    },
    {
      "epoch": 0.45706038008178973,
      "grad_norm": 1.6156238317489624,
      "learning_rate": 4.238433164942667e-05,
      "loss": 0.9976,
      "step": 7600
    },
    {
      "epoch": 0.463074332451287,
      "grad_norm": 1.4604992866516113,
      "learning_rate": 4.228409910993505e-05,
      "loss": 1.0607,
      "step": 7700
    },
    {
      "epoch": 0.46908828482078424,
      "grad_norm": 1.5745257139205933,
      "learning_rate": 4.2183866570443434e-05,
      "loss": 1.0297,
      "step": 7800
    },
    {
      "epoch": 0.47510223719028144,
      "grad_norm": 1.852102518081665,
      "learning_rate": 4.2083634030951815e-05,
      "loss": 1.0173,
      "step": 7900
    },
    {
      "epoch": 0.4811161895597787,
      "grad_norm": 1.6735659837722778,
      "learning_rate": 4.198340149146019e-05,
      "loss": 1.0145,
      "step": 8000
    },
    {
      "epoch": 0.4811161895597787,
      "eval_cer": 0.7706982683703557,
      "eval_loss": 0.9832878708839417,
      "eval_runtime": 5013.5334,
      "eval_samples_per_second": 3.319,
      "eval_steps_per_second": 0.415,
      "step": 8000
    },
    {
      "epoch": 0.4871301419292759,
      "grad_norm": 1.7805702686309814,
      "learning_rate": 4.188316895196857e-05,
      "loss": 1.0279,
      "step": 8100
    },
    {
      "epoch": 0.49314409429877315,
      "grad_norm": 1.4825282096862793,
      "learning_rate": 4.1782936412476945e-05,
      "loss": 1.0124,
      "step": 8200
    },
    {
      "epoch": 0.4991580466682704,
      "grad_norm": 1.5925073623657227,
      "learning_rate": 4.168270387298533e-05,
      "loss": 1.0026,
      "step": 8300
    },
    {
      "epoch": 0.5051719990377677,
      "grad_norm": 1.5965441465377808,
      "learning_rate": 4.158247133349371e-05,
      "loss": 1.0238,
      "step": 8400
    },
    {
      "epoch": 0.5111859514072649,
      "grad_norm": 1.5980632305145264,
      "learning_rate": 4.148223879400209e-05,
      "loss": 0.9909,
      "step": 8500
    },
    {
      "epoch": 0.5171999037767621,
      "grad_norm": 1.7301024198532104,
      "learning_rate": 4.1382006254510464e-05,
      "loss": 1.0172,
      "step": 8600
    },
    {
      "epoch": 0.5232138561462594,
      "grad_norm": 1.6884981393814087,
      "learning_rate": 4.1281773715018845e-05,
      "loss": 1.0326,
      "step": 8700
    },
    {
      "epoch": 0.5292278085157566,
      "grad_norm": 1.6702126264572144,
      "learning_rate": 4.118154117552723e-05,
      "loss": 1.016,
      "step": 8800
    },
    {
      "epoch": 0.5352417608852538,
      "grad_norm": 1.5463213920593262,
      "learning_rate": 4.108231096143052e-05,
      "loss": 1.0282,
      "step": 8900
    },
    {
      "epoch": 0.541255713254751,
      "grad_norm": 1.9977515935897827,
      "learning_rate": 4.09820784219389e-05,
      "loss": 1.0027,
      "step": 9000
    },
    {
      "epoch": 0.5472696656242483,
      "grad_norm": 1.4702314138412476,
      "learning_rate": 4.088184588244728e-05,
      "loss": 1.0138,
      "step": 9100
    },
    {
      "epoch": 0.5532836179937455,
      "grad_norm": 1.5981062650680542,
      "learning_rate": 4.078161334295566e-05,
      "loss": 1.0333,
      "step": 9200
    },
    {
      "epoch": 0.5592975703632427,
      "grad_norm": 1.4159762859344482,
      "learning_rate": 4.068138080346404e-05,
      "loss": 0.9956,
      "step": 9300
    },
    {
      "epoch": 0.56531152273274,
      "grad_norm": 1.4920343160629272,
      "learning_rate": 4.058114826397242e-05,
      "loss": 0.9978,
      "step": 9400
    },
    {
      "epoch": 0.5713254751022372,
      "grad_norm": 1.7335036993026733,
      "learning_rate": 4.048091572448079e-05,
      "loss": 1.0337,
      "step": 9500
    },
    {
      "epoch": 0.5773394274717344,
      "grad_norm": 1.4422550201416016,
      "learning_rate": 4.0380683184989175e-05,
      "loss": 1.0185,
      "step": 9600
    },
    {
      "epoch": 0.5833533798412317,
      "grad_norm": 1.776282787322998,
      "learning_rate": 4.0280450645497556e-05,
      "loss": 0.9941,
      "step": 9700
    },
    {
      "epoch": 0.5893673322107289,
      "grad_norm": 1.806261420249939,
      "learning_rate": 4.018021810600594e-05,
      "loss": 0.9772,
      "step": 9800
    },
    {
      "epoch": 0.5953812845802261,
      "grad_norm": 1.6673755645751953,
      "learning_rate": 4.007998556651432e-05,
      "loss": 0.9904,
      "step": 9900
    },
    {
      "epoch": 0.6013952369497234,
      "grad_norm": 1.5652821063995361,
      "learning_rate": 3.997975302702269e-05,
      "loss": 1.0008,
      "step": 10000
    },
    {
      "epoch": 0.6074091893192206,
      "grad_norm": 1.568590760231018,
      "learning_rate": 3.9879520487531075e-05,
      "loss": 1.0402,
      "step": 10100
    },
    {
      "epoch": 0.6134231416887178,
      "grad_norm": 1.505454659461975,
      "learning_rate": 3.977928794803945e-05,
      "loss": 1.0009,
      "step": 10200
    },
    {
      "epoch": 0.619437094058215,
      "grad_norm": 1.4691787958145142,
      "learning_rate": 3.967905540854783e-05,
      "loss": 1.0115,
      "step": 10300
    },
    {
      "epoch": 0.6254510464277123,
      "grad_norm": 1.5172970294952393,
      "learning_rate": 3.957882286905621e-05,
      "loss": 1.0209,
      "step": 10400
    },
    {
      "epoch": 0.6314649987972095,
      "grad_norm": 1.8390883207321167,
      "learning_rate": 3.947859032956459e-05,
      "loss": 1.0101,
      "step": 10500
    },
    {
      "epoch": 0.6374789511667067,
      "grad_norm": 1.5624459981918335,
      "learning_rate": 3.9378357790072975e-05,
      "loss": 1.0085,
      "step": 10600
    },
    {
      "epoch": 0.643492903536204,
      "grad_norm": 1.4316383600234985,
      "learning_rate": 3.927812525058135e-05,
      "loss": 1.0025,
      "step": 10700
    },
    {
      "epoch": 0.6495068559057012,
      "grad_norm": 1.5541032552719116,
      "learning_rate": 3.917789271108973e-05,
      "loss": 0.9938,
      "step": 10800
    },
    {
      "epoch": 0.6555208082751984,
      "grad_norm": 1.7416138648986816,
      "learning_rate": 3.907866249699302e-05,
      "loss": 0.9923,
      "step": 10900
    },
    {
      "epoch": 0.6615347606446957,
      "grad_norm": 1.7461607456207275,
      "learning_rate": 3.8978429957501404e-05,
      "loss": 0.9983,
      "step": 11000
    },
    {
      "epoch": 0.6675487130141929,
      "grad_norm": 1.5705294609069824,
      "learning_rate": 3.887819741800978e-05,
      "loss": 0.9809,
      "step": 11100
    },
    {
      "epoch": 0.6735626653836901,
      "grad_norm": 1.802960753440857,
      "learning_rate": 3.877796487851817e-05,
      "loss": 0.9675,
      "step": 11200
    },
    {
      "epoch": 0.6795766177531873,
      "grad_norm": 1.70058274269104,
      "learning_rate": 3.867773233902655e-05,
      "loss": 0.966,
      "step": 11300
    },
    {
      "epoch": 0.6855905701226847,
      "grad_norm": 1.5391656160354614,
      "learning_rate": 3.857749979953492e-05,
      "loss": 0.9412,
      "step": 11400
    },
    {
      "epoch": 0.6916045224921819,
      "grad_norm": 1.8226732015609741,
      "learning_rate": 3.8477267260043304e-05,
      "loss": 0.96,
      "step": 11500
    },
    {
      "epoch": 0.697618474861679,
      "grad_norm": 1.3753610849380493,
      "learning_rate": 3.837703472055168e-05,
      "loss": 0.9665,
      "step": 11600
    },
    {
      "epoch": 0.7036324272311764,
      "grad_norm": 1.6595444679260254,
      "learning_rate": 3.827680218106006e-05,
      "loss": 0.9851,
      "step": 11700
    },
    {
      "epoch": 0.7096463796006736,
      "grad_norm": 1.6920074224472046,
      "learning_rate": 3.817656964156844e-05,
      "loss": 0.983,
      "step": 11800
    },
    {
      "epoch": 0.7156603319701708,
      "grad_norm": 1.4369592666625977,
      "learning_rate": 3.807633710207682e-05,
      "loss": 0.9418,
      "step": 11900
    },
    {
      "epoch": 0.7216742843396681,
      "grad_norm": 1.7257956266403198,
      "learning_rate": 3.7976104562585204e-05,
      "loss": 0.9877,
      "step": 12000
    },
    {
      "epoch": 0.7216742843396681,
      "eval_cer": 0.7783561907148705,
      "eval_loss": 0.9317989349365234,
      "eval_runtime": 5220.1413,
      "eval_samples_per_second": 3.188,
      "eval_steps_per_second": 0.398,
      "step": 12000
    },
    {
      "epoch": 0.7276882367091653,
      "grad_norm": 1.6520166397094727,
      "learning_rate": 3.787587202309358e-05,
      "loss": 0.9415,
      "step": 12100
    },
    {
      "epoch": 0.7337021890786625,
      "grad_norm": 1.7703155279159546,
      "learning_rate": 3.777563948360196e-05,
      "loss": 0.9653,
      "step": 12200
    },
    {
      "epoch": 0.7397161414481598,
      "grad_norm": 1.4374829530715942,
      "learning_rate": 3.7675406944110334e-05,
      "loss": 0.9541,
      "step": 12300
    },
    {
      "epoch": 0.745730093817657,
      "grad_norm": 1.790893793106079,
      "learning_rate": 3.7575174404618716e-05,
      "loss": 0.9951,
      "step": 12400
    },
    {
      "epoch": 0.7517440461871542,
      "grad_norm": 1.6874678134918213,
      "learning_rate": 3.74749418651271e-05,
      "loss": 0.9704,
      "step": 12500
    },
    {
      "epoch": 0.7577579985566514,
      "grad_norm": 1.7682390213012695,
      "learning_rate": 3.737470932563548e-05,
      "loss": 0.9461,
      "step": 12600
    },
    {
      "epoch": 0.7637719509261487,
      "grad_norm": 1.6911418437957764,
      "learning_rate": 3.727447678614386e-05,
      "loss": 0.9952,
      "step": 12700
    },
    {
      "epoch": 0.7697859032956459,
      "grad_norm": 1.727547526359558,
      "learning_rate": 3.7174244246652234e-05,
      "loss": 0.9442,
      "step": 12800
    },
    {
      "epoch": 0.7757998556651431,
      "grad_norm": 1.4808145761489868,
      "learning_rate": 3.7074011707160616e-05,
      "loss": 0.9742,
      "step": 12900
    },
    {
      "epoch": 0.7818138080346404,
      "grad_norm": 1.876441240310669,
      "learning_rate": 3.697377916766899e-05,
      "loss": 0.9766,
      "step": 13000
    },
    {
      "epoch": 0.7878277604041376,
      "grad_norm": 1.4900315999984741,
      "learning_rate": 3.687354662817737e-05,
      "loss": 0.9612,
      "step": 13100
    },
    {
      "epoch": 0.7938417127736348,
      "grad_norm": 1.4132503271102905,
      "learning_rate": 3.677331408868575e-05,
      "loss": 0.9651,
      "step": 13200
    },
    {
      "epoch": 0.7998556651431321,
      "grad_norm": 1.6306148767471313,
      "learning_rate": 3.6673081549194134e-05,
      "loss": 0.9368,
      "step": 13300
    },
    {
      "epoch": 0.8058696175126293,
      "grad_norm": 1.7364792823791504,
      "learning_rate": 3.657284900970251e-05,
      "loss": 0.9427,
      "step": 13400
    },
    {
      "epoch": 0.8118835698821265,
      "grad_norm": 1.6632161140441895,
      "learning_rate": 3.647261647021089e-05,
      "loss": 0.9505,
      "step": 13500
    },
    {
      "epoch": 0.8178975222516238,
      "grad_norm": 1.5362128019332886,
      "learning_rate": 3.637238393071927e-05,
      "loss": 0.9692,
      "step": 13600
    },
    {
      "epoch": 0.823911474621121,
      "grad_norm": 1.3290611505508423,
      "learning_rate": 3.6272151391227646e-05,
      "loss": 0.9585,
      "step": 13700
    },
    {
      "epoch": 0.8299254269906182,
      "grad_norm": 1.6647266149520874,
      "learning_rate": 3.617191885173603e-05,
      "loss": 0.9725,
      "step": 13800
    },
    {
      "epoch": 0.8359393793601154,
      "grad_norm": 1.736165165901184,
      "learning_rate": 3.607168631224441e-05,
      "loss": 0.9611,
      "step": 13900
    },
    {
      "epoch": 0.8419533317296127,
      "grad_norm": 1.6238832473754883,
      "learning_rate": 3.597145377275279e-05,
      "loss": 0.9698,
      "step": 14000
    },
    {
      "epoch": 0.8479672840991099,
      "grad_norm": 1.741194486618042,
      "learning_rate": 3.5871221233261165e-05,
      "loss": 0.9338,
      "step": 14100
    },
    {
      "epoch": 0.8539812364686071,
      "grad_norm": 1.7224496603012085,
      "learning_rate": 3.5770988693769546e-05,
      "loss": 0.9715,
      "step": 14200
    },
    {
      "epoch": 0.8599951888381044,
      "grad_norm": 1.872253179550171,
      "learning_rate": 3.5671758479672845e-05,
      "loss": 0.9656,
      "step": 14300
    },
    {
      "epoch": 0.8660091412076016,
      "grad_norm": 1.5153071880340576,
      "learning_rate": 3.557152594018122e-05,
      "loss": 0.984,
      "step": 14400
    },
    {
      "epoch": 0.8720230935770988,
      "grad_norm": 1.667662262916565,
      "learning_rate": 3.54712934006896e-05,
      "loss": 0.9386,
      "step": 14500
    },
    {
      "epoch": 0.8780370459465962,
      "grad_norm": 1.7471551895141602,
      "learning_rate": 3.537106086119798e-05,
      "loss": 0.9486,
      "step": 14600
    },
    {
      "epoch": 0.8840509983160934,
      "grad_norm": 1.5477312803268433,
      "learning_rate": 3.5270828321706364e-05,
      "loss": 0.9451,
      "step": 14700
    },
    {
      "epoch": 0.8900649506855906,
      "grad_norm": 1.5079952478408813,
      "learning_rate": 3.517059578221474e-05,
      "loss": 0.9381,
      "step": 14800
    },
    {
      "epoch": 0.8960789030550877,
      "grad_norm": 1.5696821212768555,
      "learning_rate": 3.507036324272312e-05,
      "loss": 0.9418,
      "step": 14900
    },
    {
      "epoch": 0.9020928554245851,
      "grad_norm": 1.552612066268921,
      "learning_rate": 3.4970130703231494e-05,
      "loss": 0.9596,
      "step": 15000
    },
    {
      "epoch": 0.9081068077940823,
      "grad_norm": 1.7802802324295044,
      "learning_rate": 3.4869898163739876e-05,
      "loss": 0.9298,
      "step": 15100
    },
    {
      "epoch": 0.9141207601635795,
      "grad_norm": 1.5589861869812012,
      "learning_rate": 3.476966562424826e-05,
      "loss": 0.9621,
      "step": 15200
    },
    {
      "epoch": 0.9201347125330768,
      "grad_norm": 1.3050284385681152,
      "learning_rate": 3.466943308475664e-05,
      "loss": 0.9315,
      "step": 15300
    },
    {
      "epoch": 0.926148664902574,
      "grad_norm": 1.5091936588287354,
      "learning_rate": 3.456920054526502e-05,
      "loss": 0.9107,
      "step": 15400
    },
    {
      "epoch": 0.9321626172720712,
      "grad_norm": 1.5565592050552368,
      "learning_rate": 3.4468968005773394e-05,
      "loss": 0.9552,
      "step": 15500
    },
    {
      "epoch": 0.9381765696415685,
      "grad_norm": 1.7788596153259277,
      "learning_rate": 3.4368735466281775e-05,
      "loss": 0.9459,
      "step": 15600
    },
    {
      "epoch": 0.9441905220110657,
      "grad_norm": 1.8757565021514893,
      "learning_rate": 3.426850292679015e-05,
      "loss": 0.9254,
      "step": 15700
    },
    {
      "epoch": 0.9502044743805629,
      "grad_norm": 1.6978222131729126,
      "learning_rate": 3.416827038729853e-05,
      "loss": 0.9083,
      "step": 15800
    },
    {
      "epoch": 0.9562184267500602,
      "grad_norm": 1.6449016332626343,
      "learning_rate": 3.406803784780692e-05,
      "loss": 0.9767,
      "step": 15900
    },
    {
      "epoch": 0.9622323791195574,
      "grad_norm": 1.8677603006362915,
      "learning_rate": 3.3967805308315294e-05,
      "loss": 0.9469,
      "step": 16000
    },
    {
      "epoch": 0.9622323791195574,
      "eval_cer": 0.7716098871709525,
      "eval_loss": 0.8905403017997742,
      "eval_runtime": 5089.7823,
      "eval_samples_per_second": 3.269,
      "eval_steps_per_second": 0.409,
      "step": 16000
    },
    {
      "epoch": 0.9682463314890546,
      "grad_norm": 1.6178405284881592,
      "learning_rate": 3.3867572768823675e-05,
      "loss": 0.9441,
      "step": 16100
    },
    {
      "epoch": 0.9742602838585518,
      "grad_norm": 1.7407509088516235,
      "learning_rate": 3.376734022933205e-05,
      "loss": 0.9264,
      "step": 16200
    },
    {
      "epoch": 0.9802742362280491,
      "grad_norm": 1.5831618309020996,
      "learning_rate": 3.366710768984043e-05,
      "loss": 0.9583,
      "step": 16300
    },
    {
      "epoch": 0.9862881885975463,
      "grad_norm": 1.6853969097137451,
      "learning_rate": 3.3566875150348806e-05,
      "loss": 0.9226,
      "step": 16400
    },
    {
      "epoch": 0.9923021409670435,
      "grad_norm": 1.8264875411987305,
      "learning_rate": 3.3466642610857194e-05,
      "loss": 0.9492,
      "step": 16500
    },
    {
      "epoch": 0.9983160933365408,
      "grad_norm": 1.5579068660736084,
      "learning_rate": 3.336641007136557e-05,
      "loss": 0.8872,
      "step": 16600
    },
    {
      "epoch": 1.004330045706038,
      "grad_norm": 1.7848278284072876,
      "learning_rate": 3.326617753187395e-05,
      "loss": 0.8717,
      "step": 16700
    },
    {
      "epoch": 1.0103439980755353,
      "grad_norm": 1.5830621719360352,
      "learning_rate": 3.316694731777725e-05,
      "loss": 0.9032,
      "step": 16800
    },
    {
      "epoch": 1.0163579504450324,
      "grad_norm": 1.546217441558838,
      "learning_rate": 3.3066714778285623e-05,
      "loss": 0.9418,
      "step": 16900
    },
    {
      "epoch": 1.0223719028145297,
      "grad_norm": 1.4024384021759033,
      "learning_rate": 3.2966482238794005e-05,
      "loss": 0.9075,
      "step": 17000
    },
    {
      "epoch": 1.028385855184027,
      "grad_norm": 1.6523361206054688,
      "learning_rate": 3.286624969930238e-05,
      "loss": 0.9088,
      "step": 17100
    },
    {
      "epoch": 1.0343998075535241,
      "grad_norm": 1.4941192865371704,
      "learning_rate": 3.276601715981076e-05,
      "loss": 0.8703,
      "step": 17200
    },
    {
      "epoch": 1.0404137599230214,
      "grad_norm": 1.6586402654647827,
      "learning_rate": 3.266578462031914e-05,
      "loss": 0.9171,
      "step": 17300
    },
    {
      "epoch": 1.0464277122925187,
      "grad_norm": 1.5614475011825562,
      "learning_rate": 3.2565552080827523e-05,
      "loss": 0.8819,
      "step": 17400
    },
    {
      "epoch": 1.0524416646620158,
      "grad_norm": 1.6588680744171143,
      "learning_rate": 3.2465319541335905e-05,
      "loss": 0.9165,
      "step": 17500
    },
    {
      "epoch": 1.0584556170315131,
      "grad_norm": 1.4571171998977661,
      "learning_rate": 3.236508700184428e-05,
      "loss": 0.9056,
      "step": 17600
    },
    {
      "epoch": 1.0644695694010102,
      "grad_norm": 1.7484580278396606,
      "learning_rate": 3.226485446235266e-05,
      "loss": 0.9072,
      "step": 17700
    },
    {
      "epoch": 1.0704835217705075,
      "grad_norm": 1.388741374015808,
      "learning_rate": 3.2164621922861035e-05,
      "loss": 0.8645,
      "step": 17800
    },
    {
      "epoch": 1.0764974741400049,
      "grad_norm": 1.5871518850326538,
      "learning_rate": 3.206438938336942e-05,
      "loss": 0.908,
      "step": 17900
    },
    {
      "epoch": 1.0825114265095022,
      "grad_norm": 1.4603219032287598,
      "learning_rate": 3.19641568438778e-05,
      "loss": 0.8714,
      "step": 18000
    },
    {
      "epoch": 1.0885253788789993,
      "grad_norm": 1.443608283996582,
      "learning_rate": 3.186392430438618e-05,
      "loss": 0.8879,
      "step": 18100
    },
    {
      "epoch": 1.0945393312484966,
      "grad_norm": 1.4648326635360718,
      "learning_rate": 3.1763691764894554e-05,
      "loss": 0.923,
      "step": 18200
    },
    {
      "epoch": 1.1005532836179936,
      "grad_norm": 1.9082708358764648,
      "learning_rate": 3.1663459225402935e-05,
      "loss": 0.8898,
      "step": 18300
    },
    {
      "epoch": 1.106567235987491,
      "grad_norm": 1.740161418914795,
      "learning_rate": 3.1563226685911317e-05,
      "loss": 0.8913,
      "step": 18400
    },
    {
      "epoch": 1.1125811883569883,
      "grad_norm": 1.4581352472305298,
      "learning_rate": 3.146299414641969e-05,
      "loss": 0.9202,
      "step": 18500
    },
    {
      "epoch": 1.1185951407264854,
      "grad_norm": 1.5199153423309326,
      "learning_rate": 3.136276160692808e-05,
      "loss": 0.9032,
      "step": 18600
    },
    {
      "epoch": 1.1246090930959827,
      "grad_norm": 1.4630061388015747,
      "learning_rate": 3.1262529067436454e-05,
      "loss": 0.8771,
      "step": 18700
    },
    {
      "epoch": 1.13062304546548,
      "grad_norm": 1.7790659666061401,
      "learning_rate": 3.1162296527944835e-05,
      "loss": 0.8673,
      "step": 18800
    },
    {
      "epoch": 1.136636997834977,
      "grad_norm": 1.610372543334961,
      "learning_rate": 3.106206398845321e-05,
      "loss": 0.9019,
      "step": 18900
    },
    {
      "epoch": 1.1426509502044744,
      "grad_norm": 1.847385048866272,
      "learning_rate": 3.096183144896159e-05,
      "loss": 0.8907,
      "step": 19000
    },
    {
      "epoch": 1.1486649025739717,
      "grad_norm": 1.664432168006897,
      "learning_rate": 3.086159890946997e-05,
      "loss": 0.8745,
      "step": 19100
    },
    {
      "epoch": 1.1546788549434688,
      "grad_norm": 1.5932984352111816,
      "learning_rate": 3.0761366369978354e-05,
      "loss": 0.8697,
      "step": 19200
    },
    {
      "epoch": 1.160692807312966,
      "grad_norm": 1.4511469602584839,
      "learning_rate": 3.0662136155881646e-05,
      "loss": 0.8748,
      "step": 19300
    },
    {
      "epoch": 1.1667067596824634,
      "grad_norm": 1.9143450260162354,
      "learning_rate": 3.056190361639003e-05,
      "loss": 0.9045,
      "step": 19400
    },
    {
      "epoch": 1.1727207120519605,
      "grad_norm": 1.3927329778671265,
      "learning_rate": 3.046167107689841e-05,
      "loss": 0.8927,
      "step": 19500
    },
    {
      "epoch": 1.1787346644214578,
      "grad_norm": 1.8178434371948242,
      "learning_rate": 3.0361438537406783e-05,
      "loss": 0.9258,
      "step": 19600
    },
    {
      "epoch": 1.1847486167909551,
      "grad_norm": 1.6006417274475098,
      "learning_rate": 3.0261205997915165e-05,
      "loss": 0.886,
      "step": 19700
    },
    {
      "epoch": 1.1907625691604522,
      "grad_norm": 1.6698856353759766,
      "learning_rate": 3.0160973458423543e-05,
      "loss": 0.9188,
      "step": 19800
    },
    {
      "epoch": 1.1967765215299495,
      "grad_norm": 1.5293818712234497,
      "learning_rate": 3.0060740918931924e-05,
      "loss": 0.896,
      "step": 19900
    },
    {
      "epoch": 1.2027904738994466,
      "grad_norm": 1.6966098546981812,
      "learning_rate": 2.9960508379440305e-05,
      "loss": 0.8677,
      "step": 20000
    },
    {
      "epoch": 1.2027904738994466,
      "eval_cer": 0.7689350483770306,
      "eval_loss": 0.8620118498802185,
      "eval_runtime": 5134.4813,
      "eval_samples_per_second": 3.241,
      "eval_steps_per_second": 0.405,
      "step": 20000
    },
    {
      "epoch": 1.208804426268944,
      "grad_norm": 1.6066211462020874,
      "learning_rate": 2.9860275839948683e-05,
      "loss": 0.8648,
      "step": 20100
    },
    {
      "epoch": 1.2148183786384412,
      "grad_norm": 1.4755396842956543,
      "learning_rate": 2.9760043300457065e-05,
      "loss": 0.8683,
      "step": 20200
    },
    {
      "epoch": 1.2208323310079385,
      "grad_norm": 1.3547738790512085,
      "learning_rate": 2.965981076096544e-05,
      "loss": 0.8786,
      "step": 20300
    },
    {
      "epoch": 1.2268462833774356,
      "grad_norm": 1.6254231929779053,
      "learning_rate": 2.955957822147382e-05,
      "loss": 0.8758,
      "step": 20400
    },
    {
      "epoch": 1.232860235746933,
      "grad_norm": 1.5725833177566528,
      "learning_rate": 2.94593456819822e-05,
      "loss": 0.8831,
      "step": 20500
    },
    {
      "epoch": 1.23887418811643,
      "grad_norm": 1.6321443319320679,
      "learning_rate": 2.935911314249058e-05,
      "loss": 0.868,
      "step": 20600
    },
    {
      "epoch": 1.2448881404859273,
      "grad_norm": 1.544110894203186,
      "learning_rate": 2.925888060299896e-05,
      "loss": 0.9033,
      "step": 20700
    },
    {
      "epoch": 1.2509020928554246,
      "grad_norm": 1.5690948963165283,
      "learning_rate": 2.915864806350734e-05,
      "loss": 0.8736,
      "step": 20800
    },
    {
      "epoch": 1.256916045224922,
      "grad_norm": 1.3950625658035278,
      "learning_rate": 2.905841552401572e-05,
      "loss": 0.9012,
      "step": 20900
    },
    {
      "epoch": 1.262929997594419,
      "grad_norm": 1.4699276685714722,
      "learning_rate": 2.8958182984524095e-05,
      "loss": 0.8545,
      "step": 21000
    },
    {
      "epoch": 1.2689439499639164,
      "grad_norm": 1.607750654220581,
      "learning_rate": 2.8857950445032476e-05,
      "loss": 0.9101,
      "step": 21100
    },
    {
      "epoch": 1.2749579023334134,
      "grad_norm": 2.1910347938537598,
      "learning_rate": 2.8757717905540854e-05,
      "loss": 0.8803,
      "step": 21200
    },
    {
      "epoch": 1.2809718547029108,
      "grad_norm": 1.604390025138855,
      "learning_rate": 2.8657485366049236e-05,
      "loss": 0.8751,
      "step": 21300
    },
    {
      "epoch": 1.286985807072408,
      "grad_norm": 1.5971423387527466,
      "learning_rate": 2.8557252826557617e-05,
      "loss": 0.8543,
      "step": 21400
    },
    {
      "epoch": 1.2929997594419051,
      "grad_norm": 1.6325972080230713,
      "learning_rate": 2.8457020287065995e-05,
      "loss": 0.8612,
      "step": 21500
    },
    {
      "epoch": 1.2990137118114025,
      "grad_norm": 1.7952935695648193,
      "learning_rate": 2.8356787747574376e-05,
      "loss": 0.8399,
      "step": 21600
    },
    {
      "epoch": 1.3050276641808995,
      "grad_norm": 1.683236002922058,
      "learning_rate": 2.825655520808275e-05,
      "loss": 0.8585,
      "step": 21700
    },
    {
      "epoch": 1.3110416165503969,
      "grad_norm": 1.630666971206665,
      "learning_rate": 2.8156322668591136e-05,
      "loss": 0.8891,
      "step": 21800
    },
    {
      "epoch": 1.3170555689198942,
      "grad_norm": 1.7404346466064453,
      "learning_rate": 2.805609012909951e-05,
      "loss": 0.8514,
      "step": 21900
    },
    {
      "epoch": 1.3230695212893915,
      "grad_norm": 1.4314298629760742,
      "learning_rate": 2.795585758960789e-05,
      "loss": 0.8561,
      "step": 22000
    },
    {
      "epoch": 1.3290834736588886,
      "grad_norm": 1.7691779136657715,
      "learning_rate": 2.785562505011627e-05,
      "loss": 0.8474,
      "step": 22100
    },
    {
      "epoch": 1.3350974260283859,
      "grad_norm": 1.7155267000198364,
      "learning_rate": 2.775539251062465e-05,
      "loss": 0.8289,
      "step": 22200
    },
    {
      "epoch": 1.341111378397883,
      "grad_norm": 1.7087023258209229,
      "learning_rate": 2.7655159971133032e-05,
      "loss": 0.8544,
      "step": 22300
    },
    {
      "epoch": 1.3471253307673803,
      "grad_norm": 1.617749571800232,
      "learning_rate": 2.755492743164141e-05,
      "loss": 0.8781,
      "step": 22400
    },
    {
      "epoch": 1.3531392831368776,
      "grad_norm": 1.6493247747421265,
      "learning_rate": 2.745469489214979e-05,
      "loss": 0.8392,
      "step": 22500
    },
    {
      "epoch": 1.359153235506375,
      "grad_norm": 1.809634804725647,
      "learning_rate": 2.7354462352658166e-05,
      "loss": 0.8721,
      "step": 22600
    },
    {
      "epoch": 1.365167187875872,
      "grad_norm": 1.3698049783706665,
      "learning_rate": 2.7254229813166547e-05,
      "loss": 0.8533,
      "step": 22700
    },
    {
      "epoch": 1.3711811402453693,
      "grad_norm": 1.7568131685256958,
      "learning_rate": 2.7153997273674925e-05,
      "loss": 0.886,
      "step": 22800
    },
    {
      "epoch": 1.3771950926148664,
      "grad_norm": 1.867412805557251,
      "learning_rate": 2.7053764734183307e-05,
      "loss": 0.8637,
      "step": 22900
    },
    {
      "epoch": 1.3832090449843637,
      "grad_norm": 2.0730977058410645,
      "learning_rate": 2.6953532194691688e-05,
      "loss": 0.8626,
      "step": 23000
    },
    {
      "epoch": 1.389222997353861,
      "grad_norm": 1.8011558055877686,
      "learning_rate": 2.6853299655200066e-05,
      "loss": 0.8784,
      "step": 23100
    },
    {
      "epoch": 1.3952369497233583,
      "grad_norm": 1.6936458349227905,
      "learning_rate": 2.6753067115708447e-05,
      "loss": 0.8634,
      "step": 23200
    },
    {
      "epoch": 1.4012509020928554,
      "grad_norm": 1.7492289543151855,
      "learning_rate": 2.665383690161174e-05,
      "loss": 0.8678,
      "step": 23300
    },
    {
      "epoch": 1.4072648544623527,
      "grad_norm": 1.8972880840301514,
      "learning_rate": 2.655360436212012e-05,
      "loss": 0.8939,
      "step": 23400
    },
    {
      "epoch": 1.4132788068318498,
      "grad_norm": 1.6961406469345093,
      "learning_rate": 2.64533718226285e-05,
      "loss": 0.8727,
      "step": 23500
    },
    {
      "epoch": 1.4192927592013471,
      "grad_norm": 1.583854079246521,
      "learning_rate": 2.635313928313688e-05,
      "loss": 0.8332,
      "step": 23600
    },
    {
      "epoch": 1.4253067115708444,
      "grad_norm": 1.6541253328323364,
      "learning_rate": 2.6252906743645255e-05,
      "loss": 0.8798,
      "step": 23700
    },
    {
      "epoch": 1.4313206639403415,
      "grad_norm": 1.7607979774475098,
      "learning_rate": 2.6152674204153636e-05,
      "loss": 0.8472,
      "step": 23800
    },
    {
      "epoch": 1.4373346163098388,
      "grad_norm": 1.5591400861740112,
      "learning_rate": 2.605244166466202e-05,
      "loss": 0.8355,
      "step": 23900
    },
    {
      "epoch": 1.4433485686793361,
      "grad_norm": 1.4700669050216675,
      "learning_rate": 2.5952209125170395e-05,
      "loss": 0.8521,
      "step": 24000
    },
    {
      "epoch": 1.4433485686793361,
      "eval_cer": 0.7597912847133936,
      "eval_loss": 0.8321590423583984,
      "eval_runtime": 5084.7673,
      "eval_samples_per_second": 3.273,
      "eval_steps_per_second": 0.409,
      "step": 24000
    },
    {
      "epoch": 1.4493625210488332,
      "grad_norm": 1.4155552387237549,
      "learning_rate": 2.5851976585678777e-05,
      "loss": 0.8797,
      "step": 24100
    },
    {
      "epoch": 1.4553764734183305,
      "grad_norm": 1.4764596223831177,
      "learning_rate": 2.5751744046187155e-05,
      "loss": 0.8657,
      "step": 24200
    },
    {
      "epoch": 1.4613904257878279,
      "grad_norm": 1.463333010673523,
      "learning_rate": 2.5651511506695536e-05,
      "loss": 0.8746,
      "step": 24300
    },
    {
      "epoch": 1.467404378157325,
      "grad_norm": 1.5392202138900757,
      "learning_rate": 2.555127896720391e-05,
      "loss": 0.8512,
      "step": 24400
    },
    {
      "epoch": 1.4734183305268223,
      "grad_norm": 1.8480241298675537,
      "learning_rate": 2.5451046427712295e-05,
      "loss": 0.8562,
      "step": 24500
    },
    {
      "epoch": 1.4794322828963193,
      "grad_norm": 1.7533873319625854,
      "learning_rate": 2.5350813888220677e-05,
      "loss": 0.8534,
      "step": 24600
    },
    {
      "epoch": 1.4854462352658167,
      "grad_norm": 1.6647679805755615,
      "learning_rate": 2.525058134872905e-05,
      "loss": 0.8335,
      "step": 24700
    },
    {
      "epoch": 1.491460187635314,
      "grad_norm": 1.8899763822555542,
      "learning_rate": 2.5150348809237433e-05,
      "loss": 0.8486,
      "step": 24800
    },
    {
      "epoch": 1.4974741400048113,
      "grad_norm": 1.7569955587387085,
      "learning_rate": 2.505011626974581e-05,
      "loss": 0.8415,
      "step": 24900
    },
    {
      "epoch": 1.5034880923743084,
      "grad_norm": 1.5141854286193848,
      "learning_rate": 2.4949883730254192e-05,
      "loss": 0.8107,
      "step": 25000
    },
    {
      "epoch": 1.5095020447438057,
      "grad_norm": 1.8239057064056396,
      "learning_rate": 2.484965119076257e-05,
      "loss": 0.8841,
      "step": 25100
    },
    {
      "epoch": 1.5155159971133028,
      "grad_norm": 1.433118224143982,
      "learning_rate": 2.474941865127095e-05,
      "loss": 0.8408,
      "step": 25200
    },
    {
      "epoch": 1.5215299494828,
      "grad_norm": 1.6874032020568848,
      "learning_rate": 2.4650188437174247e-05,
      "loss": 0.8387,
      "step": 25300
    },
    {
      "epoch": 1.5275439018522974,
      "grad_norm": 1.4172905683517456,
      "learning_rate": 2.4549955897682625e-05,
      "loss": 0.8357,
      "step": 25400
    },
    {
      "epoch": 1.5335578542217947,
      "grad_norm": 1.4848599433898926,
      "learning_rate": 2.4449723358191003e-05,
      "loss": 0.8451,
      "step": 25500
    },
    {
      "epoch": 1.5395718065912918,
      "grad_norm": 1.4670342206954956,
      "learning_rate": 2.4349490818699384e-05,
      "loss": 0.8505,
      "step": 25600
    },
    {
      "epoch": 1.5455857589607889,
      "grad_norm": 1.5914552211761475,
      "learning_rate": 2.4249258279207762e-05,
      "loss": 0.8635,
      "step": 25700
    },
    {
      "epoch": 1.5515997113302862,
      "grad_norm": 1.5550841093063354,
      "learning_rate": 2.414902573971614e-05,
      "loss": 0.8763,
      "step": 25800
    },
    {
      "epoch": 1.5576136636997835,
      "grad_norm": 1.5907316207885742,
      "learning_rate": 2.4048793200224525e-05,
      "loss": 0.8664,
      "step": 25900
    },
    {
      "epoch": 1.5636276160692808,
      "grad_norm": 1.4494388103485107,
      "learning_rate": 2.3948560660732903e-05,
      "loss": 0.819,
      "step": 26000
    },
    {
      "epoch": 1.5696415684387781,
      "grad_norm": 1.599004864692688,
      "learning_rate": 2.384832812124128e-05,
      "loss": 0.8363,
      "step": 26100
    },
    {
      "epoch": 1.5756555208082752,
      "grad_norm": 1.887817621231079,
      "learning_rate": 2.3748095581749662e-05,
      "loss": 0.8845,
      "step": 26200
    },
    {
      "epoch": 1.5816694731777723,
      "grad_norm": 1.3124005794525146,
      "learning_rate": 2.364786304225804e-05,
      "loss": 0.858,
      "step": 26300
    },
    {
      "epoch": 1.5876834255472696,
      "grad_norm": 1.6560554504394531,
      "learning_rate": 2.3547630502766418e-05,
      "loss": 0.8553,
      "step": 26400
    },
    {
      "epoch": 1.593697377916767,
      "grad_norm": 1.5678675174713135,
      "learning_rate": 2.34473979632748e-05,
      "loss": 0.8488,
      "step": 26500
    },
    {
      "epoch": 1.5997113302862642,
      "grad_norm": 1.4168376922607422,
      "learning_rate": 2.334716542378318e-05,
      "loss": 0.8263,
      "step": 26600
    },
    {
      "epoch": 1.6057252826557615,
      "grad_norm": 1.6189205646514893,
      "learning_rate": 2.324693288429156e-05,
      "loss": 0.8489,
      "step": 26700
    },
    {
      "epoch": 1.6117392350252586,
      "grad_norm": 1.7498302459716797,
      "learning_rate": 2.3146700344799936e-05,
      "loss": 0.8069,
      "step": 26800
    },
    {
      "epoch": 1.6177531873947557,
      "grad_norm": 1.5609160661697388,
      "learning_rate": 2.3046467805308318e-05,
      "loss": 0.8337,
      "step": 26900
    },
    {
      "epoch": 1.623767139764253,
      "grad_norm": 1.7673338651657104,
      "learning_rate": 2.2946235265816696e-05,
      "loss": 0.8264,
      "step": 27000
    },
    {
      "epoch": 1.6297810921337503,
      "grad_norm": 1.593299150466919,
      "learning_rate": 2.2846002726325074e-05,
      "loss": 0.828,
      "step": 27100
    },
    {
      "epoch": 1.6357950445032476,
      "grad_norm": 1.5802645683288574,
      "learning_rate": 2.274677251222837e-05,
      "loss": 0.8428,
      "step": 27200
    },
    {
      "epoch": 1.6418089968727447,
      "grad_norm": 1.7584878206253052,
      "learning_rate": 2.264653997273675e-05,
      "loss": 0.8378,
      "step": 27300
    },
    {
      "epoch": 1.647822949242242,
      "grad_norm": 1.5360692739486694,
      "learning_rate": 2.254630743324513e-05,
      "loss": 0.8288,
      "step": 27400
    },
    {
      "epoch": 1.6538369016117391,
      "grad_norm": 1.5635976791381836,
      "learning_rate": 2.244607489375351e-05,
      "loss": 0.8208,
      "step": 27500
    },
    {
      "epoch": 1.6598508539812364,
      "grad_norm": 1.778735876083374,
      "learning_rate": 2.2345842354261888e-05,
      "loss": 0.847,
      "step": 27600
    },
    {
      "epoch": 1.6658648063507338,
      "grad_norm": 1.5961335897445679,
      "learning_rate": 2.224560981477027e-05,
      "loss": 0.8059,
      "step": 27700
    },
    {
      "epoch": 1.671878758720231,
      "grad_norm": 1.396517038345337,
      "learning_rate": 2.2145377275278647e-05,
      "loss": 0.8406,
      "step": 27800
    },
    {
      "epoch": 1.6778927110897282,
      "grad_norm": 1.554319977760315,
      "learning_rate": 2.2045144735787025e-05,
      "loss": 0.836,
      "step": 27900
    },
    {
      "epoch": 1.6839066634592252,
      "grad_norm": 1.5663318634033203,
      "learning_rate": 2.1944912196295406e-05,
      "loss": 0.8386,
      "step": 28000
    },
    {
      "epoch": 1.6839066634592252,
      "eval_cer": 0.7604478286805494,
      "eval_loss": 0.8103429079055786,
      "eval_runtime": 5152.8312,
      "eval_samples_per_second": 3.229,
      "eval_steps_per_second": 0.404,
      "step": 28000
    },
    {
      "epoch": 1.6899206158287225,
      "grad_norm": 1.7991820573806763,
      "learning_rate": 2.1844679656803784e-05,
      "loss": 0.8428,
      "step": 28100
    },
    {
      "epoch": 1.6959345681982199,
      "grad_norm": 1.6566849946975708,
      "learning_rate": 2.1744447117312162e-05,
      "loss": 0.8331,
      "step": 28200
    },
    {
      "epoch": 1.7019485205677172,
      "grad_norm": 1.650564432144165,
      "learning_rate": 2.1644214577820547e-05,
      "loss": 0.8379,
      "step": 28300
    },
    {
      "epoch": 1.7079624729372145,
      "grad_norm": 1.6355592012405396,
      "learning_rate": 2.1543982038328925e-05,
      "loss": 0.8576,
      "step": 28400
    },
    {
      "epoch": 1.7139764253067116,
      "grad_norm": 1.7112095355987549,
      "learning_rate": 2.1443749498837303e-05,
      "loss": 0.8173,
      "step": 28500
    },
    {
      "epoch": 1.7199903776762087,
      "grad_norm": 1.7781462669372559,
      "learning_rate": 2.1343516959345684e-05,
      "loss": 0.8292,
      "step": 28600
    },
    {
      "epoch": 1.726004330045706,
      "grad_norm": 1.708770513534546,
      "learning_rate": 2.1243284419854062e-05,
      "loss": 0.8312,
      "step": 28700
    },
    {
      "epoch": 1.7320182824152033,
      "grad_norm": 1.869710087776184,
      "learning_rate": 2.114305188036244e-05,
      "loss": 0.8529,
      "step": 28800
    },
    {
      "epoch": 1.7380322347847006,
      "grad_norm": 1.4506940841674805,
      "learning_rate": 2.104281934087082e-05,
      "loss": 0.8454,
      "step": 28900
    },
    {
      "epoch": 1.744046187154198,
      "grad_norm": 1.5264636278152466,
      "learning_rate": 2.0942586801379203e-05,
      "loss": 0.8281,
      "step": 29000
    },
    {
      "epoch": 1.750060139523695,
      "grad_norm": 1.9614264965057373,
      "learning_rate": 2.084235426188758e-05,
      "loss": 0.8328,
      "step": 29100
    },
    {
      "epoch": 1.756074091893192,
      "grad_norm": 1.4223591089248657,
      "learning_rate": 2.074212172239596e-05,
      "loss": 0.8142,
      "step": 29200
    },
    {
      "epoch": 1.7620880442626894,
      "grad_norm": 1.6863304376602173,
      "learning_rate": 2.064188918290434e-05,
      "loss": 0.8306,
      "step": 29300
    },
    {
      "epoch": 1.7681019966321867,
      "grad_norm": 1.5096672773361206,
      "learning_rate": 2.0541656643412718e-05,
      "loss": 0.7964,
      "step": 29400
    },
    {
      "epoch": 1.774115949001684,
      "grad_norm": 1.9181997776031494,
      "learning_rate": 2.0441424103921096e-05,
      "loss": 0.8221,
      "step": 29500
    },
    {
      "epoch": 1.780129901371181,
      "grad_norm": 1.9824730157852173,
      "learning_rate": 2.0341191564429478e-05,
      "loss": 0.8379,
      "step": 29600
    },
    {
      "epoch": 1.7861438537406784,
      "grad_norm": 1.4306327104568481,
      "learning_rate": 2.0240959024937855e-05,
      "loss": 0.805,
      "step": 29700
    },
    {
      "epoch": 1.7921578061101755,
      "grad_norm": 1.6249910593032837,
      "learning_rate": 2.0140726485446237e-05,
      "loss": 0.8145,
      "step": 29800
    },
    {
      "epoch": 1.7981717584796728,
      "grad_norm": 1.551161766052246,
      "learning_rate": 2.0040493945954618e-05,
      "loss": 0.8436,
      "step": 29900
    },
    {
      "epoch": 1.8041857108491701,
      "grad_norm": 1.5218690633773804,
      "learning_rate": 1.9940261406462996e-05,
      "loss": 0.8181,
      "step": 30000
    },
    {
      "epoch": 1.8101996632186674,
      "grad_norm": 1.5932899713516235,
      "learning_rate": 1.9840028866971374e-05,
      "loss": 0.8478,
      "step": 30100
    },
    {
      "epoch": 1.8162136155881645,
      "grad_norm": 1.4991642236709595,
      "learning_rate": 1.9739796327479752e-05,
      "loss": 0.8254,
      "step": 30200
    },
    {
      "epoch": 1.8222275679576616,
      "grad_norm": 1.647438883781433,
      "learning_rate": 1.9639563787988133e-05,
      "loss": 0.8342,
      "step": 30300
    },
    {
      "epoch": 1.828241520327159,
      "grad_norm": 1.6653351783752441,
      "learning_rate": 1.953933124849651e-05,
      "loss": 0.8249,
      "step": 30400
    },
    {
      "epoch": 1.8342554726966562,
      "grad_norm": 1.6969921588897705,
      "learning_rate": 1.943909870900489e-05,
      "loss": 0.8544,
      "step": 30500
    },
    {
      "epoch": 1.8402694250661535,
      "grad_norm": 1.7201420068740845,
      "learning_rate": 1.9338866169513274e-05,
      "loss": 0.8323,
      "step": 30600
    },
    {
      "epoch": 1.8462833774356509,
      "grad_norm": 1.7479013204574585,
      "learning_rate": 1.9238633630021652e-05,
      "loss": 0.8307,
      "step": 30700
    },
    {
      "epoch": 1.852297329805148,
      "grad_norm": 1.6966118812561035,
      "learning_rate": 1.913840109053003e-05,
      "loss": 0.8291,
      "step": 30800
    },
    {
      "epoch": 1.858311282174645,
      "grad_norm": 1.6381275653839111,
      "learning_rate": 1.903816855103841e-05,
      "loss": 0.8121,
      "step": 30900
    },
    {
      "epoch": 1.8643252345441423,
      "grad_norm": 1.4601831436157227,
      "learning_rate": 1.893793601154679e-05,
      "loss": 0.8174,
      "step": 31000
    },
    {
      "epoch": 1.8703391869136397,
      "grad_norm": 1.8310879468917847,
      "learning_rate": 1.8837703472055167e-05,
      "loss": 0.8023,
      "step": 31100
    },
    {
      "epoch": 1.876353139283137,
      "grad_norm": 1.4689913988113403,
      "learning_rate": 1.8738473257958463e-05,
      "loss": 0.8035,
      "step": 31200
    },
    {
      "epoch": 1.8823670916526343,
      "grad_norm": 1.4330099821090698,
      "learning_rate": 1.8638240718466844e-05,
      "loss": 0.8185,
      "step": 31300
    },
    {
      "epoch": 1.8883810440221314,
      "grad_norm": 1.8419737815856934,
      "learning_rate": 1.8538008178975225e-05,
      "loss": 0.8144,
      "step": 31400
    },
    {
      "epoch": 1.8943949963916284,
      "grad_norm": 1.2664531469345093,
      "learning_rate": 1.8437775639483603e-05,
      "loss": 0.816,
      "step": 31500
    },
    {
      "epoch": 1.9004089487611258,
      "grad_norm": 1.6704432964324951,
      "learning_rate": 1.833754309999198e-05,
      "loss": 0.817,
      "step": 31600
    },
    {
      "epoch": 1.906422901130623,
      "grad_norm": 1.7487777471542358,
      "learning_rate": 1.8237310560500363e-05,
      "loss": 0.8041,
      "step": 31700
    },
    {
      "epoch": 1.9124368535001204,
      "grad_norm": 1.2405571937561035,
      "learning_rate": 1.813707802100874e-05,
      "loss": 0.825,
      "step": 31800
    },
    {
      "epoch": 1.9184508058696175,
      "grad_norm": 1.543731689453125,
      "learning_rate": 1.803684548151712e-05,
      "loss": 0.8147,
      "step": 31900
    },
    {
      "epoch": 1.9244647582391148,
      "grad_norm": 1.5452948808670044,
      "learning_rate": 1.79366129420255e-05,
      "loss": 0.7973,
      "step": 32000
    },
    {
      "epoch": 1.9244647582391148,
      "eval_cer": 0.7489049827996835,
      "eval_loss": 0.7830217480659485,
      "eval_runtime": 5092.4712,
      "eval_samples_per_second": 3.268,
      "eval_steps_per_second": 0.408,
      "step": 32000
    },
    {
      "epoch": 1.9304787106086119,
      "grad_norm": 1.640507459640503,
      "learning_rate": 1.7836380402533878e-05,
      "loss": 0.831,
      "step": 32100
    },
    {
      "epoch": 1.9364926629781092,
      "grad_norm": 1.919505000114441,
      "learning_rate": 1.773614786304226e-05,
      "loss": 0.8216,
      "step": 32200
    },
    {
      "epoch": 1.9425066153476065,
      "grad_norm": 1.5534350872039795,
      "learning_rate": 1.763591532355064e-05,
      "loss": 0.8157,
      "step": 32300
    },
    {
      "epoch": 1.9485205677171038,
      "grad_norm": 1.5943905115127563,
      "learning_rate": 1.753568278405902e-05,
      "loss": 0.8026,
      "step": 32400
    },
    {
      "epoch": 1.954534520086601,
      "grad_norm": 1.788720965385437,
      "learning_rate": 1.7435450244567397e-05,
      "loss": 0.7798,
      "step": 32500
    },
    {
      "epoch": 1.9605484724560982,
      "grad_norm": 1.707412600517273,
      "learning_rate": 1.7335217705075778e-05,
      "loss": 0.8125,
      "step": 32600
    },
    {
      "epoch": 1.9665624248255953,
      "grad_norm": 1.6318702697753906,
      "learning_rate": 1.7234985165584156e-05,
      "loss": 0.8184,
      "step": 32700
    },
    {
      "epoch": 1.9725763771950926,
      "grad_norm": 1.6191486120224,
      "learning_rate": 1.7134752626092534e-05,
      "loss": 0.8056,
      "step": 32800
    },
    {
      "epoch": 1.97859032956459,
      "grad_norm": 1.6070526838302612,
      "learning_rate": 1.7034520086600915e-05,
      "loss": 0.8134,
      "step": 32900
    },
    {
      "epoch": 1.9846042819340872,
      "grad_norm": 1.3369784355163574,
      "learning_rate": 1.6934287547109297e-05,
      "loss": 0.8086,
      "step": 33000
    },
    {
      "epoch": 1.9906182343035843,
      "grad_norm": 1.7080943584442139,
      "learning_rate": 1.6834055007617674e-05,
      "loss": 0.8304,
      "step": 33100
    },
    {
      "epoch": 1.9966321866730814,
      "grad_norm": 1.3051142692565918,
      "learning_rate": 1.673482479352097e-05,
      "loss": 0.778,
      "step": 33200
    },
    {
      "epoch": 2.0026461390425787,
      "grad_norm": 1.9086428880691528,
      "learning_rate": 1.6634592254029348e-05,
      "loss": 0.7972,
      "step": 33300
    },
    {
      "epoch": 2.008660091412076,
      "grad_norm": 1.6656805276870728,
      "learning_rate": 1.653435971453773e-05,
      "loss": 0.7701,
      "step": 33400
    },
    {
      "epoch": 2.0146740437815733,
      "grad_norm": 1.5769175291061401,
      "learning_rate": 1.6434127175046107e-05,
      "loss": 0.7602,
      "step": 33500
    },
    {
      "epoch": 2.0206879961510706,
      "grad_norm": 1.6363704204559326,
      "learning_rate": 1.6333894635554485e-05,
      "loss": 0.7826,
      "step": 33600
    },
    {
      "epoch": 2.0267019485205675,
      "grad_norm": 1.784424066543579,
      "learning_rate": 1.6233662096062867e-05,
      "loss": 0.7805,
      "step": 33700
    },
    {
      "epoch": 2.032715900890065,
      "grad_norm": 1.5795265436172485,
      "learning_rate": 1.6133429556571248e-05,
      "loss": 0.7289,
      "step": 33800
    },
    {
      "epoch": 2.038729853259562,
      "grad_norm": 1.382318377494812,
      "learning_rate": 1.6033197017079626e-05,
      "loss": 0.7869,
      "step": 33900
    },
    {
      "epoch": 2.0447438056290594,
      "grad_norm": 2.0357506275177,
      "learning_rate": 1.5932964477588004e-05,
      "loss": 0.7477,
      "step": 34000
    },
    {
      "epoch": 2.0507577579985568,
      "grad_norm": 1.6945205926895142,
      "learning_rate": 1.5832731938096385e-05,
      "loss": 0.7582,
      "step": 34100
    },
    {
      "epoch": 2.056771710368054,
      "grad_norm": 1.4343385696411133,
      "learning_rate": 1.5732499398604763e-05,
      "loss": 0.7536,
      "step": 34200
    },
    {
      "epoch": 2.062785662737551,
      "grad_norm": 1.7783600091934204,
      "learning_rate": 1.563226685911314e-05,
      "loss": 0.8331,
      "step": 34300
    },
    {
      "epoch": 2.0687996151070482,
      "grad_norm": 1.8440674543380737,
      "learning_rate": 1.5532034319621522e-05,
      "loss": 0.7352,
      "step": 34400
    },
    {
      "epoch": 2.0748135674765456,
      "grad_norm": 1.8098615407943726,
      "learning_rate": 1.54318017801299e-05,
      "loss": 0.7951,
      "step": 34500
    },
    {
      "epoch": 2.080827519846043,
      "grad_norm": 1.521584391593933,
      "learning_rate": 1.5331569240638282e-05,
      "loss": 0.7684,
      "step": 34600
    },
    {
      "epoch": 2.08684147221554,
      "grad_norm": 1.738142490386963,
      "learning_rate": 1.5231336701146661e-05,
      "loss": 0.7589,
      "step": 34700
    },
    {
      "epoch": 2.0928554245850375,
      "grad_norm": 1.5031851530075073,
      "learning_rate": 1.5131104161655041e-05,
      "loss": 0.7508,
      "step": 34800
    },
    {
      "epoch": 2.0988693769545343,
      "grad_norm": 1.8703136444091797,
      "learning_rate": 1.503087162216342e-05,
      "loss": 0.803,
      "step": 34900
    },
    {
      "epoch": 2.1048833293240317,
      "grad_norm": 1.6643913984298706,
      "learning_rate": 1.4930639082671799e-05,
      "loss": 0.7689,
      "step": 35000
    },
    {
      "epoch": 2.110897281693529,
      "grad_norm": 1.4073503017425537,
      "learning_rate": 1.4830406543180178e-05,
      "loss": 0.7575,
      "step": 35100
    },
    {
      "epoch": 2.1169112340630263,
      "grad_norm": 1.6007989645004272,
      "learning_rate": 1.4731176329083474e-05,
      "loss": 0.7317,
      "step": 35200
    },
    {
      "epoch": 2.1229251864325236,
      "grad_norm": 1.7965283393859863,
      "learning_rate": 1.4630943789591854e-05,
      "loss": 0.7142,
      "step": 35300
    },
    {
      "epoch": 2.1289391388020205,
      "grad_norm": 1.3655446767807007,
|
"learning_rate": 1.4530711250100235e-05, |
|
"loss": 0.7613, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 2.1349530911715178, |
|
"grad_norm": 1.779159426689148, |
|
"learning_rate": 1.4430478710608613e-05, |
|
"loss": 0.7827, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.140967043541015, |
|
"grad_norm": 1.9253307580947876, |
|
"learning_rate": 1.4330246171116993e-05, |
|
"loss": 0.7899, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 2.1469809959105124, |
|
"grad_norm": 1.3449054956436157, |
|
"learning_rate": 1.4230013631625372e-05, |
|
"loss": 0.7494, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 2.1529949482800097, |
|
"grad_norm": 1.9044090509414673, |
|
"learning_rate": 1.412978109213375e-05, |
|
"loss": 0.7714, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 2.159008900649507, |
|
"grad_norm": 1.674017071723938, |
|
"learning_rate": 1.402954855264213e-05, |
|
"loss": 0.7478, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 2.1650228530190043, |
|
"grad_norm": 1.8109982013702393, |
|
"learning_rate": 1.392931601315051e-05, |
|
"loss": 0.7541, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.1650228530190043, |
|
"eval_cer": 0.7396310458108724, |
|
"eval_loss": 0.7524659633636475, |
|
"eval_runtime": 5154.2928, |
|
"eval_samples_per_second": 3.228, |
|
"eval_steps_per_second": 0.404, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.171036805388501, |
|
"grad_norm": 1.316389560699463, |
|
"learning_rate": 1.3829083473658887e-05, |
|
"loss": 0.7647, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 2.1770507577579985, |
|
"grad_norm": 1.7067075967788696, |
|
"learning_rate": 1.372885093416727e-05, |
|
"loss": 0.7635, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 2.183064710127496, |
|
"grad_norm": 1.8793973922729492, |
|
"learning_rate": 1.3628618394675648e-05, |
|
"loss": 0.7313, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 2.189078662496993, |
|
"grad_norm": 1.769338607788086, |
|
"learning_rate": 1.3528385855184028e-05, |
|
"loss": 0.7573, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 2.1950926148664904, |
|
"grad_norm": 1.6032990217208862, |
|
"learning_rate": 1.3428153315692408e-05, |
|
"loss": 0.76, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.2011065672359873, |
|
"grad_norm": 1.5864907503128052, |
|
"learning_rate": 1.3327920776200786e-05, |
|
"loss": 0.7587, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 2.2071205196054846, |
|
"grad_norm": 1.2785674333572388, |
|
"learning_rate": 1.3227688236709165e-05, |
|
"loss": 0.7403, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 2.213134471974982, |
|
"grad_norm": 1.4437572956085205, |
|
"learning_rate": 1.3127455697217545e-05, |
|
"loss": 0.7358, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 2.2191484243444792, |
|
"grad_norm": 1.8562610149383545, |
|
"learning_rate": 1.3027223157725923e-05, |
|
"loss": 0.7429, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 2.2251623767139765, |
|
"grad_norm": 1.6878858804702759, |
|
"learning_rate": 1.2926990618234306e-05, |
|
"loss": 0.7526, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.231176329083474, |
|
"grad_norm": 1.9118757247924805, |
|
"learning_rate": 1.2826758078742684e-05, |
|
"loss": 0.7513, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 2.2371902814529707, |
|
"grad_norm": 1.3607146739959717, |
|
"learning_rate": 1.272752786464598e-05, |
|
"loss": 0.747, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 2.243204233822468, |
|
"grad_norm": 1.8414541482925415, |
|
"learning_rate": 1.2627295325154359e-05, |
|
"loss": 0.757, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 2.2492181861919653, |
|
"grad_norm": 1.5014030933380127, |
|
"learning_rate": 1.2527062785662739e-05, |
|
"loss": 0.7704, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 2.2552321385614627, |
|
"grad_norm": 1.850203514099121, |
|
"learning_rate": 1.2426830246171117e-05, |
|
"loss": 0.7532, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.26124609093096, |
|
"grad_norm": 1.9308381080627441, |
|
"learning_rate": 1.2326597706679498e-05, |
|
"loss": 0.7658, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 2.267260043300457, |
|
"grad_norm": 1.8409243822097778, |
|
"learning_rate": 1.2226365167187876e-05, |
|
"loss": 0.7307, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 2.273273995669954, |
|
"grad_norm": 1.7760223150253296, |
|
"learning_rate": 1.2126132627696256e-05, |
|
"loss": 0.7548, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 2.2792879480394515, |
|
"grad_norm": 1.4862762689590454, |
|
"learning_rate": 1.2025900088204635e-05, |
|
"loss": 0.7196, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 2.2853019004089488, |
|
"grad_norm": 1.6604909896850586, |
|
"learning_rate": 1.1925667548713015e-05, |
|
"loss": 0.7306, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.291315852778446, |
|
"grad_norm": 1.6279034614562988, |
|
"learning_rate": 1.1825435009221395e-05, |
|
"loss": 0.7683, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 2.2973298051479434, |
|
"grad_norm": 1.5816621780395508, |
|
"learning_rate": 1.1725202469729774e-05, |
|
"loss": 0.723, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 2.3033437575174407, |
|
"grad_norm": 1.7849699258804321, |
|
"learning_rate": 1.1624969930238152e-05, |
|
"loss": 0.7831, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 2.3093577098869376, |
|
"grad_norm": 1.851671814918518, |
|
"learning_rate": 1.1524737390746534e-05, |
|
"loss": 0.7162, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 2.315371662256435, |
|
"grad_norm": 1.5026549100875854, |
|
"learning_rate": 1.1424504851254912e-05, |
|
"loss": 0.7803, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.321385614625932, |
|
"grad_norm": 1.6620761156082153, |
|
"learning_rate": 1.1324272311763291e-05, |
|
"loss": 0.733, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 2.3273995669954295, |
|
"grad_norm": 1.2507511377334595, |
|
"learning_rate": 1.1224039772271671e-05, |
|
"loss": 0.733, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 2.333413519364927, |
|
"grad_norm": 1.938541293144226, |
|
"learning_rate": 1.112380723278005e-05, |
|
"loss": 0.7499, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 2.3394274717344237, |
|
"grad_norm": 1.796823263168335, |
|
"learning_rate": 1.102357469328843e-05, |
|
"loss": 0.7399, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 2.345441424103921, |
|
"grad_norm": 1.834004521369934, |
|
"learning_rate": 1.0923342153796808e-05, |
|
"loss": 0.7322, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.3514553764734183, |
|
"grad_norm": 1.7822822332382202, |
|
"learning_rate": 1.0823109614305188e-05, |
|
"loss": 0.7315, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 2.3574693288429156, |
|
"grad_norm": 1.7883449792861938, |
|
"learning_rate": 1.0722877074813569e-05, |
|
"loss": 0.7205, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 2.363483281212413, |
|
"grad_norm": 1.966545581817627, |
|
"learning_rate": 1.0623646860716865e-05, |
|
"loss": 0.768, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 2.3694972335819102, |
|
"grad_norm": 1.722288966178894, |
|
"learning_rate": 1.0523414321225243e-05, |
|
"loss": 0.7554, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 2.375511185951407, |
|
"grad_norm": 1.7346769571304321, |
|
"learning_rate": 1.0423181781733622e-05, |
|
"loss": 0.7158, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.3815251383209044, |
|
"grad_norm": 1.603703498840332, |
|
"learning_rate": 1.0322949242242002e-05, |
|
"loss": 0.7467, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 2.3875390906904017, |
|
"grad_norm": 1.8487290143966675, |
|
"learning_rate": 1.0222716702750382e-05, |
|
"loss": 0.7492, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 2.393553043059899, |
|
"grad_norm": 1.7814853191375732, |
|
"learning_rate": 1.0122484163258761e-05, |
|
"loss": 0.7153, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 2.3995669954293963, |
|
"grad_norm": 1.745309591293335, |
|
"learning_rate": 1.002225162376714e-05, |
|
"loss": 0.7417, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 2.405580947798893, |
|
"grad_norm": 1.8568013906478882, |
|
"learning_rate": 9.92201908427552e-06, |
|
"loss": 0.7176, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.405580947798893, |
|
"eval_cer": 0.7346033545787555, |
|
"eval_loss": 0.7291049957275391, |
|
"eval_runtime": 5180.4578, |
|
"eval_samples_per_second": 3.212, |
|
"eval_steps_per_second": 0.402, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.4115949001683905, |
|
"grad_norm": 2.15785813331604, |
|
"learning_rate": 9.8217865447839e-06, |
|
"loss": 0.7234, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 2.417608852537888, |
|
"grad_norm": 1.7856571674346924, |
|
"learning_rate": 9.721554005292278e-06, |
|
"loss": 0.7176, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 2.423622804907385, |
|
"grad_norm": 1.682504415512085, |
|
"learning_rate": 9.621321465800658e-06, |
|
"loss": 0.7231, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 2.4296367572768824, |
|
"grad_norm": 1.3437491655349731, |
|
"learning_rate": 9.521088926309037e-06, |
|
"loss": 0.7232, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 2.4356507096463798, |
|
"grad_norm": 1.6993297338485718, |
|
"learning_rate": 9.420856386817417e-06, |
|
"loss": 0.7539, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.441664662015877, |
|
"grad_norm": 1.942014455795288, |
|
"learning_rate": 9.320623847325797e-06, |
|
"loss": 0.7519, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 2.447678614385374, |
|
"grad_norm": 1.7039834260940552, |
|
"learning_rate": 9.220391307834175e-06, |
|
"loss": 0.7362, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 2.4536925667548712, |
|
"grad_norm": 1.7124940156936646, |
|
"learning_rate": 9.120158768342556e-06, |
|
"loss": 0.7457, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 2.4597065191243686, |
|
"grad_norm": 1.910249948501587, |
|
"learning_rate": 9.019926228850934e-06, |
|
"loss": 0.7236, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 2.465720471493866, |
|
"grad_norm": 1.5514588356018066, |
|
"learning_rate": 8.919693689359314e-06, |
|
"loss": 0.721, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.471734423863363, |
|
"grad_norm": 1.744598150253296, |
|
"learning_rate": 8.819461149867693e-06, |
|
"loss": 0.7008, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 2.47774837623286, |
|
"grad_norm": 1.7827790975570679, |
|
"learning_rate": 8.719228610376073e-06, |
|
"loss": 0.7196, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 2.4837623286023573, |
|
"grad_norm": 1.7352166175842285, |
|
"learning_rate": 8.619998396279369e-06, |
|
"loss": 0.7322, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 2.4897762809718547, |
|
"grad_norm": 1.7378534078598022, |
|
"learning_rate": 8.519765856787748e-06, |
|
"loss": 0.7434, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 2.495790233341352, |
|
"grad_norm": 1.712098240852356, |
|
"learning_rate": 8.419533317296128e-06, |
|
"loss": 0.6939, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.5018041857108493, |
|
"grad_norm": 1.7368323802947998, |
|
"learning_rate": 8.319300777804506e-06, |
|
"loss": 0.726, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 2.5078181380803466, |
|
"grad_norm": 1.7819844484329224, |
|
"learning_rate": 8.219068238312887e-06, |
|
"loss": 0.7271, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 2.513832090449844, |
|
"grad_norm": 1.892824411392212, |
|
"learning_rate": 8.118835698821265e-06, |
|
"loss": 0.7146, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 2.5198460428193408, |
|
"grad_norm": 1.8355358839035034, |
|
"learning_rate": 8.018603159329645e-06, |
|
"loss": 0.7276, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 2.525859995188838, |
|
"grad_norm": 1.428154468536377, |
|
"learning_rate": 7.918370619838024e-06, |
|
"loss": 0.726, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.5318739475583354, |
|
"grad_norm": 1.8524224758148193, |
|
"learning_rate": 7.818138080346404e-06, |
|
"loss": 0.7178, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 2.5378878999278327, |
|
"grad_norm": 2.0265612602233887, |
|
"learning_rate": 7.717905540854784e-06, |
|
"loss": 0.714, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 2.5439018522973296, |
|
"grad_norm": 1.721408486366272, |
|
"learning_rate": 7.6176730013631625e-06, |
|
"loss": 0.7319, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 2.549915804666827, |
|
"grad_norm": 1.725827932357788, |
|
"learning_rate": 7.517440461871543e-06, |
|
"loss": 0.7663, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 2.555929757036324, |
|
"grad_norm": 1.6882712841033936, |
|
"learning_rate": 7.417207922379922e-06, |
|
"loss": 0.7191, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.5619437094058215, |
|
"grad_norm": 1.743870496749878, |
|
"learning_rate": 7.3169753828883015e-06, |
|
"loss": 0.7372, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 2.567957661775319, |
|
"grad_norm": 1.8450312614440918, |
|
"learning_rate": 7.21674284339668e-06, |
|
"loss": 0.6971, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 2.573971614144816, |
|
"grad_norm": 1.6685088872909546, |
|
"learning_rate": 7.116510303905061e-06, |
|
"loss": 0.7331, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 2.5799855665143134, |
|
"grad_norm": 1.406771183013916, |
|
"learning_rate": 7.01627776441344e-06, |
|
"loss": 0.7292, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 2.5859995188838103, |
|
"grad_norm": 1.750435471534729, |
|
"learning_rate": 6.916045224921818e-06, |
|
"loss": 0.7298, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.5920134712533076, |
|
"grad_norm": 1.4334131479263306, |
|
"learning_rate": 6.815812685430198e-06, |
|
"loss": 0.7332, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 2.598027423622805, |
|
"grad_norm": 1.4097541570663452, |
|
"learning_rate": 6.7155801459385786e-06, |
|
"loss": 0.7183, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 2.6040413759923022, |
|
"grad_norm": 1.5977458953857422, |
|
"learning_rate": 6.616349931841874e-06, |
|
"loss": 0.7227, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 2.610055328361799, |
|
"grad_norm": 1.766050100326538, |
|
"learning_rate": 6.516117392350253e-06, |
|
"loss": 0.7275, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 2.6160692807312964, |
|
"grad_norm": 1.6626147031784058, |
|
"learning_rate": 6.415884852858632e-06, |
|
"loss": 0.7101, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.6220832331007937, |
|
"grad_norm": 2.104124069213867, |
|
"learning_rate": 6.315652313367011e-06, |
|
"loss": 0.7267, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 2.628097185470291, |
|
"grad_norm": 1.66257905960083, |
|
"learning_rate": 6.215419773875391e-06, |
|
"loss": 0.6873, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 2.6341111378397883, |
|
"grad_norm": 2.1178150177001953, |
|
"learning_rate": 6.115187234383771e-06, |
|
"loss": 0.7533, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 2.6401250902092857, |
|
"grad_norm": 1.5612884759902954, |
|
"learning_rate": 6.0149546948921495e-06, |
|
"loss": 0.6981, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 2.646139042578783, |
|
"grad_norm": 1.9699441194534302, |
|
"learning_rate": 5.914722155400529e-06, |
|
"loss": 0.7477, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.646139042578783, |
|
"eval_cer": 0.7222449991747579, |
|
"eval_loss": 0.7039721608161926, |
|
"eval_runtime": 5169.4179, |
|
"eval_samples_per_second": 3.219, |
|
"eval_steps_per_second": 0.402, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.6521529949482803, |
|
"grad_norm": 1.6843528747558594, |
|
"learning_rate": 5.814489615908909e-06, |
|
"loss": 0.7235, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 2.658166947317777, |
|
"grad_norm": 1.4490200281143188, |
|
"learning_rate": 5.7142570764172885e-06, |
|
"loss": 0.7022, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 2.6641808996872745, |
|
"grad_norm": 1.6219418048858643, |
|
"learning_rate": 5.614024536925667e-06, |
|
"loss": 0.7215, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 2.6701948520567718, |
|
"grad_norm": 1.7598456144332886, |
|
"learning_rate": 5.513791997434047e-06, |
|
"loss": 0.6884, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 2.676208804426269, |
|
"grad_norm": 1.4096354246139526, |
|
"learning_rate": 5.413559457942427e-06, |
|
"loss": 0.7142, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.682222756795766, |
|
"grad_norm": 1.890046238899231, |
|
"learning_rate": 5.313326918450806e-06, |
|
"loss": 0.7346, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 2.6882367091652632, |
|
"grad_norm": 1.9413307905197144, |
|
"learning_rate": 5.213094378959186e-06, |
|
"loss": 0.688, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 2.6942506615347606, |
|
"grad_norm": 1.8647221326828003, |
|
"learning_rate": 5.112861839467565e-06, |
|
"loss": 0.732, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 2.700264613904258, |
|
"grad_norm": 1.6978052854537964, |
|
"learning_rate": 5.012629299975944e-06, |
|
"loss": 0.7075, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 2.706278566273755, |
|
"grad_norm": 1.3552030324935913, |
|
"learning_rate": 4.912396760484324e-06, |
|
"loss": 0.7074, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.7122925186432525, |
|
"grad_norm": 1.762635350227356, |
|
"learning_rate": 4.812164220992704e-06, |
|
"loss": 0.7219, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 2.71830647101275, |
|
"grad_norm": 1.7568144798278809, |
|
"learning_rate": 4.7119316815010824e-06, |
|
"loss": 0.6925, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 2.7243204233822467, |
|
"grad_norm": 1.5333248376846313, |
|
"learning_rate": 4.612701467404378e-06, |
|
"loss": 0.7133, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 2.730334375751744, |
|
"grad_norm": 1.8916515111923218, |
|
"learning_rate": 4.512468927912758e-06, |
|
"loss": 0.717, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 2.7363483281212413, |
|
"grad_norm": 1.8751533031463623, |
|
"learning_rate": 4.412236388421137e-06, |
|
"loss": 0.7306, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.7423622804907386, |
|
"grad_norm": 1.8414188623428345, |
|
"learning_rate": 4.312003848929517e-06, |
|
"loss": 0.7241, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 2.748376232860236, |
|
"grad_norm": 1.4498494863510132, |
|
"learning_rate": 4.211771309437896e-06, |
|
"loss": 0.7358, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 2.754390185229733, |
|
"grad_norm": 1.519124984741211, |
|
"learning_rate": 4.111538769946275e-06, |
|
"loss": 0.7059, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 2.76040413759923, |
|
"grad_norm": 1.9624022245407104, |
|
"learning_rate": 4.011306230454655e-06, |
|
"loss": 0.7185, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 2.7664180899687274, |
|
"grad_norm": 1.7385069131851196, |
|
"learning_rate": 3.911073690963035e-06, |
|
"loss": 0.7065, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.7724320423382247, |
|
"grad_norm": 1.5801081657409668, |
|
"learning_rate": 3.8108411514714135e-06, |
|
"loss": 0.7242, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 2.778445994707722, |
|
"grad_norm": 2.89750337600708, |
|
"learning_rate": 3.710608611979793e-06, |
|
"loss": 0.7023, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 2.7844599470772193, |
|
"grad_norm": 1.7838506698608398, |
|
"learning_rate": 3.6103760724881724e-06, |
|
"loss": 0.709, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 2.7904738994467166, |
|
"grad_norm": 1.7187174558639526, |
|
"learning_rate": 3.510143532996552e-06, |
|
"loss": 0.7292, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 2.7964878518162135, |
|
"grad_norm": 1.692230463027954, |
|
"learning_rate": 3.4099109935049313e-06, |
|
"loss": 0.6929, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.802501804185711, |
|
"grad_norm": 1.8588602542877197, |
|
"learning_rate": 3.309678454013311e-06, |
|
"loss": 0.7182, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 2.808515756555208, |
|
"grad_norm": 1.7374218702316284, |
|
"learning_rate": 3.20944591452169e-06, |
|
"loss": 0.6878, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 2.8145297089247054, |
|
"grad_norm": 1.6396222114562988, |
|
"learning_rate": 3.10921337503007e-06, |
|
"loss": 0.7035, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 2.8205436612942023, |
|
"grad_norm": 1.4402636289596558, |
|
"learning_rate": 3.0089808355384495e-06, |
|
"loss": 0.7131, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 2.8265576136636996, |
|
"grad_norm": 2.006782293319702, |
|
"learning_rate": 2.9087482960468287e-06, |
|
"loss": 0.7222, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.832571566033197, |
|
"grad_norm": 1.543579339981079, |
|
"learning_rate": 2.8085157565552084e-06, |
|
"loss": 0.683, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 2.8385855184026942, |
|
"grad_norm": 1.7819281816482544, |
|
"learning_rate": 2.7082832170635876e-06, |
|
"loss": 0.7095, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 2.8445994707721916, |
|
"grad_norm": 1.6801820993423462, |
|
"learning_rate": 2.608050677571967e-06, |
|
"loss": 0.7021, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 2.850613423141689, |
|
"grad_norm": 1.8617347478866577, |
|
"learning_rate": 2.508820463475263e-06, |
|
"loss": 0.7094, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 2.856627375511186, |
|
"grad_norm": 1.8291690349578857, |
|
"learning_rate": 2.408587923983642e-06, |
|
"loss": 0.7004, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.862641327880683, |
|
"grad_norm": 1.8136513233184814, |
|
"learning_rate": 2.3083553844920213e-06, |
|
"loss": 0.6936, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 2.8686552802501804, |
|
"grad_norm": 2.1329762935638428, |
|
"learning_rate": 2.208122845000401e-06, |
|
"loss": 0.6904, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 2.8746692326196777, |
|
"grad_norm": 1.9445267915725708, |
|
"learning_rate": 2.1078903055087806e-06, |
|
"loss": 0.6832, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 2.880683184989175, |
|
"grad_norm": 1.6533479690551758, |
|
"learning_rate": 2.0076577660171602e-06, |
|
"loss": 0.6754, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 2.8866971373586723, |
|
"grad_norm": 2.0631463527679443, |
|
"learning_rate": 1.9074252265255394e-06, |
|
"loss": 0.6882, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.8866971373586723, |
|
"eval_cer": 0.7184241484900502, |
|
"eval_loss": 0.6888573169708252, |
|
"eval_runtime": 5167.0943, |
|
"eval_samples_per_second": 3.22, |
|
"eval_steps_per_second": 0.403, |
|
"step": 48000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 49884, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 4000, |
|
"total_flos": 4.985333650921882e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |