{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.8866971373586723,
"eval_steps": 4000,
"global_step": 48000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0060139523694972335,
"grad_norm": 2.5106329917907715,
"learning_rate": 4.99007697859033e-05,
"loss": 3.082,
"step": 100
},
{
"epoch": 0.012027904738994467,
"grad_norm": 2.184039831161499,
"learning_rate": 4.9800537246411676e-05,
"loss": 1.8328,
"step": 200
},
{
"epoch": 0.0180418571084917,
"grad_norm": 2.2055342197418213,
"learning_rate": 4.970030470692006e-05,
"loss": 1.6836,
"step": 300
},
{
"epoch": 0.024055809477988934,
"grad_norm": 2.0695838928222656,
"learning_rate": 4.960007216742843e-05,
"loss": 1.6098,
"step": 400
},
{
"epoch": 0.03006976184748617,
"grad_norm": 1.9105968475341797,
"learning_rate": 4.949983962793682e-05,
"loss": 1.516,
"step": 500
},
{
"epoch": 0.0360837142169834,
"grad_norm": 1.882120132446289,
"learning_rate": 4.9399607088445195e-05,
"loss": 1.4876,
"step": 600
},
{
"epoch": 0.042097666586480634,
"grad_norm": 1.937444806098938,
"learning_rate": 4.9299374548953576e-05,
"loss": 1.4493,
"step": 700
},
{
"epoch": 0.04811161895597787,
"grad_norm": 2.1085917949676514,
"learning_rate": 4.919914200946195e-05,
"loss": 1.4128,
"step": 800
},
{
"epoch": 0.0541255713254751,
"grad_norm": 1.9272162914276123,
"learning_rate": 4.909890946997033e-05,
"loss": 1.4105,
"step": 900
},
{
"epoch": 0.06013952369497234,
"grad_norm": 1.818624496459961,
"learning_rate": 4.899867693047871e-05,
"loss": 1.3912,
"step": 1000
},
{
"epoch": 0.06615347606446957,
"grad_norm": 1.644021987915039,
"learning_rate": 4.8898444390987094e-05,
"loss": 1.3397,
"step": 1100
},
{
"epoch": 0.0721674284339668,
"grad_norm": 1.7671422958374023,
"learning_rate": 4.8798211851495476e-05,
"loss": 1.3873,
"step": 1200
},
{
"epoch": 0.07818138080346404,
"grad_norm": 2.032087564468384,
"learning_rate": 4.869797931200385e-05,
"loss": 1.3112,
"step": 1300
},
{
"epoch": 0.08419533317296127,
"grad_norm": 1.7175960540771484,
"learning_rate": 4.859774677251223e-05,
"loss": 1.3089,
"step": 1400
},
{
"epoch": 0.09020928554245851,
"grad_norm": 1.7282887697219849,
"learning_rate": 4.8497514233020606e-05,
"loss": 1.2983,
"step": 1500
},
{
"epoch": 0.09622323791195574,
"grad_norm": 1.836075782775879,
"learning_rate": 4.839728169352899e-05,
"loss": 1.3092,
"step": 1600
},
{
"epoch": 0.10223719028145298,
"grad_norm": 1.931219220161438,
"learning_rate": 4.829704915403737e-05,
"loss": 1.2528,
"step": 1700
},
{
"epoch": 0.1082511426509502,
"grad_norm": 1.9112814664840698,
"learning_rate": 4.819681661454575e-05,
"loss": 1.25,
"step": 1800
},
{
"epoch": 0.11426509502044743,
"grad_norm": 2.1694140434265137,
"learning_rate": 4.809658407505413e-05,
"loss": 1.253,
"step": 1900
},
{
"epoch": 0.12027904738994467,
"grad_norm": 1.9220771789550781,
"learning_rate": 4.7996351535562506e-05,
"loss": 1.2278,
"step": 2000
},
{
"epoch": 0.12629299975944192,
"grad_norm": 1.7120180130004883,
"learning_rate": 4.789611899607089e-05,
"loss": 1.2483,
"step": 2100
},
{
"epoch": 0.13230695212893914,
"grad_norm": 1.917104959487915,
"learning_rate": 4.779588645657926e-05,
"loss": 1.2384,
"step": 2200
},
{
"epoch": 0.13832090449843637,
"grad_norm": 2.0323758125305176,
"learning_rate": 4.7695653917087644e-05,
"loss": 1.2292,
"step": 2300
},
{
"epoch": 0.1443348568679336,
"grad_norm": 1.779888391494751,
"learning_rate": 4.7595421377596025e-05,
"loss": 1.2258,
"step": 2400
},
{
"epoch": 0.15034880923743085,
"grad_norm": 1.7667876482009888,
"learning_rate": 4.7495188838104406e-05,
"loss": 1.1875,
"step": 2500
},
{
"epoch": 0.15636276160692808,
"grad_norm": 1.8692760467529297,
"learning_rate": 4.739495629861279e-05,
"loss": 1.2082,
"step": 2600
},
{
"epoch": 0.1623767139764253,
"grad_norm": 1.8578811883926392,
"learning_rate": 4.729472375912116e-05,
"loss": 1.1939,
"step": 2700
},
{
"epoch": 0.16839066634592254,
"grad_norm": 1.8867884874343872,
"learning_rate": 4.7194491219629543e-05,
"loss": 1.2108,
"step": 2800
},
{
"epoch": 0.17440461871541976,
"grad_norm": 1.72930109500885,
"learning_rate": 4.709425868013792e-05,
"loss": 1.1943,
"step": 2900
},
{
"epoch": 0.18041857108491702,
"grad_norm": 1.7612700462341309,
"learning_rate": 4.69940261406463e-05,
"loss": 1.1818,
"step": 3000
},
{
"epoch": 0.18643252345441425,
"grad_norm": 1.6063175201416016,
"learning_rate": 4.689379360115468e-05,
"loss": 1.1926,
"step": 3100
},
{
"epoch": 0.19244647582391147,
"grad_norm": 1.7698125839233398,
"learning_rate": 4.679356106166306e-05,
"loss": 1.2029,
"step": 3200
},
{
"epoch": 0.1984604281934087,
"grad_norm": 1.6680766344070435,
"learning_rate": 4.6693328522171443e-05,
"loss": 1.185,
"step": 3300
},
{
"epoch": 0.20447438056290596,
"grad_norm": 1.7791478633880615,
"learning_rate": 4.659309598267982e-05,
"loss": 1.1663,
"step": 3400
},
{
"epoch": 0.21048833293240318,
"grad_norm": 1.6491518020629883,
"learning_rate": 4.64928634431882e-05,
"loss": 1.1492,
"step": 3500
},
{
"epoch": 0.2165022853019004,
"grad_norm": 1.6312377452850342,
"learning_rate": 4.6392630903696574e-05,
"loss": 1.1696,
"step": 3600
},
{
"epoch": 0.22251623767139764,
"grad_norm": 1.7078893184661865,
"learning_rate": 4.6292398364204955e-05,
"loss": 1.139,
"step": 3700
},
{
"epoch": 0.22853019004089486,
"grad_norm": 2.0885114669799805,
"learning_rate": 4.619216582471334e-05,
"loss": 1.1399,
"step": 3800
},
{
"epoch": 0.23454414241039212,
"grad_norm": 1.8433787822723389,
"learning_rate": 4.609193328522172e-05,
"loss": 1.1376,
"step": 3900
},
{
"epoch": 0.24055809477988935,
"grad_norm": 1.527872085571289,
"learning_rate": 4.59917007457301e-05,
"loss": 1.1094,
"step": 4000
},
{
"epoch": 0.24055809477988935,
"eval_cer": 0.7748897683360889,
"eval_loss": 1.0861139297485352,
"eval_runtime": 5003.1551,
"eval_samples_per_second": 3.326,
"eval_steps_per_second": 0.416,
"step": 4000
},
{
"epoch": 0.24657204714938658,
"grad_norm": 1.4162925481796265,
"learning_rate": 4.5891468206238474e-05,
"loss": 1.1353,
"step": 4100
},
{
"epoch": 0.25258599951888383,
"grad_norm": 1.5863301753997803,
"learning_rate": 4.5791235666746855e-05,
"loss": 1.1257,
"step": 4200
},
{
"epoch": 0.25859995188838103,
"grad_norm": 1.6928209066390991,
"learning_rate": 4.569100312725523e-05,
"loss": 1.1086,
"step": 4300
},
{
"epoch": 0.2646139042578783,
"grad_norm": 1.2880722284317017,
"learning_rate": 4.559077058776362e-05,
"loss": 1.119,
"step": 4400
},
{
"epoch": 0.2706278566273755,
"grad_norm": 1.6384778022766113,
"learning_rate": 4.549053804827199e-05,
"loss": 1.1474,
"step": 4500
},
{
"epoch": 0.27664180899687274,
"grad_norm": 1.7176462411880493,
"learning_rate": 4.5390305508780374e-05,
"loss": 1.0925,
"step": 4600
},
{
"epoch": 0.28265576136637,
"grad_norm": 1.6025973558425903,
"learning_rate": 4.529007296928875e-05,
"loss": 1.112,
"step": 4700
},
{
"epoch": 0.2886697137358672,
"grad_norm": 1.7415896654129028,
"learning_rate": 4.518984042979713e-05,
"loss": 1.1218,
"step": 4800
},
{
"epoch": 0.29468366610536445,
"grad_norm": 1.310402750968933,
"learning_rate": 4.508960789030551e-05,
"loss": 1.0879,
"step": 4900
},
{
"epoch": 0.3006976184748617,
"grad_norm": 1.7458600997924805,
"learning_rate": 4.498937535081389e-05,
"loss": 1.1088,
"step": 5000
},
{
"epoch": 0.3067115708443589,
"grad_norm": 1.8199197053909302,
"learning_rate": 4.4889142811322274e-05,
"loss": 1.0987,
"step": 5100
},
{
"epoch": 0.31272552321385616,
"grad_norm": 1.5931302309036255,
"learning_rate": 4.478891027183065e-05,
"loss": 1.0802,
"step": 5200
},
{
"epoch": 0.31873947558335336,
"grad_norm": 1.5596023797988892,
"learning_rate": 4.468867773233903e-05,
"loss": 1.1009,
"step": 5300
},
{
"epoch": 0.3247534279528506,
"grad_norm": 1.7147523164749146,
"learning_rate": 4.4588445192847404e-05,
"loss": 1.1144,
"step": 5400
},
{
"epoch": 0.33076738032234787,
"grad_norm": 1.7172225713729858,
"learning_rate": 4.4488212653355786e-05,
"loss": 1.0928,
"step": 5500
},
{
"epoch": 0.33678133269184507,
"grad_norm": 1.5675067901611328,
"learning_rate": 4.438798011386417e-05,
"loss": 1.0421,
"step": 5600
},
{
"epoch": 0.3427952850613423,
"grad_norm": 1.755364179611206,
"learning_rate": 4.428774757437255e-05,
"loss": 1.0897,
"step": 5700
},
{
"epoch": 0.3488092374308395,
"grad_norm": 1.5678260326385498,
"learning_rate": 4.418751503488093e-05,
"loss": 1.0703,
"step": 5800
},
{
"epoch": 0.3548231898003368,
"grad_norm": 1.8641386032104492,
"learning_rate": 4.4087282495389304e-05,
"loss": 1.07,
"step": 5900
},
{
"epoch": 0.36083714216983404,
"grad_norm": 1.571837306022644,
"learning_rate": 4.3987049955897686e-05,
"loss": 1.0493,
"step": 6000
},
{
"epoch": 0.36685109453933124,
"grad_norm": 1.5046846866607666,
"learning_rate": 4.388681741640606e-05,
"loss": 1.0935,
"step": 6100
},
{
"epoch": 0.3728650469088285,
"grad_norm": 1.88533353805542,
"learning_rate": 4.378658487691444e-05,
"loss": 1.0801,
"step": 6200
},
{
"epoch": 0.3788789992783257,
"grad_norm": 1.5626581907272339,
"learning_rate": 4.368635233742282e-05,
"loss": 1.0442,
"step": 6300
},
{
"epoch": 0.38489295164782295,
"grad_norm": 1.4634822607040405,
"learning_rate": 4.3586119797931204e-05,
"loss": 1.0552,
"step": 6400
},
{
"epoch": 0.3909069040173202,
"grad_norm": 1.8044239282608032,
"learning_rate": 4.3485887258439586e-05,
"loss": 1.035,
"step": 6500
},
{
"epoch": 0.3969208563868174,
"grad_norm": 1.6868014335632324,
"learning_rate": 4.338565471894796e-05,
"loss": 1.0497,
"step": 6600
},
{
"epoch": 0.40293480875631466,
"grad_norm": 1.6949145793914795,
"learning_rate": 4.328542217945634e-05,
"loss": 1.0369,
"step": 6700
},
{
"epoch": 0.4089487611258119,
"grad_norm": 1.7868014574050903,
"learning_rate": 4.3186191965359634e-05,
"loss": 1.0385,
"step": 6800
},
{
"epoch": 0.4149627134953091,
"grad_norm": 1.550017237663269,
"learning_rate": 4.3085959425868015e-05,
"loss": 1.0326,
"step": 6900
},
{
"epoch": 0.42097666586480637,
"grad_norm": 1.431362509727478,
"learning_rate": 4.298572688637639e-05,
"loss": 1.0154,
"step": 7000
},
{
"epoch": 0.42699061823430356,
"grad_norm": 1.946357011795044,
"learning_rate": 4.288549434688478e-05,
"loss": 1.0532,
"step": 7100
},
{
"epoch": 0.4330045706038008,
"grad_norm": 1.6147353649139404,
"learning_rate": 4.278526180739316e-05,
"loss": 1.0633,
"step": 7200
},
{
"epoch": 0.4390185229732981,
"grad_norm": 1.655043363571167,
"learning_rate": 4.2685029267901534e-05,
"loss": 1.0558,
"step": 7300
},
{
"epoch": 0.4450324753427953,
"grad_norm": 1.6090620756149292,
"learning_rate": 4.2584796728409915e-05,
"loss": 1.0289,
"step": 7400
},
{
"epoch": 0.45104642771229253,
"grad_norm": 1.8263063430786133,
"learning_rate": 4.248456418891829e-05,
"loss": 1.0275,
"step": 7500
},
{
"epoch": 0.45706038008178973,
"grad_norm": 1.6156238317489624,
"learning_rate": 4.238433164942667e-05,
"loss": 0.9976,
"step": 7600
},
{
"epoch": 0.463074332451287,
"grad_norm": 1.4604992866516113,
"learning_rate": 4.228409910993505e-05,
"loss": 1.0607,
"step": 7700
},
{
"epoch": 0.46908828482078424,
"grad_norm": 1.5745257139205933,
"learning_rate": 4.2183866570443434e-05,
"loss": 1.0297,
"step": 7800
},
{
"epoch": 0.47510223719028144,
"grad_norm": 1.852102518081665,
"learning_rate": 4.2083634030951815e-05,
"loss": 1.0173,
"step": 7900
},
{
"epoch": 0.4811161895597787,
"grad_norm": 1.6735659837722778,
"learning_rate": 4.198340149146019e-05,
"loss": 1.0145,
"step": 8000
},
{
"epoch": 0.4811161895597787,
"eval_cer": 0.7706982683703557,
"eval_loss": 0.9832878708839417,
"eval_runtime": 5013.5334,
"eval_samples_per_second": 3.319,
"eval_steps_per_second": 0.415,
"step": 8000
},
{
"epoch": 0.4871301419292759,
"grad_norm": 1.7805702686309814,
"learning_rate": 4.188316895196857e-05,
"loss": 1.0279,
"step": 8100
},
{
"epoch": 0.49314409429877315,
"grad_norm": 1.4825282096862793,
"learning_rate": 4.1782936412476945e-05,
"loss": 1.0124,
"step": 8200
},
{
"epoch": 0.4991580466682704,
"grad_norm": 1.5925073623657227,
"learning_rate": 4.168270387298533e-05,
"loss": 1.0026,
"step": 8300
},
{
"epoch": 0.5051719990377677,
"grad_norm": 1.5965441465377808,
"learning_rate": 4.158247133349371e-05,
"loss": 1.0238,
"step": 8400
},
{
"epoch": 0.5111859514072649,
"grad_norm": 1.5980632305145264,
"learning_rate": 4.148223879400209e-05,
"loss": 0.9909,
"step": 8500
},
{
"epoch": 0.5171999037767621,
"grad_norm": 1.7301024198532104,
"learning_rate": 4.1382006254510464e-05,
"loss": 1.0172,
"step": 8600
},
{
"epoch": 0.5232138561462594,
"grad_norm": 1.6884981393814087,
"learning_rate": 4.1281773715018845e-05,
"loss": 1.0326,
"step": 8700
},
{
"epoch": 0.5292278085157566,
"grad_norm": 1.6702126264572144,
"learning_rate": 4.118154117552723e-05,
"loss": 1.016,
"step": 8800
},
{
"epoch": 0.5352417608852538,
"grad_norm": 1.5463213920593262,
"learning_rate": 4.108231096143052e-05,
"loss": 1.0282,
"step": 8900
},
{
"epoch": 0.541255713254751,
"grad_norm": 1.9977515935897827,
"learning_rate": 4.09820784219389e-05,
"loss": 1.0027,
"step": 9000
},
{
"epoch": 0.5472696656242483,
"grad_norm": 1.4702314138412476,
"learning_rate": 4.088184588244728e-05,
"loss": 1.0138,
"step": 9100
},
{
"epoch": 0.5532836179937455,
"grad_norm": 1.5981062650680542,
"learning_rate": 4.078161334295566e-05,
"loss": 1.0333,
"step": 9200
},
{
"epoch": 0.5592975703632427,
"grad_norm": 1.4159762859344482,
"learning_rate": 4.068138080346404e-05,
"loss": 0.9956,
"step": 9300
},
{
"epoch": 0.56531152273274,
"grad_norm": 1.4920343160629272,
"learning_rate": 4.058114826397242e-05,
"loss": 0.9978,
"step": 9400
},
{
"epoch": 0.5713254751022372,
"grad_norm": 1.7335036993026733,
"learning_rate": 4.048091572448079e-05,
"loss": 1.0337,
"step": 9500
},
{
"epoch": 0.5773394274717344,
"grad_norm": 1.4422550201416016,
"learning_rate": 4.0380683184989175e-05,
"loss": 1.0185,
"step": 9600
},
{
"epoch": 0.5833533798412317,
"grad_norm": 1.776282787322998,
"learning_rate": 4.0280450645497556e-05,
"loss": 0.9941,
"step": 9700
},
{
"epoch": 0.5893673322107289,
"grad_norm": 1.806261420249939,
"learning_rate": 4.018021810600594e-05,
"loss": 0.9772,
"step": 9800
},
{
"epoch": 0.5953812845802261,
"grad_norm": 1.6673755645751953,
"learning_rate": 4.007998556651432e-05,
"loss": 0.9904,
"step": 9900
},
{
"epoch": 0.6013952369497234,
"grad_norm": 1.5652821063995361,
"learning_rate": 3.997975302702269e-05,
"loss": 1.0008,
"step": 10000
},
{
"epoch": 0.6074091893192206,
"grad_norm": 1.568590760231018,
"learning_rate": 3.9879520487531075e-05,
"loss": 1.0402,
"step": 10100
},
{
"epoch": 0.6134231416887178,
"grad_norm": 1.505454659461975,
"learning_rate": 3.977928794803945e-05,
"loss": 1.0009,
"step": 10200
},
{
"epoch": 0.619437094058215,
"grad_norm": 1.4691787958145142,
"learning_rate": 3.967905540854783e-05,
"loss": 1.0115,
"step": 10300
},
{
"epoch": 0.6254510464277123,
"grad_norm": 1.5172970294952393,
"learning_rate": 3.957882286905621e-05,
"loss": 1.0209,
"step": 10400
},
{
"epoch": 0.6314649987972095,
"grad_norm": 1.8390883207321167,
"learning_rate": 3.947859032956459e-05,
"loss": 1.0101,
"step": 10500
},
{
"epoch": 0.6374789511667067,
"grad_norm": 1.5624459981918335,
"learning_rate": 3.9378357790072975e-05,
"loss": 1.0085,
"step": 10600
},
{
"epoch": 0.643492903536204,
"grad_norm": 1.4316383600234985,
"learning_rate": 3.927812525058135e-05,
"loss": 1.0025,
"step": 10700
},
{
"epoch": 0.6495068559057012,
"grad_norm": 1.5541032552719116,
"learning_rate": 3.917789271108973e-05,
"loss": 0.9938,
"step": 10800
},
{
"epoch": 0.6555208082751984,
"grad_norm": 1.7416138648986816,
"learning_rate": 3.907866249699302e-05,
"loss": 0.9923,
"step": 10900
},
{
"epoch": 0.6615347606446957,
"grad_norm": 1.7461607456207275,
"learning_rate": 3.8978429957501404e-05,
"loss": 0.9983,
"step": 11000
},
{
"epoch": 0.6675487130141929,
"grad_norm": 1.5705294609069824,
"learning_rate": 3.887819741800978e-05,
"loss": 0.9809,
"step": 11100
},
{
"epoch": 0.6735626653836901,
"grad_norm": 1.802960753440857,
"learning_rate": 3.877796487851817e-05,
"loss": 0.9675,
"step": 11200
},
{
"epoch": 0.6795766177531873,
"grad_norm": 1.70058274269104,
"learning_rate": 3.867773233902655e-05,
"loss": 0.966,
"step": 11300
},
{
"epoch": 0.6855905701226847,
"grad_norm": 1.5391656160354614,
"learning_rate": 3.857749979953492e-05,
"loss": 0.9412,
"step": 11400
},
{
"epoch": 0.6916045224921819,
"grad_norm": 1.8226732015609741,
"learning_rate": 3.8477267260043304e-05,
"loss": 0.96,
"step": 11500
},
{
"epoch": 0.697618474861679,
"grad_norm": 1.3753610849380493,
"learning_rate": 3.837703472055168e-05,
"loss": 0.9665,
"step": 11600
},
{
"epoch": 0.7036324272311764,
"grad_norm": 1.6595444679260254,
"learning_rate": 3.827680218106006e-05,
"loss": 0.9851,
"step": 11700
},
{
"epoch": 0.7096463796006736,
"grad_norm": 1.6920074224472046,
"learning_rate": 3.817656964156844e-05,
"loss": 0.983,
"step": 11800
},
{
"epoch": 0.7156603319701708,
"grad_norm": 1.4369592666625977,
"learning_rate": 3.807633710207682e-05,
"loss": 0.9418,
"step": 11900
},
{
"epoch": 0.7216742843396681,
"grad_norm": 1.7257956266403198,
"learning_rate": 3.7976104562585204e-05,
"loss": 0.9877,
"step": 12000
},
{
"epoch": 0.7216742843396681,
"eval_cer": 0.7783561907148705,
"eval_loss": 0.9317989349365234,
"eval_runtime": 5220.1413,
"eval_samples_per_second": 3.188,
"eval_steps_per_second": 0.398,
"step": 12000
},
{
"epoch": 0.7276882367091653,
"grad_norm": 1.6520166397094727,
"learning_rate": 3.787587202309358e-05,
"loss": 0.9415,
"step": 12100
},
{
"epoch": 0.7337021890786625,
"grad_norm": 1.7703155279159546,
"learning_rate": 3.777563948360196e-05,
"loss": 0.9653,
"step": 12200
},
{
"epoch": 0.7397161414481598,
"grad_norm": 1.4374829530715942,
"learning_rate": 3.7675406944110334e-05,
"loss": 0.9541,
"step": 12300
},
{
"epoch": 0.745730093817657,
"grad_norm": 1.790893793106079,
"learning_rate": 3.7575174404618716e-05,
"loss": 0.9951,
"step": 12400
},
{
"epoch": 0.7517440461871542,
"grad_norm": 1.6874678134918213,
"learning_rate": 3.74749418651271e-05,
"loss": 0.9704,
"step": 12500
},
{
"epoch": 0.7577579985566514,
"grad_norm": 1.7682390213012695,
"learning_rate": 3.737470932563548e-05,
"loss": 0.9461,
"step": 12600
},
{
"epoch": 0.7637719509261487,
"grad_norm": 1.6911418437957764,
"learning_rate": 3.727447678614386e-05,
"loss": 0.9952,
"step": 12700
},
{
"epoch": 0.7697859032956459,
"grad_norm": 1.727547526359558,
"learning_rate": 3.7174244246652234e-05,
"loss": 0.9442,
"step": 12800
},
{
"epoch": 0.7757998556651431,
"grad_norm": 1.4808145761489868,
"learning_rate": 3.7074011707160616e-05,
"loss": 0.9742,
"step": 12900
},
{
"epoch": 0.7818138080346404,
"grad_norm": 1.876441240310669,
"learning_rate": 3.697377916766899e-05,
"loss": 0.9766,
"step": 13000
},
{
"epoch": 0.7878277604041376,
"grad_norm": 1.4900315999984741,
"learning_rate": 3.687354662817737e-05,
"loss": 0.9612,
"step": 13100
},
{
"epoch": 0.7938417127736348,
"grad_norm": 1.4132503271102905,
"learning_rate": 3.677331408868575e-05,
"loss": 0.9651,
"step": 13200
},
{
"epoch": 0.7998556651431321,
"grad_norm": 1.6306148767471313,
"learning_rate": 3.6673081549194134e-05,
"loss": 0.9368,
"step": 13300
},
{
"epoch": 0.8058696175126293,
"grad_norm": 1.7364792823791504,
"learning_rate": 3.657284900970251e-05,
"loss": 0.9427,
"step": 13400
},
{
"epoch": 0.8118835698821265,
"grad_norm": 1.6632161140441895,
"learning_rate": 3.647261647021089e-05,
"loss": 0.9505,
"step": 13500
},
{
"epoch": 0.8178975222516238,
"grad_norm": 1.5362128019332886,
"learning_rate": 3.637238393071927e-05,
"loss": 0.9692,
"step": 13600
},
{
"epoch": 0.823911474621121,
"grad_norm": 1.3290611505508423,
"learning_rate": 3.6272151391227646e-05,
"loss": 0.9585,
"step": 13700
},
{
"epoch": 0.8299254269906182,
"grad_norm": 1.6647266149520874,
"learning_rate": 3.617191885173603e-05,
"loss": 0.9725,
"step": 13800
},
{
"epoch": 0.8359393793601154,
"grad_norm": 1.736165165901184,
"learning_rate": 3.607168631224441e-05,
"loss": 0.9611,
"step": 13900
},
{
"epoch": 0.8419533317296127,
"grad_norm": 1.6238832473754883,
"learning_rate": 3.597145377275279e-05,
"loss": 0.9698,
"step": 14000
},
{
"epoch": 0.8479672840991099,
"grad_norm": 1.741194486618042,
"learning_rate": 3.5871221233261165e-05,
"loss": 0.9338,
"step": 14100
},
{
"epoch": 0.8539812364686071,
"grad_norm": 1.7224496603012085,
"learning_rate": 3.5770988693769546e-05,
"loss": 0.9715,
"step": 14200
},
{
"epoch": 0.8599951888381044,
"grad_norm": 1.872253179550171,
"learning_rate": 3.5671758479672845e-05,
"loss": 0.9656,
"step": 14300
},
{
"epoch": 0.8660091412076016,
"grad_norm": 1.5153071880340576,
"learning_rate": 3.557152594018122e-05,
"loss": 0.984,
"step": 14400
},
{
"epoch": 0.8720230935770988,
"grad_norm": 1.667662262916565,
"learning_rate": 3.54712934006896e-05,
"loss": 0.9386,
"step": 14500
},
{
"epoch": 0.8780370459465962,
"grad_norm": 1.7471551895141602,
"learning_rate": 3.537106086119798e-05,
"loss": 0.9486,
"step": 14600
},
{
"epoch": 0.8840509983160934,
"grad_norm": 1.5477312803268433,
"learning_rate": 3.5270828321706364e-05,
"loss": 0.9451,
"step": 14700
},
{
"epoch": 0.8900649506855906,
"grad_norm": 1.5079952478408813,
"learning_rate": 3.517059578221474e-05,
"loss": 0.9381,
"step": 14800
},
{
"epoch": 0.8960789030550877,
"grad_norm": 1.5696821212768555,
"learning_rate": 3.507036324272312e-05,
"loss": 0.9418,
"step": 14900
},
{
"epoch": 0.9020928554245851,
"grad_norm": 1.552612066268921,
"learning_rate": 3.4970130703231494e-05,
"loss": 0.9596,
"step": 15000
},
{
"epoch": 0.9081068077940823,
"grad_norm": 1.7802802324295044,
"learning_rate": 3.4869898163739876e-05,
"loss": 0.9298,
"step": 15100
},
{
"epoch": 0.9141207601635795,
"grad_norm": 1.5589861869812012,
"learning_rate": 3.476966562424826e-05,
"loss": 0.9621,
"step": 15200
},
{
"epoch": 0.9201347125330768,
"grad_norm": 1.3050284385681152,
"learning_rate": 3.466943308475664e-05,
"loss": 0.9315,
"step": 15300
},
{
"epoch": 0.926148664902574,
"grad_norm": 1.5091936588287354,
"learning_rate": 3.456920054526502e-05,
"loss": 0.9107,
"step": 15400
},
{
"epoch": 0.9321626172720712,
"grad_norm": 1.5565592050552368,
"learning_rate": 3.4468968005773394e-05,
"loss": 0.9552,
"step": 15500
},
{
"epoch": 0.9381765696415685,
"grad_norm": 1.7788596153259277,
"learning_rate": 3.4368735466281775e-05,
"loss": 0.9459,
"step": 15600
},
{
"epoch": 0.9441905220110657,
"grad_norm": 1.8757565021514893,
"learning_rate": 3.426850292679015e-05,
"loss": 0.9254,
"step": 15700
},
{
"epoch": 0.9502044743805629,
"grad_norm": 1.6978222131729126,
"learning_rate": 3.416827038729853e-05,
"loss": 0.9083,
"step": 15800
},
{
"epoch": 0.9562184267500602,
"grad_norm": 1.6449016332626343,
"learning_rate": 3.406803784780692e-05,
"loss": 0.9767,
"step": 15900
},
{
"epoch": 0.9622323791195574,
"grad_norm": 1.8677603006362915,
"learning_rate": 3.3967805308315294e-05,
"loss": 0.9469,
"step": 16000
},
{
"epoch": 0.9622323791195574,
"eval_cer": 0.7716098871709525,
"eval_loss": 0.8905403017997742,
"eval_runtime": 5089.7823,
"eval_samples_per_second": 3.269,
"eval_steps_per_second": 0.409,
"step": 16000
},
{
"epoch": 0.9682463314890546,
"grad_norm": 1.6178405284881592,
"learning_rate": 3.3867572768823675e-05,
"loss": 0.9441,
"step": 16100
},
{
"epoch": 0.9742602838585518,
"grad_norm": 1.7407509088516235,
"learning_rate": 3.376734022933205e-05,
"loss": 0.9264,
"step": 16200
},
{
"epoch": 0.9802742362280491,
"grad_norm": 1.5831618309020996,
"learning_rate": 3.366710768984043e-05,
"loss": 0.9583,
"step": 16300
},
{
"epoch": 0.9862881885975463,
"grad_norm": 1.6853969097137451,
"learning_rate": 3.3566875150348806e-05,
"loss": 0.9226,
"step": 16400
},
{
"epoch": 0.9923021409670435,
"grad_norm": 1.8264875411987305,
"learning_rate": 3.3466642610857194e-05,
"loss": 0.9492,
"step": 16500
},
{
"epoch": 0.9983160933365408,
"grad_norm": 1.5579068660736084,
"learning_rate": 3.336641007136557e-05,
"loss": 0.8872,
"step": 16600
},
{
"epoch": 1.004330045706038,
"grad_norm": 1.7848278284072876,
"learning_rate": 3.326617753187395e-05,
"loss": 0.8717,
"step": 16700
},
{
"epoch": 1.0103439980755353,
"grad_norm": 1.5830621719360352,
"learning_rate": 3.316694731777725e-05,
"loss": 0.9032,
"step": 16800
},
{
"epoch": 1.0163579504450324,
"grad_norm": 1.546217441558838,
"learning_rate": 3.3066714778285623e-05,
"loss": 0.9418,
"step": 16900
},
{
"epoch": 1.0223719028145297,
"grad_norm": 1.4024384021759033,
"learning_rate": 3.2966482238794005e-05,
"loss": 0.9075,
"step": 17000
},
{
"epoch": 1.028385855184027,
"grad_norm": 1.6523361206054688,
"learning_rate": 3.286624969930238e-05,
"loss": 0.9088,
"step": 17100
},
{
"epoch": 1.0343998075535241,
"grad_norm": 1.4941192865371704,
"learning_rate": 3.276601715981076e-05,
"loss": 0.8703,
"step": 17200
},
{
"epoch": 1.0404137599230214,
"grad_norm": 1.6586402654647827,
"learning_rate": 3.266578462031914e-05,
"loss": 0.9171,
"step": 17300
},
{
"epoch": 1.0464277122925187,
"grad_norm": 1.5614475011825562,
"learning_rate": 3.2565552080827523e-05,
"loss": 0.8819,
"step": 17400
},
{
"epoch": 1.0524416646620158,
"grad_norm": 1.6588680744171143,
"learning_rate": 3.2465319541335905e-05,
"loss": 0.9165,
"step": 17500
},
{
"epoch": 1.0584556170315131,
"grad_norm": 1.4571171998977661,
"learning_rate": 3.236508700184428e-05,
"loss": 0.9056,
"step": 17600
},
{
"epoch": 1.0644695694010102,
"grad_norm": 1.7484580278396606,
"learning_rate": 3.226485446235266e-05,
"loss": 0.9072,
"step": 17700
},
{
"epoch": 1.0704835217705075,
"grad_norm": 1.388741374015808,
"learning_rate": 3.2164621922861035e-05,
"loss": 0.8645,
"step": 17800
},
{
"epoch": 1.0764974741400049,
"grad_norm": 1.5871518850326538,
"learning_rate": 3.206438938336942e-05,
"loss": 0.908,
"step": 17900
},
{
"epoch": 1.0825114265095022,
"grad_norm": 1.4603219032287598,
"learning_rate": 3.19641568438778e-05,
"loss": 0.8714,
"step": 18000
},
{
"epoch": 1.0885253788789993,
"grad_norm": 1.443608283996582,
"learning_rate": 3.186392430438618e-05,
"loss": 0.8879,
"step": 18100
},
{
"epoch": 1.0945393312484966,
"grad_norm": 1.4648326635360718,
"learning_rate": 3.1763691764894554e-05,
"loss": 0.923,
"step": 18200
},
{
"epoch": 1.1005532836179936,
"grad_norm": 1.9082708358764648,
"learning_rate": 3.1663459225402935e-05,
"loss": 0.8898,
"step": 18300
},
{
"epoch": 1.106567235987491,
"grad_norm": 1.740161418914795,
"learning_rate": 3.1563226685911317e-05,
"loss": 0.8913,
"step": 18400
},
{
"epoch": 1.1125811883569883,
"grad_norm": 1.4581352472305298,
"learning_rate": 3.146299414641969e-05,
"loss": 0.9202,
"step": 18500
},
{
"epoch": 1.1185951407264854,
"grad_norm": 1.5199153423309326,
"learning_rate": 3.136276160692808e-05,
"loss": 0.9032,
"step": 18600
},
{
"epoch": 1.1246090930959827,
"grad_norm": 1.4630061388015747,
"learning_rate": 3.1262529067436454e-05,
"loss": 0.8771,
"step": 18700
},
{
"epoch": 1.13062304546548,
"grad_norm": 1.7790659666061401,
"learning_rate": 3.1162296527944835e-05,
"loss": 0.8673,
"step": 18800
},
{
"epoch": 1.136636997834977,
"grad_norm": 1.610372543334961,
"learning_rate": 3.106206398845321e-05,
"loss": 0.9019,
"step": 18900
},
{
"epoch": 1.1426509502044744,
"grad_norm": 1.847385048866272,
"learning_rate": 3.096183144896159e-05,
"loss": 0.8907,
"step": 19000
},
{
"epoch": 1.1486649025739717,
"grad_norm": 1.664432168006897,
"learning_rate": 3.086159890946997e-05,
"loss": 0.8745,
"step": 19100
},
{
"epoch": 1.1546788549434688,
"grad_norm": 1.5932984352111816,
"learning_rate": 3.0761366369978354e-05,
"loss": 0.8697,
"step": 19200
},
{
"epoch": 1.160692807312966,
"grad_norm": 1.4511469602584839,
"learning_rate": 3.0662136155881646e-05,
"loss": 0.8748,
"step": 19300
},
{
"epoch": 1.1667067596824634,
"grad_norm": 1.9143450260162354,
"learning_rate": 3.056190361639003e-05,
"loss": 0.9045,
"step": 19400
},
{
"epoch": 1.1727207120519605,
"grad_norm": 1.3927329778671265,
"learning_rate": 3.046167107689841e-05,
"loss": 0.8927,
"step": 19500
},
{
"epoch": 1.1787346644214578,
"grad_norm": 1.8178434371948242,
"learning_rate": 3.0361438537406783e-05,
"loss": 0.9258,
"step": 19600
},
{
"epoch": 1.1847486167909551,
"grad_norm": 1.6006417274475098,
"learning_rate": 3.0261205997915165e-05,
"loss": 0.886,
"step": 19700
},
{
"epoch": 1.1907625691604522,
"grad_norm": 1.6698856353759766,
"learning_rate": 3.0160973458423543e-05,
"loss": 0.9188,
"step": 19800
},
{
"epoch": 1.1967765215299495,
"grad_norm": 1.5293818712234497,
"learning_rate": 3.0060740918931924e-05,
"loss": 0.896,
"step": 19900
},
{
"epoch": 1.2027904738994466,
"grad_norm": 1.6966098546981812,
"learning_rate": 2.9960508379440305e-05,
"loss": 0.8677,
"step": 20000
},
{
"epoch": 1.2027904738994466,
"eval_cer": 0.7689350483770306,
"eval_loss": 0.8620118498802185,
"eval_runtime": 5134.4813,
"eval_samples_per_second": 3.241,
"eval_steps_per_second": 0.405,
"step": 20000
},
{
"epoch": 1.208804426268944,
"grad_norm": 1.6066211462020874,
"learning_rate": 2.9860275839948683e-05,
"loss": 0.8648,
"step": 20100
},
{
"epoch": 1.2148183786384412,
"grad_norm": 1.4755396842956543,
"learning_rate": 2.9760043300457065e-05,
"loss": 0.8683,
"step": 20200
},
{
"epoch": 1.2208323310079385,
"grad_norm": 1.3547738790512085,
"learning_rate": 2.965981076096544e-05,
"loss": 0.8786,
"step": 20300
},
{
"epoch": 1.2268462833774356,
"grad_norm": 1.6254231929779053,
"learning_rate": 2.955957822147382e-05,
"loss": 0.8758,
"step": 20400
},
{
"epoch": 1.232860235746933,
"grad_norm": 1.5725833177566528,
"learning_rate": 2.94593456819822e-05,
"loss": 0.8831,
"step": 20500
},
{
"epoch": 1.23887418811643,
"grad_norm": 1.6321443319320679,
"learning_rate": 2.935911314249058e-05,
"loss": 0.868,
"step": 20600
},
{
"epoch": 1.2448881404859273,
"grad_norm": 1.544110894203186,
"learning_rate": 2.925888060299896e-05,
"loss": 0.9033,
"step": 20700
},
{
"epoch": 1.2509020928554246,
"grad_norm": 1.5690948963165283,
"learning_rate": 2.915864806350734e-05,
"loss": 0.8736,
"step": 20800
},
{
"epoch": 1.256916045224922,
"grad_norm": 1.3950625658035278,
"learning_rate": 2.905841552401572e-05,
"loss": 0.9012,
"step": 20900
},
{
"epoch": 1.262929997594419,
"grad_norm": 1.4699276685714722,
"learning_rate": 2.8958182984524095e-05,
"loss": 0.8545,
"step": 21000
},
{
"epoch": 1.2689439499639164,
"grad_norm": 1.607750654220581,
"learning_rate": 2.8857950445032476e-05,
"loss": 0.9101,
"step": 21100
},
{
"epoch": 1.2749579023334134,
"grad_norm": 2.1910347938537598,
"learning_rate": 2.8757717905540854e-05,
"loss": 0.8803,
"step": 21200
},
{
"epoch": 1.2809718547029108,
"grad_norm": 1.604390025138855,
"learning_rate": 2.8657485366049236e-05,
"loss": 0.8751,
"step": 21300
},
{
"epoch": 1.286985807072408,
"grad_norm": 1.5971423387527466,
"learning_rate": 2.8557252826557617e-05,
"loss": 0.8543,
"step": 21400
},
{
"epoch": 1.2929997594419051,
"grad_norm": 1.6325972080230713,
"learning_rate": 2.8457020287065995e-05,
"loss": 0.8612,
"step": 21500
},
{
"epoch": 1.2990137118114025,
"grad_norm": 1.7952935695648193,
"learning_rate": 2.8356787747574376e-05,
"loss": 0.8399,
"step": 21600
},
{
"epoch": 1.3050276641808995,
"grad_norm": 1.683236002922058,
"learning_rate": 2.825655520808275e-05,
"loss": 0.8585,
"step": 21700
},
{
"epoch": 1.3110416165503969,
"grad_norm": 1.630666971206665,
"learning_rate": 2.8156322668591136e-05,
"loss": 0.8891,
"step": 21800
},
{
"epoch": 1.3170555689198942,
"grad_norm": 1.7404346466064453,
"learning_rate": 2.805609012909951e-05,
"loss": 0.8514,
"step": 21900
},
{
"epoch": 1.3230695212893915,
"grad_norm": 1.4314298629760742,
"learning_rate": 2.795585758960789e-05,
"loss": 0.8561,
"step": 22000
},
{
"epoch": 1.3290834736588886,
"grad_norm": 1.7691779136657715,
"learning_rate": 2.785562505011627e-05,
"loss": 0.8474,
"step": 22100
},
{
"epoch": 1.3350974260283859,
"grad_norm": 1.7155267000198364,
"learning_rate": 2.775539251062465e-05,
"loss": 0.8289,
"step": 22200
},
{
"epoch": 1.341111378397883,
"grad_norm": 1.7087023258209229,
"learning_rate": 2.7655159971133032e-05,
"loss": 0.8544,
"step": 22300
},
{
"epoch": 1.3471253307673803,
"grad_norm": 1.617749571800232,
"learning_rate": 2.755492743164141e-05,
"loss": 0.8781,
"step": 22400
},
{
"epoch": 1.3531392831368776,
"grad_norm": 1.6493247747421265,
"learning_rate": 2.745469489214979e-05,
"loss": 0.8392,
"step": 22500
},
{
"epoch": 1.359153235506375,
"grad_norm": 1.809634804725647,
"learning_rate": 2.7354462352658166e-05,
"loss": 0.8721,
"step": 22600
},
{
"epoch": 1.365167187875872,
"grad_norm": 1.3698049783706665,
"learning_rate": 2.7254229813166547e-05,
"loss": 0.8533,
"step": 22700
},
{
"epoch": 1.3711811402453693,
"grad_norm": 1.7568131685256958,
"learning_rate": 2.7153997273674925e-05,
"loss": 0.886,
"step": 22800
},
{
"epoch": 1.3771950926148664,
"grad_norm": 1.867412805557251,
"learning_rate": 2.7053764734183307e-05,
"loss": 0.8637,
"step": 22900
},
{
"epoch": 1.3832090449843637,
"grad_norm": 2.0730977058410645,
"learning_rate": 2.6953532194691688e-05,
"loss": 0.8626,
"step": 23000
},
{
"epoch": 1.389222997353861,
"grad_norm": 1.8011558055877686,
"learning_rate": 2.6853299655200066e-05,
"loss": 0.8784,
"step": 23100
},
{
"epoch": 1.3952369497233583,
"grad_norm": 1.6936458349227905,
"learning_rate": 2.6753067115708447e-05,
"loss": 0.8634,
"step": 23200
},
{
"epoch": 1.4012509020928554,
"grad_norm": 1.7492289543151855,
"learning_rate": 2.665383690161174e-05,
"loss": 0.8678,
"step": 23300
},
{
"epoch": 1.4072648544623527,
"grad_norm": 1.8972880840301514,
"learning_rate": 2.655360436212012e-05,
"loss": 0.8939,
"step": 23400
},
{
"epoch": 1.4132788068318498,
"grad_norm": 1.6961406469345093,
"learning_rate": 2.64533718226285e-05,
"loss": 0.8727,
"step": 23500
},
{
"epoch": 1.4192927592013471,
"grad_norm": 1.583854079246521,
"learning_rate": 2.635313928313688e-05,
"loss": 0.8332,
"step": 23600
},
{
"epoch": 1.4253067115708444,
"grad_norm": 1.6541253328323364,
"learning_rate": 2.6252906743645255e-05,
"loss": 0.8798,
"step": 23700
},
{
"epoch": 1.4313206639403415,
"grad_norm": 1.7607979774475098,
"learning_rate": 2.6152674204153636e-05,
"loss": 0.8472,
"step": 23800
},
{
"epoch": 1.4373346163098388,
"grad_norm": 1.5591400861740112,
"learning_rate": 2.605244166466202e-05,
"loss": 0.8355,
"step": 23900
},
{
"epoch": 1.4433485686793361,
"grad_norm": 1.4700669050216675,
"learning_rate": 2.5952209125170395e-05,
"loss": 0.8521,
"step": 24000
},
{
"epoch": 1.4433485686793361,
"eval_cer": 0.7597912847133936,
"eval_loss": 0.8321590423583984,
"eval_runtime": 5084.7673,
"eval_samples_per_second": 3.273,
"eval_steps_per_second": 0.409,
"step": 24000
},
{
"epoch": 1.4493625210488332,
"grad_norm": 1.4155552387237549,
"learning_rate": 2.5851976585678777e-05,
"loss": 0.8797,
"step": 24100
},
{
"epoch": 1.4553764734183305,
"grad_norm": 1.4764596223831177,
"learning_rate": 2.5751744046187155e-05,
"loss": 0.8657,
"step": 24200
},
{
"epoch": 1.4613904257878279,
"grad_norm": 1.463333010673523,
"learning_rate": 2.5651511506695536e-05,
"loss": 0.8746,
"step": 24300
},
{
"epoch": 1.467404378157325,
"grad_norm": 1.5392202138900757,
"learning_rate": 2.555127896720391e-05,
"loss": 0.8512,
"step": 24400
},
{
"epoch": 1.4734183305268223,
"grad_norm": 1.8480241298675537,
"learning_rate": 2.5451046427712295e-05,
"loss": 0.8562,
"step": 24500
},
{
"epoch": 1.4794322828963193,
"grad_norm": 1.7533873319625854,
"learning_rate": 2.5350813888220677e-05,
"loss": 0.8534,
"step": 24600
},
{
"epoch": 1.4854462352658167,
"grad_norm": 1.6647679805755615,
"learning_rate": 2.525058134872905e-05,
"loss": 0.8335,
"step": 24700
},
{
"epoch": 1.491460187635314,
"grad_norm": 1.8899763822555542,
"learning_rate": 2.5150348809237433e-05,
"loss": 0.8486,
"step": 24800
},
{
"epoch": 1.4974741400048113,
"grad_norm": 1.7569955587387085,
"learning_rate": 2.505011626974581e-05,
"loss": 0.8415,
"step": 24900
},
{
"epoch": 1.5034880923743084,
"grad_norm": 1.5141854286193848,
"learning_rate": 2.4949883730254192e-05,
"loss": 0.8107,
"step": 25000
},
{
"epoch": 1.5095020447438057,
"grad_norm": 1.8239057064056396,
"learning_rate": 2.484965119076257e-05,
"loss": 0.8841,
"step": 25100
},
{
"epoch": 1.5155159971133028,
"grad_norm": 1.433118224143982,
"learning_rate": 2.474941865127095e-05,
"loss": 0.8408,
"step": 25200
},
{
"epoch": 1.5215299494828,
"grad_norm": 1.6874032020568848,
"learning_rate": 2.4650188437174247e-05,
"loss": 0.8387,
"step": 25300
},
{
"epoch": 1.5275439018522974,
"grad_norm": 1.4172905683517456,
"learning_rate": 2.4549955897682625e-05,
"loss": 0.8357,
"step": 25400
},
{
"epoch": 1.5335578542217947,
"grad_norm": 1.4848599433898926,
"learning_rate": 2.4449723358191003e-05,
"loss": 0.8451,
"step": 25500
},
{
"epoch": 1.5395718065912918,
"grad_norm": 1.4670342206954956,
"learning_rate": 2.4349490818699384e-05,
"loss": 0.8505,
"step": 25600
},
{
"epoch": 1.5455857589607889,
"grad_norm": 1.5914552211761475,
"learning_rate": 2.4249258279207762e-05,
"loss": 0.8635,
"step": 25700
},
{
"epoch": 1.5515997113302862,
"grad_norm": 1.5550841093063354,
"learning_rate": 2.414902573971614e-05,
"loss": 0.8763,
"step": 25800
},
{
"epoch": 1.5576136636997835,
"grad_norm": 1.5907316207885742,
"learning_rate": 2.4048793200224525e-05,
"loss": 0.8664,
"step": 25900
},
{
"epoch": 1.5636276160692808,
"grad_norm": 1.4494388103485107,
"learning_rate": 2.3948560660732903e-05,
"loss": 0.819,
"step": 26000
},
{
"epoch": 1.5696415684387781,
"grad_norm": 1.599004864692688,
"learning_rate": 2.384832812124128e-05,
"loss": 0.8363,
"step": 26100
},
{
"epoch": 1.5756555208082752,
"grad_norm": 1.887817621231079,
"learning_rate": 2.3748095581749662e-05,
"loss": 0.8845,
"step": 26200
},
{
"epoch": 1.5816694731777723,
"grad_norm": 1.3124005794525146,
"learning_rate": 2.364786304225804e-05,
"loss": 0.858,
"step": 26300
},
{
"epoch": 1.5876834255472696,
"grad_norm": 1.6560554504394531,
"learning_rate": 2.3547630502766418e-05,
"loss": 0.8553,
"step": 26400
},
{
"epoch": 1.593697377916767,
"grad_norm": 1.5678675174713135,
"learning_rate": 2.34473979632748e-05,
"loss": 0.8488,
"step": 26500
},
{
"epoch": 1.5997113302862642,
"grad_norm": 1.4168376922607422,
"learning_rate": 2.334716542378318e-05,
"loss": 0.8263,
"step": 26600
},
{
"epoch": 1.6057252826557615,
"grad_norm": 1.6189205646514893,
"learning_rate": 2.324693288429156e-05,
"loss": 0.8489,
"step": 26700
},
{
"epoch": 1.6117392350252586,
"grad_norm": 1.7498302459716797,
"learning_rate": 2.3146700344799936e-05,
"loss": 0.8069,
"step": 26800
},
{
"epoch": 1.6177531873947557,
"grad_norm": 1.5609160661697388,
"learning_rate": 2.3046467805308318e-05,
"loss": 0.8337,
"step": 26900
},
{
"epoch": 1.623767139764253,
"grad_norm": 1.7673338651657104,
"learning_rate": 2.2946235265816696e-05,
"loss": 0.8264,
"step": 27000
},
{
"epoch": 1.6297810921337503,
"grad_norm": 1.593299150466919,
"learning_rate": 2.2846002726325074e-05,
"loss": 0.828,
"step": 27100
},
{
"epoch": 1.6357950445032476,
"grad_norm": 1.5802645683288574,
"learning_rate": 2.274677251222837e-05,
"loss": 0.8428,
"step": 27200
},
{
"epoch": 1.6418089968727447,
"grad_norm": 1.7584878206253052,
"learning_rate": 2.264653997273675e-05,
"loss": 0.8378,
"step": 27300
},
{
"epoch": 1.647822949242242,
"grad_norm": 1.5360692739486694,
"learning_rate": 2.254630743324513e-05,
"loss": 0.8288,
"step": 27400
},
{
"epoch": 1.6538369016117391,
"grad_norm": 1.5635976791381836,
"learning_rate": 2.244607489375351e-05,
"loss": 0.8208,
"step": 27500
},
{
"epoch": 1.6598508539812364,
"grad_norm": 1.778735876083374,
"learning_rate": 2.2345842354261888e-05,
"loss": 0.847,
"step": 27600
},
{
"epoch": 1.6658648063507338,
"grad_norm": 1.5961335897445679,
"learning_rate": 2.224560981477027e-05,
"loss": 0.8059,
"step": 27700
},
{
"epoch": 1.671878758720231,
"grad_norm": 1.396517038345337,
"learning_rate": 2.2145377275278647e-05,
"loss": 0.8406,
"step": 27800
},
{
"epoch": 1.6778927110897282,
"grad_norm": 1.554319977760315,
"learning_rate": 2.2045144735787025e-05,
"loss": 0.836,
"step": 27900
},
{
"epoch": 1.6839066634592252,
"grad_norm": 1.5663318634033203,
"learning_rate": 2.1944912196295406e-05,
"loss": 0.8386,
"step": 28000
},
{
"epoch": 1.6839066634592252,
"eval_cer": 0.7604478286805494,
"eval_loss": 0.8103429079055786,
"eval_runtime": 5152.8312,
"eval_samples_per_second": 3.229,
"eval_steps_per_second": 0.404,
"step": 28000
},
{
"epoch": 1.6899206158287225,
"grad_norm": 1.7991820573806763,
"learning_rate": 2.1844679656803784e-05,
"loss": 0.8428,
"step": 28100
},
{
"epoch": 1.6959345681982199,
"grad_norm": 1.6566849946975708,
"learning_rate": 2.1744447117312162e-05,
"loss": 0.8331,
"step": 28200
},
{
"epoch": 1.7019485205677172,
"grad_norm": 1.650564432144165,
"learning_rate": 2.1644214577820547e-05,
"loss": 0.8379,
"step": 28300
},
{
"epoch": 1.7079624729372145,
"grad_norm": 1.6355592012405396,
"learning_rate": 2.1543982038328925e-05,
"loss": 0.8576,
"step": 28400
},
{
"epoch": 1.7139764253067116,
"grad_norm": 1.7112095355987549,
"learning_rate": 2.1443749498837303e-05,
"loss": 0.8173,
"step": 28500
},
{
"epoch": 1.7199903776762087,
"grad_norm": 1.7781462669372559,
"learning_rate": 2.1343516959345684e-05,
"loss": 0.8292,
"step": 28600
},
{
"epoch": 1.726004330045706,
"grad_norm": 1.708770513534546,
"learning_rate": 2.1243284419854062e-05,
"loss": 0.8312,
"step": 28700
},
{
"epoch": 1.7320182824152033,
"grad_norm": 1.869710087776184,
"learning_rate": 2.114305188036244e-05,
"loss": 0.8529,
"step": 28800
},
{
"epoch": 1.7380322347847006,
"grad_norm": 1.4506940841674805,
"learning_rate": 2.104281934087082e-05,
"loss": 0.8454,
"step": 28900
},
{
"epoch": 1.744046187154198,
"grad_norm": 1.5264636278152466,
"learning_rate": 2.0942586801379203e-05,
"loss": 0.8281,
"step": 29000
},
{
"epoch": 1.750060139523695,
"grad_norm": 1.9614264965057373,
"learning_rate": 2.084235426188758e-05,
"loss": 0.8328,
"step": 29100
},
{
"epoch": 1.756074091893192,
"grad_norm": 1.4223591089248657,
"learning_rate": 2.074212172239596e-05,
"loss": 0.8142,
"step": 29200
},
{
"epoch": 1.7620880442626894,
"grad_norm": 1.6863304376602173,
"learning_rate": 2.064188918290434e-05,
"loss": 0.8306,
"step": 29300
},
{
"epoch": 1.7681019966321867,
"grad_norm": 1.5096672773361206,
"learning_rate": 2.0541656643412718e-05,
"loss": 0.7964,
"step": 29400
},
{
"epoch": 1.774115949001684,
"grad_norm": 1.9181997776031494,
"learning_rate": 2.0441424103921096e-05,
"loss": 0.8221,
"step": 29500
},
{
"epoch": 1.780129901371181,
"grad_norm": 1.9824730157852173,
"learning_rate": 2.0341191564429478e-05,
"loss": 0.8379,
"step": 29600
},
{
"epoch": 1.7861438537406784,
"grad_norm": 1.4306327104568481,
"learning_rate": 2.0240959024937855e-05,
"loss": 0.805,
"step": 29700
},
{
"epoch": 1.7921578061101755,
"grad_norm": 1.6249910593032837,
"learning_rate": 2.0140726485446237e-05,
"loss": 0.8145,
"step": 29800
},
{
"epoch": 1.7981717584796728,
"grad_norm": 1.551161766052246,
"learning_rate": 2.0040493945954618e-05,
"loss": 0.8436,
"step": 29900
},
{
"epoch": 1.8041857108491701,
"grad_norm": 1.5218690633773804,
"learning_rate": 1.9940261406462996e-05,
"loss": 0.8181,
"step": 30000
},
{
"epoch": 1.8101996632186674,
"grad_norm": 1.5932899713516235,
"learning_rate": 1.9840028866971374e-05,
"loss": 0.8478,
"step": 30100
},
{
"epoch": 1.8162136155881645,
"grad_norm": 1.4991642236709595,
"learning_rate": 1.9739796327479752e-05,
"loss": 0.8254,
"step": 30200
},
{
"epoch": 1.8222275679576616,
"grad_norm": 1.647438883781433,
"learning_rate": 1.9639563787988133e-05,
"loss": 0.8342,
"step": 30300
},
{
"epoch": 1.828241520327159,
"grad_norm": 1.6653351783752441,
"learning_rate": 1.953933124849651e-05,
"loss": 0.8249,
"step": 30400
},
{
"epoch": 1.8342554726966562,
"grad_norm": 1.6969921588897705,
"learning_rate": 1.943909870900489e-05,
"loss": 0.8544,
"step": 30500
},
{
"epoch": 1.8402694250661535,
"grad_norm": 1.7201420068740845,
"learning_rate": 1.9338866169513274e-05,
"loss": 0.8323,
"step": 30600
},
{
"epoch": 1.8462833774356509,
"grad_norm": 1.7479013204574585,
"learning_rate": 1.9238633630021652e-05,
"loss": 0.8307,
"step": 30700
},
{
"epoch": 1.852297329805148,
"grad_norm": 1.6966118812561035,
"learning_rate": 1.913840109053003e-05,
"loss": 0.8291,
"step": 30800
},
{
"epoch": 1.858311282174645,
"grad_norm": 1.6381275653839111,
"learning_rate": 1.903816855103841e-05,
"loss": 0.8121,
"step": 30900
},
{
"epoch": 1.8643252345441423,
"grad_norm": 1.4601831436157227,
"learning_rate": 1.893793601154679e-05,
"loss": 0.8174,
"step": 31000
},
{
"epoch": 1.8703391869136397,
"grad_norm": 1.8310879468917847,
"learning_rate": 1.8837703472055167e-05,
"loss": 0.8023,
"step": 31100
},
{
"epoch": 1.876353139283137,
"grad_norm": 1.4689913988113403,
"learning_rate": 1.8738473257958463e-05,
"loss": 0.8035,
"step": 31200
},
{
"epoch": 1.8823670916526343,
"grad_norm": 1.4330099821090698,
"learning_rate": 1.8638240718466844e-05,
"loss": 0.8185,
"step": 31300
},
{
"epoch": 1.8883810440221314,
"grad_norm": 1.8419737815856934,
"learning_rate": 1.8538008178975225e-05,
"loss": 0.8144,
"step": 31400
},
{
"epoch": 1.8943949963916284,
"grad_norm": 1.2664531469345093,
"learning_rate": 1.8437775639483603e-05,
"loss": 0.816,
"step": 31500
},
{
"epoch": 1.9004089487611258,
"grad_norm": 1.6704432964324951,
"learning_rate": 1.833754309999198e-05,
"loss": 0.817,
"step": 31600
},
{
"epoch": 1.906422901130623,
"grad_norm": 1.7487777471542358,
"learning_rate": 1.8237310560500363e-05,
"loss": 0.8041,
"step": 31700
},
{
"epoch": 1.9124368535001204,
"grad_norm": 1.2405571937561035,
"learning_rate": 1.813707802100874e-05,
"loss": 0.825,
"step": 31800
},
{
"epoch": 1.9184508058696175,
"grad_norm": 1.543731689453125,
"learning_rate": 1.803684548151712e-05,
"loss": 0.8147,
"step": 31900
},
{
"epoch": 1.9244647582391148,
"grad_norm": 1.5452948808670044,
"learning_rate": 1.79366129420255e-05,
"loss": 0.7973,
"step": 32000
},
{
"epoch": 1.9244647582391148,
"eval_cer": 0.7489049827996835,
"eval_loss": 0.7830217480659485,
"eval_runtime": 5092.4712,
"eval_samples_per_second": 3.268,
"eval_steps_per_second": 0.408,
"step": 32000
},
{
"epoch": 1.9304787106086119,
"grad_norm": 1.640507459640503,
"learning_rate": 1.7836380402533878e-05,
"loss": 0.831,
"step": 32100
},
{
"epoch": 1.9364926629781092,
"grad_norm": 1.919505000114441,
"learning_rate": 1.773614786304226e-05,
"loss": 0.8216,
"step": 32200
},
{
"epoch": 1.9425066153476065,
"grad_norm": 1.5534350872039795,
"learning_rate": 1.763591532355064e-05,
"loss": 0.8157,
"step": 32300
},
{
"epoch": 1.9485205677171038,
"grad_norm": 1.5943905115127563,
"learning_rate": 1.753568278405902e-05,
"loss": 0.8026,
"step": 32400
},
{
"epoch": 1.954534520086601,
"grad_norm": 1.788720965385437,
"learning_rate": 1.7435450244567397e-05,
"loss": 0.7798,
"step": 32500
},
{
"epoch": 1.9605484724560982,
"grad_norm": 1.707412600517273,
"learning_rate": 1.7335217705075778e-05,
"loss": 0.8125,
"step": 32600
},
{
"epoch": 1.9665624248255953,
"grad_norm": 1.6318702697753906,
"learning_rate": 1.7234985165584156e-05,
"loss": 0.8184,
"step": 32700
},
{
"epoch": 1.9725763771950926,
"grad_norm": 1.6191486120224,
"learning_rate": 1.7134752626092534e-05,
"loss": 0.8056,
"step": 32800
},
{
"epoch": 1.97859032956459,
"grad_norm": 1.6070526838302612,
"learning_rate": 1.7034520086600915e-05,
"loss": 0.8134,
"step": 32900
},
{
"epoch": 1.9846042819340872,
"grad_norm": 1.3369784355163574,
"learning_rate": 1.6934287547109297e-05,
"loss": 0.8086,
"step": 33000
},
{
"epoch": 1.9906182343035843,
"grad_norm": 1.7080943584442139,
"learning_rate": 1.6834055007617674e-05,
"loss": 0.8304,
"step": 33100
},
{
"epoch": 1.9966321866730814,
"grad_norm": 1.3051142692565918,
"learning_rate": 1.673482479352097e-05,
"loss": 0.778,
"step": 33200
},
{
"epoch": 2.0026461390425787,
"grad_norm": 1.9086428880691528,
"learning_rate": 1.6634592254029348e-05,
"loss": 0.7972,
"step": 33300
},
{
"epoch": 2.008660091412076,
"grad_norm": 1.6656805276870728,
"learning_rate": 1.653435971453773e-05,
"loss": 0.7701,
"step": 33400
},
{
"epoch": 2.0146740437815733,
"grad_norm": 1.5769175291061401,
"learning_rate": 1.6434127175046107e-05,
"loss": 0.7602,
"step": 33500
},
{
"epoch": 2.0206879961510706,
"grad_norm": 1.6363704204559326,
"learning_rate": 1.6333894635554485e-05,
"loss": 0.7826,
"step": 33600
},
{
"epoch": 2.0267019485205675,
"grad_norm": 1.784424066543579,
"learning_rate": 1.6233662096062867e-05,
"loss": 0.7805,
"step": 33700
},
{
"epoch": 2.032715900890065,
"grad_norm": 1.5795265436172485,
"learning_rate": 1.6133429556571248e-05,
"loss": 0.7289,
"step": 33800
},
{
"epoch": 2.038729853259562,
"grad_norm": 1.382318377494812,
"learning_rate": 1.6033197017079626e-05,
"loss": 0.7869,
"step": 33900
},
{
"epoch": 2.0447438056290594,
"grad_norm": 2.0357506275177,
"learning_rate": 1.5932964477588004e-05,
"loss": 0.7477,
"step": 34000
},
{
"epoch": 2.0507577579985568,
"grad_norm": 1.6945205926895142,
"learning_rate": 1.5832731938096385e-05,
"loss": 0.7582,
"step": 34100
},
{
"epoch": 2.056771710368054,
"grad_norm": 1.4343385696411133,
"learning_rate": 1.5732499398604763e-05,
"loss": 0.7536,
"step": 34200
},
{
"epoch": 2.062785662737551,
"grad_norm": 1.7783600091934204,
"learning_rate": 1.563226685911314e-05,
"loss": 0.8331,
"step": 34300
},
{
"epoch": 2.0687996151070482,
"grad_norm": 1.8440674543380737,
"learning_rate": 1.5532034319621522e-05,
"loss": 0.7352,
"step": 34400
},
{
"epoch": 2.0748135674765456,
"grad_norm": 1.8098615407943726,
"learning_rate": 1.54318017801299e-05,
"loss": 0.7951,
"step": 34500
},
{
"epoch": 2.080827519846043,
"grad_norm": 1.521584391593933,
"learning_rate": 1.5331569240638282e-05,
"loss": 0.7684,
"step": 34600
},
{
"epoch": 2.08684147221554,
"grad_norm": 1.738142490386963,
"learning_rate": 1.5231336701146661e-05,
"loss": 0.7589,
"step": 34700
},
{
"epoch": 2.0928554245850375,
"grad_norm": 1.5031851530075073,
"learning_rate": 1.5131104161655041e-05,
"loss": 0.7508,
"step": 34800
},
{
"epoch": 2.0988693769545343,
"grad_norm": 1.8703136444091797,
"learning_rate": 1.503087162216342e-05,
"loss": 0.803,
"step": 34900
},
{
"epoch": 2.1048833293240317,
"grad_norm": 1.6643913984298706,
"learning_rate": 1.4930639082671799e-05,
"loss": 0.7689,
"step": 35000
},
{
"epoch": 2.110897281693529,
"grad_norm": 1.4073503017425537,
"learning_rate": 1.4830406543180178e-05,
"loss": 0.7575,
"step": 35100
},
{
"epoch": 2.1169112340630263,
"grad_norm": 1.6007989645004272,
"learning_rate": 1.4731176329083474e-05,
"loss": 0.7317,
"step": 35200
},
{
"epoch": 2.1229251864325236,
"grad_norm": 1.7965283393859863,
"learning_rate": 1.4630943789591854e-05,
"loss": 0.7142,
"step": 35300
},
{
"epoch": 2.1289391388020205,
"grad_norm": 1.3655446767807007,
"learning_rate": 1.4530711250100235e-05,
"loss": 0.7613,
"step": 35400
},
{
"epoch": 2.1349530911715178,
"grad_norm": 1.779159426689148,
"learning_rate": 1.4430478710608613e-05,
"loss": 0.7827,
"step": 35500
},
{
"epoch": 2.140967043541015,
"grad_norm": 1.9253307580947876,
"learning_rate": 1.4330246171116993e-05,
"loss": 0.7899,
"step": 35600
},
{
"epoch": 2.1469809959105124,
"grad_norm": 1.3449054956436157,
"learning_rate": 1.4230013631625372e-05,
"loss": 0.7494,
"step": 35700
},
{
"epoch": 2.1529949482800097,
"grad_norm": 1.9044090509414673,
"learning_rate": 1.412978109213375e-05,
"loss": 0.7714,
"step": 35800
},
{
"epoch": 2.159008900649507,
"grad_norm": 1.674017071723938,
"learning_rate": 1.402954855264213e-05,
"loss": 0.7478,
"step": 35900
},
{
"epoch": 2.1650228530190043,
"grad_norm": 1.8109982013702393,
"learning_rate": 1.392931601315051e-05,
"loss": 0.7541,
"step": 36000
},
{
"epoch": 2.1650228530190043,
"eval_cer": 0.7396310458108724,
"eval_loss": 0.7524659633636475,
"eval_runtime": 5154.2928,
"eval_samples_per_second": 3.228,
"eval_steps_per_second": 0.404,
"step": 36000
},
{
"epoch": 2.171036805388501,
"grad_norm": 1.316389560699463,
"learning_rate": 1.3829083473658887e-05,
"loss": 0.7647,
"step": 36100
},
{
"epoch": 2.1770507577579985,
"grad_norm": 1.7067075967788696,
"learning_rate": 1.372885093416727e-05,
"loss": 0.7635,
"step": 36200
},
{
"epoch": 2.183064710127496,
"grad_norm": 1.8793973922729492,
"learning_rate": 1.3628618394675648e-05,
"loss": 0.7313,
"step": 36300
},
{
"epoch": 2.189078662496993,
"grad_norm": 1.769338607788086,
"learning_rate": 1.3528385855184028e-05,
"loss": 0.7573,
"step": 36400
},
{
"epoch": 2.1950926148664904,
"grad_norm": 1.6032990217208862,
"learning_rate": 1.3428153315692408e-05,
"loss": 0.76,
"step": 36500
},
{
"epoch": 2.2011065672359873,
"grad_norm": 1.5864907503128052,
"learning_rate": 1.3327920776200786e-05,
"loss": 0.7587,
"step": 36600
},
{
"epoch": 2.2071205196054846,
"grad_norm": 1.2785674333572388,
"learning_rate": 1.3227688236709165e-05,
"loss": 0.7403,
"step": 36700
},
{
"epoch": 2.213134471974982,
"grad_norm": 1.4437572956085205,
"learning_rate": 1.3127455697217545e-05,
"loss": 0.7358,
"step": 36800
},
{
"epoch": 2.2191484243444792,
"grad_norm": 1.8562610149383545,
"learning_rate": 1.3027223157725923e-05,
"loss": 0.7429,
"step": 36900
},
{
"epoch": 2.2251623767139765,
"grad_norm": 1.6878858804702759,
"learning_rate": 1.2926990618234306e-05,
"loss": 0.7526,
"step": 37000
},
{
"epoch": 2.231176329083474,
"grad_norm": 1.9118757247924805,
"learning_rate": 1.2826758078742684e-05,
"loss": 0.7513,
"step": 37100
},
{
"epoch": 2.2371902814529707,
"grad_norm": 1.3607146739959717,
"learning_rate": 1.272752786464598e-05,
"loss": 0.747,
"step": 37200
},
{
"epoch": 2.243204233822468,
"grad_norm": 1.8414541482925415,
"learning_rate": 1.2627295325154359e-05,
"loss": 0.757,
"step": 37300
},
{
"epoch": 2.2492181861919653,
"grad_norm": 1.5014030933380127,
"learning_rate": 1.2527062785662739e-05,
"loss": 0.7704,
"step": 37400
},
{
"epoch": 2.2552321385614627,
"grad_norm": 1.850203514099121,
"learning_rate": 1.2426830246171117e-05,
"loss": 0.7532,
"step": 37500
},
{
"epoch": 2.26124609093096,
"grad_norm": 1.9308381080627441,
"learning_rate": 1.2326597706679498e-05,
"loss": 0.7658,
"step": 37600
},
{
"epoch": 2.267260043300457,
"grad_norm": 1.8409243822097778,
"learning_rate": 1.2226365167187876e-05,
"loss": 0.7307,
"step": 37700
},
{
"epoch": 2.273273995669954,
"grad_norm": 1.7760223150253296,
"learning_rate": 1.2126132627696256e-05,
"loss": 0.7548,
"step": 37800
},
{
"epoch": 2.2792879480394515,
"grad_norm": 1.4862762689590454,
"learning_rate": 1.2025900088204635e-05,
"loss": 0.7196,
"step": 37900
},
{
"epoch": 2.2853019004089488,
"grad_norm": 1.6604909896850586,
"learning_rate": 1.1925667548713015e-05,
"loss": 0.7306,
"step": 38000
},
{
"epoch": 2.291315852778446,
"grad_norm": 1.6279034614562988,
"learning_rate": 1.1825435009221395e-05,
"loss": 0.7683,
"step": 38100
},
{
"epoch": 2.2973298051479434,
"grad_norm": 1.5816621780395508,
"learning_rate": 1.1725202469729774e-05,
"loss": 0.723,
"step": 38200
},
{
"epoch": 2.3033437575174407,
"grad_norm": 1.7849699258804321,
"learning_rate": 1.1624969930238152e-05,
"loss": 0.7831,
"step": 38300
},
{
"epoch": 2.3093577098869376,
"grad_norm": 1.851671814918518,
"learning_rate": 1.1524737390746534e-05,
"loss": 0.7162,
"step": 38400
},
{
"epoch": 2.315371662256435,
"grad_norm": 1.5026549100875854,
"learning_rate": 1.1424504851254912e-05,
"loss": 0.7803,
"step": 38500
},
{
"epoch": 2.321385614625932,
"grad_norm": 1.6620761156082153,
"learning_rate": 1.1324272311763291e-05,
"loss": 0.733,
"step": 38600
},
{
"epoch": 2.3273995669954295,
"grad_norm": 1.2507511377334595,
"learning_rate": 1.1224039772271671e-05,
"loss": 0.733,
"step": 38700
},
{
"epoch": 2.333413519364927,
"grad_norm": 1.938541293144226,
"learning_rate": 1.112380723278005e-05,
"loss": 0.7499,
"step": 38800
},
{
"epoch": 2.3394274717344237,
"grad_norm": 1.796823263168335,
"learning_rate": 1.102357469328843e-05,
"loss": 0.7399,
"step": 38900
},
{
"epoch": 2.345441424103921,
"grad_norm": 1.834004521369934,
"learning_rate": 1.0923342153796808e-05,
"loss": 0.7322,
"step": 39000
},
{
"epoch": 2.3514553764734183,
"grad_norm": 1.7822822332382202,
"learning_rate": 1.0823109614305188e-05,
"loss": 0.7315,
"step": 39100
},
{
"epoch": 2.3574693288429156,
"grad_norm": 1.7883449792861938,
"learning_rate": 1.0722877074813569e-05,
"loss": 0.7205,
"step": 39200
},
{
"epoch": 2.363483281212413,
"grad_norm": 1.966545581817627,
"learning_rate": 1.0623646860716865e-05,
"loss": 0.768,
"step": 39300
},
{
"epoch": 2.3694972335819102,
"grad_norm": 1.722288966178894,
"learning_rate": 1.0523414321225243e-05,
"loss": 0.7554,
"step": 39400
},
{
"epoch": 2.375511185951407,
"grad_norm": 1.7346769571304321,
"learning_rate": 1.0423181781733622e-05,
"loss": 0.7158,
"step": 39500
},
{
"epoch": 2.3815251383209044,
"grad_norm": 1.603703498840332,
"learning_rate": 1.0322949242242002e-05,
"loss": 0.7467,
"step": 39600
},
{
"epoch": 2.3875390906904017,
"grad_norm": 1.8487290143966675,
"learning_rate": 1.0222716702750382e-05,
"loss": 0.7492,
"step": 39700
},
{
"epoch": 2.393553043059899,
"grad_norm": 1.7814853191375732,
"learning_rate": 1.0122484163258761e-05,
"loss": 0.7153,
"step": 39800
},
{
"epoch": 2.3995669954293963,
"grad_norm": 1.745309591293335,
"learning_rate": 1.002225162376714e-05,
"loss": 0.7417,
"step": 39900
},
{
"epoch": 2.405580947798893,
"grad_norm": 1.8568013906478882,
"learning_rate": 9.92201908427552e-06,
"loss": 0.7176,
"step": 40000
},
{
"epoch": 2.405580947798893,
"eval_cer": 0.7346033545787555,
"eval_loss": 0.7291049957275391,
"eval_runtime": 5180.4578,
"eval_samples_per_second": 3.212,
"eval_steps_per_second": 0.402,
"step": 40000
},
{
"epoch": 2.4115949001683905,
"grad_norm": 2.15785813331604,
"learning_rate": 9.8217865447839e-06,
"loss": 0.7234,
"step": 40100
},
{
"epoch": 2.417608852537888,
"grad_norm": 1.7856571674346924,
"learning_rate": 9.721554005292278e-06,
"loss": 0.7176,
"step": 40200
},
{
"epoch": 2.423622804907385,
"grad_norm": 1.682504415512085,
"learning_rate": 9.621321465800658e-06,
"loss": 0.7231,
"step": 40300
},
{
"epoch": 2.4296367572768824,
"grad_norm": 1.3437491655349731,
"learning_rate": 9.521088926309037e-06,
"loss": 0.7232,
"step": 40400
},
{
"epoch": 2.4356507096463798,
"grad_norm": 1.6993297338485718,
"learning_rate": 9.420856386817417e-06,
"loss": 0.7539,
"step": 40500
},
{
"epoch": 2.441664662015877,
"grad_norm": 1.942014455795288,
"learning_rate": 9.320623847325797e-06,
"loss": 0.7519,
"step": 40600
},
{
"epoch": 2.447678614385374,
"grad_norm": 1.7039834260940552,
"learning_rate": 9.220391307834175e-06,
"loss": 0.7362,
"step": 40700
},
{
"epoch": 2.4536925667548712,
"grad_norm": 1.7124940156936646,
"learning_rate": 9.120158768342556e-06,
"loss": 0.7457,
"step": 40800
},
{
"epoch": 2.4597065191243686,
"grad_norm": 1.910249948501587,
"learning_rate": 9.019926228850934e-06,
"loss": 0.7236,
"step": 40900
},
{
"epoch": 2.465720471493866,
"grad_norm": 1.5514588356018066,
"learning_rate": 8.919693689359314e-06,
"loss": 0.721,
"step": 41000
},
{
"epoch": 2.471734423863363,
"grad_norm": 1.744598150253296,
"learning_rate": 8.819461149867693e-06,
"loss": 0.7008,
"step": 41100
},
{
"epoch": 2.47774837623286,
"grad_norm": 1.7827790975570679,
"learning_rate": 8.719228610376073e-06,
"loss": 0.7196,
"step": 41200
},
{
"epoch": 2.4837623286023573,
"grad_norm": 1.7352166175842285,
"learning_rate": 8.619998396279369e-06,
"loss": 0.7322,
"step": 41300
},
{
"epoch": 2.4897762809718547,
"grad_norm": 1.7378534078598022,
"learning_rate": 8.519765856787748e-06,
"loss": 0.7434,
"step": 41400
},
{
"epoch": 2.495790233341352,
"grad_norm": 1.712098240852356,
"learning_rate": 8.419533317296128e-06,
"loss": 0.6939,
"step": 41500
},
{
"epoch": 2.5018041857108493,
"grad_norm": 1.7368323802947998,
"learning_rate": 8.319300777804506e-06,
"loss": 0.726,
"step": 41600
},
{
"epoch": 2.5078181380803466,
"grad_norm": 1.7819844484329224,
"learning_rate": 8.219068238312887e-06,
"loss": 0.7271,
"step": 41700
},
{
"epoch": 2.513832090449844,
"grad_norm": 1.892824411392212,
"learning_rate": 8.118835698821265e-06,
"loss": 0.7146,
"step": 41800
},
{
"epoch": 2.5198460428193408,
"grad_norm": 1.8355358839035034,
"learning_rate": 8.018603159329645e-06,
"loss": 0.7276,
"step": 41900
},
{
"epoch": 2.525859995188838,
"grad_norm": 1.428154468536377,
"learning_rate": 7.918370619838024e-06,
"loss": 0.726,
"step": 42000
},
{
"epoch": 2.5318739475583354,
"grad_norm": 1.8524224758148193,
"learning_rate": 7.818138080346404e-06,
"loss": 0.7178,
"step": 42100
},
{
"epoch": 2.5378878999278327,
"grad_norm": 2.0265612602233887,
"learning_rate": 7.717905540854784e-06,
"loss": 0.714,
"step": 42200
},
{
"epoch": 2.5439018522973296,
"grad_norm": 1.721408486366272,
"learning_rate": 7.6176730013631625e-06,
"loss": 0.7319,
"step": 42300
},
{
"epoch": 2.549915804666827,
"grad_norm": 1.725827932357788,
"learning_rate": 7.517440461871543e-06,
"loss": 0.7663,
"step": 42400
},
{
"epoch": 2.555929757036324,
"grad_norm": 1.6882712841033936,
"learning_rate": 7.417207922379922e-06,
"loss": 0.7191,
"step": 42500
},
{
"epoch": 2.5619437094058215,
"grad_norm": 1.743870496749878,
"learning_rate": 7.3169753828883015e-06,
"loss": 0.7372,
"step": 42600
},
{
"epoch": 2.567957661775319,
"grad_norm": 1.8450312614440918,
"learning_rate": 7.21674284339668e-06,
"loss": 0.6971,
"step": 42700
},
{
"epoch": 2.573971614144816,
"grad_norm": 1.6685088872909546,
"learning_rate": 7.116510303905061e-06,
"loss": 0.7331,
"step": 42800
},
{
"epoch": 2.5799855665143134,
"grad_norm": 1.406771183013916,
"learning_rate": 7.01627776441344e-06,
"loss": 0.7292,
"step": 42900
},
{
"epoch": 2.5859995188838103,
"grad_norm": 1.750435471534729,
"learning_rate": 6.916045224921818e-06,
"loss": 0.7298,
"step": 43000
},
{
"epoch": 2.5920134712533076,
"grad_norm": 1.4334131479263306,
"learning_rate": 6.815812685430198e-06,
"loss": 0.7332,
"step": 43100
},
{
"epoch": 2.598027423622805,
"grad_norm": 1.4097541570663452,
"learning_rate": 6.7155801459385786e-06,
"loss": 0.7183,
"step": 43200
},
{
"epoch": 2.6040413759923022,
"grad_norm": 1.5977458953857422,
"learning_rate": 6.616349931841874e-06,
"loss": 0.7227,
"step": 43300
},
{
"epoch": 2.610055328361799,
"grad_norm": 1.766050100326538,
"learning_rate": 6.516117392350253e-06,
"loss": 0.7275,
"step": 43400
},
{
"epoch": 2.6160692807312964,
"grad_norm": 1.6626147031784058,
"learning_rate": 6.415884852858632e-06,
"loss": 0.7101,
"step": 43500
},
{
"epoch": 2.6220832331007937,
"grad_norm": 2.104124069213867,
"learning_rate": 6.315652313367011e-06,
"loss": 0.7267,
"step": 43600
},
{
"epoch": 2.628097185470291,
"grad_norm": 1.66257905960083,
"learning_rate": 6.215419773875391e-06,
"loss": 0.6873,
"step": 43700
},
{
"epoch": 2.6341111378397883,
"grad_norm": 2.1178150177001953,
"learning_rate": 6.115187234383771e-06,
"loss": 0.7533,
"step": 43800
},
{
"epoch": 2.6401250902092857,
"grad_norm": 1.5612884759902954,
"learning_rate": 6.0149546948921495e-06,
"loss": 0.6981,
"step": 43900
},
{
"epoch": 2.646139042578783,
"grad_norm": 1.9699441194534302,
"learning_rate": 5.914722155400529e-06,
"loss": 0.7477,
"step": 44000
},
{
"epoch": 2.646139042578783,
"eval_cer": 0.7222449991747579,
"eval_loss": 0.7039721608161926,
"eval_runtime": 5169.4179,
"eval_samples_per_second": 3.219,
"eval_steps_per_second": 0.402,
"step": 44000
},
{
"epoch": 2.6521529949482803,
"grad_norm": 1.6843528747558594,
"learning_rate": 5.814489615908909e-06,
"loss": 0.7235,
"step": 44100
},
{
"epoch": 2.658166947317777,
"grad_norm": 1.4490200281143188,
"learning_rate": 5.7142570764172885e-06,
"loss": 0.7022,
"step": 44200
},
{
"epoch": 2.6641808996872745,
"grad_norm": 1.6219418048858643,
"learning_rate": 5.614024536925667e-06,
"loss": 0.7215,
"step": 44300
},
{
"epoch": 2.6701948520567718,
"grad_norm": 1.7598456144332886,
"learning_rate": 5.513791997434047e-06,
"loss": 0.6884,
"step": 44400
},
{
"epoch": 2.676208804426269,
"grad_norm": 1.4096354246139526,
"learning_rate": 5.413559457942427e-06,
"loss": 0.7142,
"step": 44500
},
{
"epoch": 2.682222756795766,
"grad_norm": 1.890046238899231,
"learning_rate": 5.313326918450806e-06,
"loss": 0.7346,
"step": 44600
},
{
"epoch": 2.6882367091652632,
"grad_norm": 1.9413307905197144,
"learning_rate": 5.213094378959186e-06,
"loss": 0.688,
"step": 44700
},
{
"epoch": 2.6942506615347606,
"grad_norm": 1.8647221326828003,
"learning_rate": 5.112861839467565e-06,
"loss": 0.732,
"step": 44800
},
{
"epoch": 2.700264613904258,
"grad_norm": 1.6978052854537964,
"learning_rate": 5.012629299975944e-06,
"loss": 0.7075,
"step": 44900
},
{
"epoch": 2.706278566273755,
"grad_norm": 1.3552030324935913,
"learning_rate": 4.912396760484324e-06,
"loss": 0.7074,
"step": 45000
},
{
"epoch": 2.7122925186432525,
"grad_norm": 1.762635350227356,
"learning_rate": 4.812164220992704e-06,
"loss": 0.7219,
"step": 45100
},
{
"epoch": 2.71830647101275,
"grad_norm": 1.7568144798278809,
"learning_rate": 4.7119316815010824e-06,
"loss": 0.6925,
"step": 45200
},
{
"epoch": 2.7243204233822467,
"grad_norm": 1.5333248376846313,
"learning_rate": 4.612701467404378e-06,
"loss": 0.7133,
"step": 45300
},
{
"epoch": 2.730334375751744,
"grad_norm": 1.8916515111923218,
"learning_rate": 4.512468927912758e-06,
"loss": 0.717,
"step": 45400
},
{
"epoch": 2.7363483281212413,
"grad_norm": 1.8751533031463623,
"learning_rate": 4.412236388421137e-06,
"loss": 0.7306,
"step": 45500
},
{
"epoch": 2.7423622804907386,
"grad_norm": 1.8414188623428345,
"learning_rate": 4.312003848929517e-06,
"loss": 0.7241,
"step": 45600
},
{
"epoch": 2.748376232860236,
"grad_norm": 1.4498494863510132,
"learning_rate": 4.211771309437896e-06,
"loss": 0.7358,
"step": 45700
},
{
"epoch": 2.754390185229733,
"grad_norm": 1.519124984741211,
"learning_rate": 4.111538769946275e-06,
"loss": 0.7059,
"step": 45800
},
{
"epoch": 2.76040413759923,
"grad_norm": 1.9624022245407104,
"learning_rate": 4.011306230454655e-06,
"loss": 0.7185,
"step": 45900
},
{
"epoch": 2.7664180899687274,
"grad_norm": 1.7385069131851196,
"learning_rate": 3.911073690963035e-06,
"loss": 0.7065,
"step": 46000
},
{
"epoch": 2.7724320423382247,
"grad_norm": 1.5801081657409668,
"learning_rate": 3.8108411514714135e-06,
"loss": 0.7242,
"step": 46100
},
{
"epoch": 2.778445994707722,
"grad_norm": 2.89750337600708,
"learning_rate": 3.710608611979793e-06,
"loss": 0.7023,
"step": 46200
},
{
"epoch": 2.7844599470772193,
"grad_norm": 1.7838506698608398,
"learning_rate": 3.6103760724881724e-06,
"loss": 0.709,
"step": 46300
},
{
"epoch": 2.7904738994467166,
"grad_norm": 1.7187174558639526,
"learning_rate": 3.510143532996552e-06,
"loss": 0.7292,
"step": 46400
},
{
"epoch": 2.7964878518162135,
"grad_norm": 1.692230463027954,
"learning_rate": 3.4099109935049313e-06,
"loss": 0.6929,
"step": 46500
},
{
"epoch": 2.802501804185711,
"grad_norm": 1.8588602542877197,
"learning_rate": 3.309678454013311e-06,
"loss": 0.7182,
"step": 46600
},
{
"epoch": 2.808515756555208,
"grad_norm": 1.7374218702316284,
"learning_rate": 3.20944591452169e-06,
"loss": 0.6878,
"step": 46700
},
{
"epoch": 2.8145297089247054,
"grad_norm": 1.6396222114562988,
"learning_rate": 3.10921337503007e-06,
"loss": 0.7035,
"step": 46800
},
{
"epoch": 2.8205436612942023,
"grad_norm": 1.4402636289596558,
"learning_rate": 3.0089808355384495e-06,
"loss": 0.7131,
"step": 46900
},
{
"epoch": 2.8265576136636996,
"grad_norm": 2.006782293319702,
"learning_rate": 2.9087482960468287e-06,
"loss": 0.7222,
"step": 47000
},
{
"epoch": 2.832571566033197,
"grad_norm": 1.543579339981079,
"learning_rate": 2.8085157565552084e-06,
"loss": 0.683,
"step": 47100
},
{
"epoch": 2.8385855184026942,
"grad_norm": 1.7819281816482544,
"learning_rate": 2.7082832170635876e-06,
"loss": 0.7095,
"step": 47200
},
{
"epoch": 2.8445994707721916,
"grad_norm": 1.6801820993423462,
"learning_rate": 2.608050677571967e-06,
"loss": 0.7021,
"step": 47300
},
{
"epoch": 2.850613423141689,
"grad_norm": 1.8617347478866577,
"learning_rate": 2.508820463475263e-06,
"loss": 0.7094,
"step": 47400
},
{
"epoch": 2.856627375511186,
"grad_norm": 1.8291690349578857,
"learning_rate": 2.408587923983642e-06,
"loss": 0.7004,
"step": 47500
},
{
"epoch": 2.862641327880683,
"grad_norm": 1.8136513233184814,
"learning_rate": 2.3083553844920213e-06,
"loss": 0.6936,
"step": 47600
},
{
"epoch": 2.8686552802501804,
"grad_norm": 2.1329762935638428,
"learning_rate": 2.208122845000401e-06,
"loss": 0.6904,
"step": 47700
},
{
"epoch": 2.8746692326196777,
"grad_norm": 1.9445267915725708,
"learning_rate": 2.1078903055087806e-06,
"loss": 0.6832,
"step": 47800
},
{
"epoch": 2.880683184989175,
"grad_norm": 1.6533479690551758,
"learning_rate": 2.0076577660171602e-06,
"loss": 0.6754,
"step": 47900
},
{
"epoch": 2.8866971373586723,
"grad_norm": 2.0631463527679443,
"learning_rate": 1.9074252265255394e-06,
"loss": 0.6882,
"step": 48000
},
{
"epoch": 2.8866971373586723,
"eval_cer": 0.7184241484900502,
"eval_loss": 0.6888573169708252,
"eval_runtime": 5167.0943,
"eval_samples_per_second": 3.22,
"eval_steps_per_second": 0.403,
"step": 48000
}
],
"logging_steps": 100,
"max_steps": 49884,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 4000,
"total_flos": 4.985333650921882e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}