{
  "best_metric": 0.28981047871583737,
  "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_4/checkpoint-1167",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1167,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00856898029134533,
      "grad_norm": 12.96240520477295,
      "learning_rate": 6.233595156363815e-07,
      "loss": 2.1941,
      "step": 10
    },
    {
      "epoch": 0.01713796058269066,
      "grad_norm": 15.678441047668457,
      "learning_rate": 1.246719031272763e-06,
      "loss": 2.2004,
      "step": 20
    },
    {
      "epoch": 0.02570694087403599,
      "grad_norm": 19.057016372680664,
      "learning_rate": 1.8700785469091444e-06,
      "loss": 2.2096,
      "step": 30
    },
    {
      "epoch": 0.03427592116538132,
      "grad_norm": 16.71215057373047,
      "learning_rate": 2.493438062545526e-06,
      "loss": 2.2218,
      "step": 40
    },
    {
      "epoch": 0.04284490145672665,
      "grad_norm": 19.787519454956055,
      "learning_rate": 3.1167975781819074e-06,
      "loss": 2.2402,
      "step": 50
    },
    {
      "epoch": 0.05141388174807198,
      "grad_norm": 13.118367195129395,
      "learning_rate": 3.7401570938182888e-06,
      "loss": 2.1593,
      "step": 60
    },
    {
      "epoch": 0.05998286203941731,
      "grad_norm": 14.878708839416504,
      "learning_rate": 4.363516609454671e-06,
      "loss": 2.1811,
      "step": 70
    },
    {
      "epoch": 0.06855184233076264,
      "grad_norm": 26.236587524414062,
      "learning_rate": 4.986876125091052e-06,
      "loss": 2.0935,
      "step": 80
    },
    {
      "epoch": 0.07712082262210797,
      "grad_norm": 14.972529411315918,
      "learning_rate": 5.610235640727433e-06,
      "loss": 2.1434,
      "step": 90
    },
    {
      "epoch": 0.0856898029134533,
      "grad_norm": 14.05119514465332,
      "learning_rate": 6.233595156363815e-06,
      "loss": 2.0917,
      "step": 100
    },
    {
      "epoch": 0.09425878320479864,
      "grad_norm": 13.932599067687988,
      "learning_rate": 6.856954672000195e-06,
      "loss": 2.0876,
      "step": 110
    },
    {
      "epoch": 0.10282776349614396,
      "grad_norm": 11.802385330200195,
      "learning_rate": 7.4803141876365775e-06,
      "loss": 2.1551,
      "step": 120
    },
    {
      "epoch": 0.11139674378748929,
      "grad_norm": 17.57957649230957,
      "learning_rate": 8.103673703272958e-06,
      "loss": 2.0485,
      "step": 130
    },
    {
      "epoch": 0.11996572407883462,
      "grad_norm": 12.634247779846191,
      "learning_rate": 8.727033218909341e-06,
      "loss": 2.0586,
      "step": 140
    },
    {
      "epoch": 0.12853470437017994,
      "grad_norm": 14.163841247558594,
      "learning_rate": 9.350392734545721e-06,
      "loss": 2.0832,
      "step": 150
    },
    {
      "epoch": 0.13710368466152528,
      "grad_norm": 12.886134147644043,
      "learning_rate": 9.973752250182104e-06,
      "loss": 2.0785,
      "step": 160
    },
    {
      "epoch": 0.1456726649528706,
      "grad_norm": 12.337018966674805,
      "learning_rate": 1.0597111765818484e-05,
      "loss": 2.1782,
      "step": 170
    },
    {
      "epoch": 0.15424164524421594,
      "grad_norm": 15.206589698791504,
      "learning_rate": 1.1220471281454867e-05,
      "loss": 2.05,
      "step": 180
    },
    {
      "epoch": 0.16281062553556128,
      "grad_norm": 15.637868881225586,
      "learning_rate": 1.1843830797091246e-05,
      "loss": 2.0865,
      "step": 190
    },
    {
      "epoch": 0.1713796058269066,
      "grad_norm": 13.318354606628418,
      "learning_rate": 1.246719031272763e-05,
      "loss": 1.9896,
      "step": 200
    },
    {
      "epoch": 0.17994858611825193,
      "grad_norm": 15.895234107971191,
      "learning_rate": 1.3090549828364011e-05,
      "loss": 2.2041,
      "step": 210
    },
    {
      "epoch": 0.18851756640959727,
      "grad_norm": 11.681656837463379,
      "learning_rate": 1.3580958892325523e-05,
      "loss": 2.0332,
      "step": 220
    },
    {
      "epoch": 0.19708654670094258,
      "grad_norm": 12.1074800491333,
      "learning_rate": 1.353956614958756e-05,
      "loss": 1.9992,
      "step": 230
    },
    {
      "epoch": 0.20565552699228792,
      "grad_norm": 16.131893157958984,
      "learning_rate": 1.3498173406849598e-05,
      "loss": 2.0392,
      "step": 240
    },
    {
      "epoch": 0.21422450728363324,
      "grad_norm": 13.486928939819336,
      "learning_rate": 1.3456780664111635e-05,
      "loss": 2.0591,
      "step": 250
    },
    {
      "epoch": 0.22279348757497858,
      "grad_norm": 12.962348937988281,
      "learning_rate": 1.3415387921373673e-05,
      "loss": 2.1737,
      "step": 260
    },
    {
      "epoch": 0.23136246786632392,
      "grad_norm": 15.706825256347656,
      "learning_rate": 1.337399517863571e-05,
      "loss": 2.0788,
      "step": 270
    },
    {
      "epoch": 0.23993144815766923,
      "grad_norm": 15.435516357421875,
      "learning_rate": 1.3332602435897748e-05,
      "loss": 2.1001,
      "step": 280
    },
    {
      "epoch": 0.24850042844901457,
      "grad_norm": 10.667174339294434,
      "learning_rate": 1.3291209693159784e-05,
      "loss": 1.9447,
      "step": 290
    },
    {
      "epoch": 0.2570694087403599,
      "grad_norm": 14.559552192687988,
      "learning_rate": 1.3249816950421821e-05,
      "loss": 2.0502,
      "step": 300
    },
    {
      "epoch": 0.2656383890317052,
      "grad_norm": 15.85730266571045,
      "learning_rate": 1.320842420768386e-05,
      "loss": 2.0924,
      "step": 310
    },
    {
      "epoch": 0.27420736932305056,
      "grad_norm": 13.333989143371582,
      "learning_rate": 1.3167031464945898e-05,
      "loss": 2.1221,
      "step": 320
    },
    {
      "epoch": 0.2827763496143959,
      "grad_norm": 13.683819770812988,
      "learning_rate": 1.3125638722207935e-05,
      "loss": 2.1562,
      "step": 330
    },
    {
      "epoch": 0.2913453299057412,
      "grad_norm": 12.066128730773926,
      "learning_rate": 1.3084245979469973e-05,
      "loss": 2.0211,
      "step": 340
    },
    {
      "epoch": 0.29991431019708653,
      "grad_norm": 14.573673248291016,
      "learning_rate": 1.3042853236732009e-05,
      "loss": 2.0365,
      "step": 350
    },
    {
      "epoch": 0.30848329048843187,
      "grad_norm": 19.47612762451172,
      "learning_rate": 1.3001460493994046e-05,
      "loss": 1.9834,
      "step": 360
    },
    {
      "epoch": 0.3170522707797772,
      "grad_norm": 13.694817543029785,
      "learning_rate": 1.2960067751256084e-05,
      "loss": 2.0116,
      "step": 370
    },
    {
      "epoch": 0.32562125107112255,
      "grad_norm": 12.057513236999512,
      "learning_rate": 1.2918675008518121e-05,
      "loss": 2.0626,
      "step": 380
    },
    {
      "epoch": 0.3341902313624679,
      "grad_norm": 12.8948974609375,
      "learning_rate": 1.2877282265780159e-05,
      "loss": 1.9787,
      "step": 390
    },
    {
      "epoch": 0.3427592116538132,
      "grad_norm": 14.375242233276367,
      "learning_rate": 1.2835889523042196e-05,
      "loss": 2.0237,
      "step": 400
    },
    {
      "epoch": 0.3513281919451585,
      "grad_norm": 13.976824760437012,
      "learning_rate": 1.2794496780304234e-05,
      "loss": 2.1159,
      "step": 410
    },
    {
      "epoch": 0.35989717223650386,
      "grad_norm": 15.234496116638184,
      "learning_rate": 1.2753104037566271e-05,
      "loss": 1.9116,
      "step": 420
    },
    {
      "epoch": 0.3684661525278492,
      "grad_norm": 12.984347343444824,
      "learning_rate": 1.2711711294828309e-05,
      "loss": 2.0484,
      "step": 430
    },
    {
      "epoch": 0.37703513281919454,
      "grad_norm": 10.440543174743652,
      "learning_rate": 1.2670318552090346e-05,
      "loss": 2.0348,
      "step": 440
    },
    {
      "epoch": 0.3856041131105398,
      "grad_norm": 11.408565521240234,
      "learning_rate": 1.2628925809352384e-05,
      "loss": 2.0866,
      "step": 450
    },
    {
      "epoch": 0.39417309340188517,
      "grad_norm": 15.430132865905762,
      "learning_rate": 1.2587533066614421e-05,
      "loss": 2.0252,
      "step": 460
    },
    {
      "epoch": 0.4027420736932305,
      "grad_norm": 15.553166389465332,
      "learning_rate": 1.2546140323876457e-05,
      "loss": 2.0838,
      "step": 470
    },
    {
      "epoch": 0.41131105398457585,
      "grad_norm": 14.190583229064941,
      "learning_rate": 1.2504747581138496e-05,
      "loss": 2.0674,
      "step": 480
    },
    {
      "epoch": 0.4198800342759212,
      "grad_norm": 19.912630081176758,
      "learning_rate": 1.2463354838400534e-05,
      "loss": 2.0175,
      "step": 490
    },
    {
      "epoch": 0.4284490145672665,
      "grad_norm": 11.050692558288574,
      "learning_rate": 1.2421962095662571e-05,
      "loss": 2.0227,
      "step": 500
    },
    {
      "epoch": 0.4370179948586118,
      "grad_norm": 13.648895263671875,
      "learning_rate": 1.2380569352924609e-05,
      "loss": 2.1153,
      "step": 510
    },
    {
      "epoch": 0.44558697514995715,
      "grad_norm": 13.396595001220703,
      "learning_rate": 1.2339176610186645e-05,
      "loss": 2.0524,
      "step": 520
    },
    {
      "epoch": 0.4541559554413025,
      "grad_norm": 12.05173110961914,
      "learning_rate": 1.2297783867448682e-05,
      "loss": 2.0801,
      "step": 530
    },
    {
      "epoch": 0.46272493573264784,
      "grad_norm": 11.418408393859863,
      "learning_rate": 1.225639112471072e-05,
      "loss": 2.0219,
      "step": 540
    },
    {
      "epoch": 0.4712939160239931,
      "grad_norm": 14.92882251739502,
      "learning_rate": 1.2214998381972757e-05,
      "loss": 1.9197,
      "step": 550
    },
    {
      "epoch": 0.47986289631533846,
      "grad_norm": 13.67534351348877,
      "learning_rate": 1.2173605639234796e-05,
      "loss": 1.9579,
      "step": 560
    },
    {
      "epoch": 0.4884318766066838,
      "grad_norm": 16.3277645111084,
      "learning_rate": 1.2132212896496834e-05,
      "loss": 2.0499,
      "step": 570
    },
    {
      "epoch": 0.49700085689802914,
      "grad_norm": 12.686991691589355,
      "learning_rate": 1.209082015375887e-05,
      "loss": 1.8892,
      "step": 580
    },
    {
      "epoch": 0.5055698371893744,
      "grad_norm": 14.13610553741455,
      "learning_rate": 1.2049427411020907e-05,
      "loss": 1.9821,
      "step": 590
    },
    {
      "epoch": 0.5141388174807198,
      "grad_norm": 10.20384693145752,
      "learning_rate": 1.2008034668282945e-05,
      "loss": 1.8765,
      "step": 600
    },
    {
      "epoch": 0.5227077977720651,
      "grad_norm": 11.100608825683594,
      "learning_rate": 1.1966641925544982e-05,
      "loss": 1.928,
      "step": 610
    },
    {
      "epoch": 0.5312767780634104,
      "grad_norm": 13.737257957458496,
      "learning_rate": 1.192524918280702e-05,
      "loss": 2.0266,
      "step": 620
    },
    {
      "epoch": 0.5398457583547558,
      "grad_norm": 13.313102722167969,
      "learning_rate": 1.1883856440069057e-05,
      "loss": 1.9792,
      "step": 630
    },
    {
      "epoch": 0.5484147386461011,
      "grad_norm": 16.294010162353516,
      "learning_rate": 1.1842463697331093e-05,
      "loss": 2.0274,
      "step": 640
    },
    {
      "epoch": 0.5569837189374465,
      "grad_norm": 14.80037784576416,
      "learning_rate": 1.1801070954593132e-05,
      "loss": 2.0204,
      "step": 650
    },
    {
      "epoch": 0.5655526992287918,
      "grad_norm": 16.782167434692383,
      "learning_rate": 1.175967821185517e-05,
      "loss": 1.896,
      "step": 660
    },
    {
      "epoch": 0.5741216795201372,
      "grad_norm": 14.986900329589844,
      "learning_rate": 1.1718285469117207e-05,
      "loss": 1.9126,
      "step": 670
    },
    {
      "epoch": 0.5826906598114824,
      "grad_norm": 15.64176082611084,
      "learning_rate": 1.1676892726379245e-05,
      "loss": 1.8983,
      "step": 680
    },
    {
      "epoch": 0.5912596401028277,
      "grad_norm": 14.483860969543457,
      "learning_rate": 1.1635499983641282e-05,
      "loss": 2.1211,
      "step": 690
    },
    {
      "epoch": 0.5998286203941731,
      "grad_norm": 9.888971328735352,
      "learning_rate": 1.1594107240903318e-05,
      "loss": 1.8572,
      "step": 700
    },
    {
      "epoch": 0.6083976006855184,
      "grad_norm": 12.11154556274414,
      "learning_rate": 1.1552714498165355e-05,
      "loss": 1.866,
      "step": 710
    },
    {
      "epoch": 0.6169665809768637,
      "grad_norm": 14.539010047912598,
      "learning_rate": 1.1511321755427393e-05,
      "loss": 1.9029,
      "step": 720
    },
    {
      "epoch": 0.6255355612682091,
      "grad_norm": 17.459091186523438,
      "learning_rate": 1.1469929012689432e-05,
      "loss": 1.9333,
      "step": 730
    },
    {
      "epoch": 0.6341045415595544,
      "grad_norm": 14.461770057678223,
      "learning_rate": 1.142853626995147e-05,
      "loss": 2.0981,
      "step": 740
    },
    {
      "epoch": 0.6426735218508998,
      "grad_norm": 15.937264442443848,
      "learning_rate": 1.1387143527213507e-05,
      "loss": 1.8607,
      "step": 750
    },
    {
      "epoch": 0.6512425021422451,
      "grad_norm": 16.7382869720459,
      "learning_rate": 1.1345750784475543e-05,
      "loss": 2.1263,
      "step": 760
    },
    {
      "epoch": 0.6598114824335904,
      "grad_norm": 15.768759727478027,
      "learning_rate": 1.130435804173758e-05,
      "loss": 1.928,
      "step": 770
    },
    {
      "epoch": 0.6683804627249358,
      "grad_norm": 18.80556869506836,
      "learning_rate": 1.1262965298999618e-05,
      "loss": 1.98,
      "step": 780
    },
    {
      "epoch": 0.676949443016281,
      "grad_norm": 14.333468437194824,
      "learning_rate": 1.1221572556261655e-05,
      "loss": 1.9067,
      "step": 790
    },
    {
      "epoch": 0.6855184233076264,
      "grad_norm": 19.20866584777832,
      "learning_rate": 1.1180179813523693e-05,
      "loss": 2.0511,
      "step": 800
    },
    {
      "epoch": 0.6940874035989717,
      "grad_norm": 18.691129684448242,
      "learning_rate": 1.113878707078573e-05,
      "loss": 1.872,
      "step": 810
    },
    {
      "epoch": 0.702656383890317,
      "grad_norm": 14.753878593444824,
      "learning_rate": 1.1097394328047768e-05,
      "loss": 1.8993,
      "step": 820
    },
    {
      "epoch": 0.7112253641816624,
      "grad_norm": 13.987009048461914,
      "learning_rate": 1.1056001585309806e-05,
      "loss": 1.8868,
      "step": 830
    },
    {
      "epoch": 0.7197943444730077,
      "grad_norm": 16.61811637878418,
      "learning_rate": 1.1014608842571843e-05,
      "loss": 1.8531,
      "step": 840
    },
    {
      "epoch": 0.7283633247643531,
      "grad_norm": 18.929941177368164,
      "learning_rate": 1.097321609983388e-05,
      "loss": 1.8008,
      "step": 850
    },
    {
      "epoch": 0.7369323050556984,
      "grad_norm": 18.89458465576172,
      "learning_rate": 1.0931823357095918e-05,
      "loss": 1.935,
      "step": 860
    },
    {
      "epoch": 0.7455012853470437,
      "grad_norm": 15.524568557739258,
      "learning_rate": 1.0890430614357956e-05,
      "loss": 2.0548,
      "step": 870
    },
    {
      "epoch": 0.7540702656383891,
      "grad_norm": 17.038110733032227,
      "learning_rate": 1.0849037871619991e-05,
      "loss": 2.0176,
      "step": 880
    },
    {
      "epoch": 0.7626392459297343,
      "grad_norm": 16.24259376525879,
      "learning_rate": 1.0807645128882029e-05,
      "loss": 2.0311,
      "step": 890
    },
    {
      "epoch": 0.7712082262210797,
      "grad_norm": 12.702564239501953,
      "learning_rate": 1.0766252386144068e-05,
      "loss": 1.9613,
      "step": 900
    },
    {
      "epoch": 0.779777206512425,
      "grad_norm": 13.47549057006836,
      "learning_rate": 1.0724859643406106e-05,
      "loss": 1.9554,
      "step": 910
    },
    {
      "epoch": 0.7883461868037703,
      "grad_norm": 15.315031051635742,
      "learning_rate": 1.0683466900668143e-05,
      "loss": 1.8628,
      "step": 920
    },
    {
      "epoch": 0.7969151670951157,
      "grad_norm": 12.436241149902344,
      "learning_rate": 1.0642074157930179e-05,
      "loss": 1.9767,
      "step": 930
    },
    {
      "epoch": 0.805484147386461,
      "grad_norm": 17.100671768188477,
      "learning_rate": 1.0600681415192216e-05,
      "loss": 2.0268,
      "step": 940
    },
    {
      "epoch": 0.8140531276778064,
      "grad_norm": 14.803923606872559,
      "learning_rate": 1.0559288672454254e-05,
      "loss": 2.0516,
      "step": 950
    },
    {
      "epoch": 0.8226221079691517,
      "grad_norm": 17.308460235595703,
      "learning_rate": 1.0517895929716291e-05,
      "loss": 1.9354,
      "step": 960
    },
    {
      "epoch": 0.831191088260497,
      "grad_norm": 38.664039611816406,
      "learning_rate": 1.0476503186978329e-05,
      "loss": 1.9623,
      "step": 970
    },
    {
      "epoch": 0.8397600685518424,
      "grad_norm": 16.550312042236328,
      "learning_rate": 1.0435110444240368e-05,
      "loss": 1.888,
      "step": 980
    },
    {
      "epoch": 0.8483290488431876,
      "grad_norm": 19.344846725463867,
      "learning_rate": 1.0393717701502404e-05,
      "loss": 1.9992,
      "step": 990
    },
    {
      "epoch": 0.856898029134533,
      "grad_norm": 18.766752243041992,
      "learning_rate": 1.0352324958764441e-05,
      "loss": 1.8824,
      "step": 1000
    },
    {
      "epoch": 0.8654670094258783,
      "grad_norm": 20.725662231445312,
      "learning_rate": 1.0310932216026479e-05,
      "loss": 1.7351,
      "step": 1010
    },
    {
      "epoch": 0.8740359897172236,
      "grad_norm": 15.772370338439941,
      "learning_rate": 1.0269539473288516e-05,
      "loss": 1.9839,
      "step": 1020
    },
    {
      "epoch": 0.882604970008569,
      "grad_norm": 15.904685974121094,
      "learning_rate": 1.0228146730550554e-05,
      "loss": 2.1096,
      "step": 1030
    },
    {
      "epoch": 0.8911739502999143,
      "grad_norm": 19.53571891784668,
      "learning_rate": 1.0186753987812591e-05,
      "loss": 1.9122,
      "step": 1040
    },
    {
      "epoch": 0.8997429305912596,
      "grad_norm": 14.403525352478027,
      "learning_rate": 1.0145361245074627e-05,
      "loss": 1.8495,
      "step": 1050
    },
    {
      "epoch": 0.908311910882605,
      "grad_norm": 20.06512451171875,
      "learning_rate": 1.0103968502336666e-05,
      "loss": 1.9467,
      "step": 1060
    },
    {
      "epoch": 0.9168808911739503,
      "grad_norm": 15.332779884338379,
      "learning_rate": 1.0062575759598704e-05,
      "loss": 1.7918,
      "step": 1070
    },
    {
      "epoch": 0.9254498714652957,
      "grad_norm": 18.091779708862305,
      "learning_rate": 1.0021183016860741e-05,
      "loss": 1.8323,
      "step": 1080
    },
    {
      "epoch": 0.934018851756641,
      "grad_norm": 17.92203712463379,
      "learning_rate": 9.979790274122779e-06,
      "loss": 1.9448,
      "step": 1090
    },
    {
      "epoch": 0.9425878320479862,
      "grad_norm": 11.862146377563477,
      "learning_rate": 9.938397531384816e-06,
      "loss": 1.9453,
      "step": 1100
    },
    {
      "epoch": 0.9511568123393316,
      "grad_norm": 16.67616844177246,
      "learning_rate": 9.897004788646852e-06,
      "loss": 1.9977,
      "step": 1110
    },
    {
      "epoch": 0.9597257926306769,
      "grad_norm": 17.18949317932129,
      "learning_rate": 9.85561204590889e-06,
      "loss": 1.8888,
      "step": 1120
    },
    {
      "epoch": 0.9682947729220223,
      "grad_norm": 19.521203994750977,
      "learning_rate": 9.814219303170927e-06,
      "loss": 1.948,
      "step": 1130
    },
    {
      "epoch": 0.9768637532133676,
      "grad_norm": 21.371353149414062,
      "learning_rate": 9.772826560432965e-06,
      "loss": 1.9285,
      "step": 1140
    },
    {
      "epoch": 0.9854327335047129,
      "grad_norm": 18.078819274902344,
      "learning_rate": 9.731433817695004e-06,
      "loss": 1.9144,
      "step": 1150
    },
    {
      "epoch": 0.9940017137960583,
      "grad_norm": 26.718231201171875,
      "learning_rate": 9.690041074957041e-06,
      "loss": 1.8113,
      "step": 1160
    },
    {
      "epoch": 1.0,
      "eval_classification_report": {
        "accuracy": 0.3035,
        "ar": {
          "f1-score": 0.23214285714285715,
          "precision": 0.3,
          "recall": 0.18932038834951456,
          "support": 206.0
        },
        "cl": {
          "f1-score": 0.2225609756097561,
          "precision": 0.1994535519125683,
          "recall": 0.2517241379310345,
          "support": 290.0
        },
        "co": {
          "f1-score": 0.3453038674033149,
          "precision": 0.28868360277136257,
          "recall": 0.42955326460481097,
          "support": 291.0
        },
        "es": {
          "f1-score": 0.3566666666666667,
          "precision": 0.3333333333333333,
          "recall": 0.3835125448028674,
          "support": 279.0
        },
        "macro avg": {
          "f1-score": 0.28981047871583737,
          "precision": 0.31679889037420644,
          "recall": 0.28794417545744694,
          "support": 2000.0
        },
        "mx": {
          "f1-score": 0.2676767676767677,
          "precision": 0.5047619047619047,
          "recall": 0.18213058419243985,
          "support": 291.0
        },
        "pe": {
          "f1-score": 0.27666666666666667,
          "precision": 0.2686084142394822,
          "recall": 0.2852233676975945,
          "support": 291.0
        },
        "pr": {
          "f1-score": 0.6162162162162163,
          "precision": 0.6785714285714286,
          "recall": 0.5643564356435643,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.2910602910602911,
          "precision": 0.2777777777777778,
          "recall": 0.3056768558951965,
          "support": 229.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.2998260603986032,
          "precision": 0.3269230233436701,
          "recall": 0.3035,
          "support": 2000.0
        }
      },
      "eval_f1": 0.28981047871583737,
      "eval_loss": 1.8416967391967773,
      "eval_runtime": 5.6259,
      "eval_samples_per_second": 355.502,
      "eval_steps_per_second": 88.875,
      "step": 1167
    }
  ],
  "logging_steps": 10,
  "max_steps": 3501,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 153469167996672.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}