{
"best_metric": 0.28981047871583737,
"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_4/checkpoint-1167",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1167,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00856898029134533,
"grad_norm": 12.96240520477295,
"learning_rate": 6.233595156363815e-07,
"loss": 2.1941,
"step": 10
},
{
"epoch": 0.01713796058269066,
"grad_norm": 15.678441047668457,
"learning_rate": 1.246719031272763e-06,
"loss": 2.2004,
"step": 20
},
{
"epoch": 0.02570694087403599,
"grad_norm": 19.057016372680664,
"learning_rate": 1.8700785469091444e-06,
"loss": 2.2096,
"step": 30
},
{
"epoch": 0.03427592116538132,
"grad_norm": 16.71215057373047,
"learning_rate": 2.493438062545526e-06,
"loss": 2.2218,
"step": 40
},
{
"epoch": 0.04284490145672665,
"grad_norm": 19.787519454956055,
"learning_rate": 3.1167975781819074e-06,
"loss": 2.2402,
"step": 50
},
{
"epoch": 0.05141388174807198,
"grad_norm": 13.118367195129395,
"learning_rate": 3.7401570938182888e-06,
"loss": 2.1593,
"step": 60
},
{
"epoch": 0.05998286203941731,
"grad_norm": 14.878708839416504,
"learning_rate": 4.363516609454671e-06,
"loss": 2.1811,
"step": 70
},
{
"epoch": 0.06855184233076264,
"grad_norm": 26.236587524414062,
"learning_rate": 4.986876125091052e-06,
"loss": 2.0935,
"step": 80
},
{
"epoch": 0.07712082262210797,
"grad_norm": 14.972529411315918,
"learning_rate": 5.610235640727433e-06,
"loss": 2.1434,
"step": 90
},
{
"epoch": 0.0856898029134533,
"grad_norm": 14.05119514465332,
"learning_rate": 6.233595156363815e-06,
"loss": 2.0917,
"step": 100
},
{
"epoch": 0.09425878320479864,
"grad_norm": 13.932599067687988,
"learning_rate": 6.856954672000195e-06,
"loss": 2.0876,
"step": 110
},
{
"epoch": 0.10282776349614396,
"grad_norm": 11.802385330200195,
"learning_rate": 7.4803141876365775e-06,
"loss": 2.1551,
"step": 120
},
{
"epoch": 0.11139674378748929,
"grad_norm": 17.57957649230957,
"learning_rate": 8.103673703272958e-06,
"loss": 2.0485,
"step": 130
},
{
"epoch": 0.11996572407883462,
"grad_norm": 12.634247779846191,
"learning_rate": 8.727033218909341e-06,
"loss": 2.0586,
"step": 140
},
{
"epoch": 0.12853470437017994,
"grad_norm": 14.163841247558594,
"learning_rate": 9.350392734545721e-06,
"loss": 2.0832,
"step": 150
},
{
"epoch": 0.13710368466152528,
"grad_norm": 12.886134147644043,
"learning_rate": 9.973752250182104e-06,
"loss": 2.0785,
"step": 160
},
{
"epoch": 0.1456726649528706,
"grad_norm": 12.337018966674805,
"learning_rate": 1.0597111765818484e-05,
"loss": 2.1782,
"step": 170
},
{
"epoch": 0.15424164524421594,
"grad_norm": 15.206589698791504,
"learning_rate": 1.1220471281454867e-05,
"loss": 2.05,
"step": 180
},
{
"epoch": 0.16281062553556128,
"grad_norm": 15.637868881225586,
"learning_rate": 1.1843830797091246e-05,
"loss": 2.0865,
"step": 190
},
{
"epoch": 0.1713796058269066,
"grad_norm": 13.318354606628418,
"learning_rate": 1.246719031272763e-05,
"loss": 1.9896,
"step": 200
},
{
"epoch": 0.17994858611825193,
"grad_norm": 15.895234107971191,
"learning_rate": 1.3090549828364011e-05,
"loss": 2.2041,
"step": 210
},
{
"epoch": 0.18851756640959727,
"grad_norm": 11.681656837463379,
"learning_rate": 1.3580958892325523e-05,
"loss": 2.0332,
"step": 220
},
{
"epoch": 0.19708654670094258,
"grad_norm": 12.1074800491333,
"learning_rate": 1.353956614958756e-05,
"loss": 1.9992,
"step": 230
},
{
"epoch": 0.20565552699228792,
"grad_norm": 16.131893157958984,
"learning_rate": 1.3498173406849598e-05,
"loss": 2.0392,
"step": 240
},
{
"epoch": 0.21422450728363324,
"grad_norm": 13.486928939819336,
"learning_rate": 1.3456780664111635e-05,
"loss": 2.0591,
"step": 250
},
{
"epoch": 0.22279348757497858,
"grad_norm": 12.962348937988281,
"learning_rate": 1.3415387921373673e-05,
"loss": 2.1737,
"step": 260
},
{
"epoch": 0.23136246786632392,
"grad_norm": 15.706825256347656,
"learning_rate": 1.337399517863571e-05,
"loss": 2.0788,
"step": 270
},
{
"epoch": 0.23993144815766923,
"grad_norm": 15.435516357421875,
"learning_rate": 1.3332602435897748e-05,
"loss": 2.1001,
"step": 280
},
{
"epoch": 0.24850042844901457,
"grad_norm": 10.667174339294434,
"learning_rate": 1.3291209693159784e-05,
"loss": 1.9447,
"step": 290
},
{
"epoch": 0.2570694087403599,
"grad_norm": 14.559552192687988,
"learning_rate": 1.3249816950421821e-05,
"loss": 2.0502,
"step": 300
},
{
"epoch": 0.2656383890317052,
"grad_norm": 15.85730266571045,
"learning_rate": 1.320842420768386e-05,
"loss": 2.0924,
"step": 310
},
{
"epoch": 0.27420736932305056,
"grad_norm": 13.333989143371582,
"learning_rate": 1.3167031464945898e-05,
"loss": 2.1221,
"step": 320
},
{
"epoch": 0.2827763496143959,
"grad_norm": 13.683819770812988,
"learning_rate": 1.3125638722207935e-05,
"loss": 2.1562,
"step": 330
},
{
"epoch": 0.2913453299057412,
"grad_norm": 12.066128730773926,
"learning_rate": 1.3084245979469973e-05,
"loss": 2.0211,
"step": 340
},
{
"epoch": 0.29991431019708653,
"grad_norm": 14.573673248291016,
"learning_rate": 1.3042853236732009e-05,
"loss": 2.0365,
"step": 350
},
{
"epoch": 0.30848329048843187,
"grad_norm": 19.47612762451172,
"learning_rate": 1.3001460493994046e-05,
"loss": 1.9834,
"step": 360
},
{
"epoch": 0.3170522707797772,
"grad_norm": 13.694817543029785,
"learning_rate": 1.2960067751256084e-05,
"loss": 2.0116,
"step": 370
},
{
"epoch": 0.32562125107112255,
"grad_norm": 12.057513236999512,
"learning_rate": 1.2918675008518121e-05,
"loss": 2.0626,
"step": 380
},
{
"epoch": 0.3341902313624679,
"grad_norm": 12.8948974609375,
"learning_rate": 1.2877282265780159e-05,
"loss": 1.9787,
"step": 390
},
{
"epoch": 0.3427592116538132,
"grad_norm": 14.375242233276367,
"learning_rate": 1.2835889523042196e-05,
"loss": 2.0237,
"step": 400
},
{
"epoch": 0.3513281919451585,
"grad_norm": 13.976824760437012,
"learning_rate": 1.2794496780304234e-05,
"loss": 2.1159,
"step": 410
},
{
"epoch": 0.35989717223650386,
"grad_norm": 15.234496116638184,
"learning_rate": 1.2753104037566271e-05,
"loss": 1.9116,
"step": 420
},
{
"epoch": 0.3684661525278492,
"grad_norm": 12.984347343444824,
"learning_rate": 1.2711711294828309e-05,
"loss": 2.0484,
"step": 430
},
{
"epoch": 0.37703513281919454,
"grad_norm": 10.440543174743652,
"learning_rate": 1.2670318552090346e-05,
"loss": 2.0348,
"step": 440
},
{
"epoch": 0.3856041131105398,
"grad_norm": 11.408565521240234,
"learning_rate": 1.2628925809352384e-05,
"loss": 2.0866,
"step": 450
},
{
"epoch": 0.39417309340188517,
"grad_norm": 15.430132865905762,
"learning_rate": 1.2587533066614421e-05,
"loss": 2.0252,
"step": 460
},
{
"epoch": 0.4027420736932305,
"grad_norm": 15.553166389465332,
"learning_rate": 1.2546140323876457e-05,
"loss": 2.0838,
"step": 470
},
{
"epoch": 0.41131105398457585,
"grad_norm": 14.190583229064941,
"learning_rate": 1.2504747581138496e-05,
"loss": 2.0674,
"step": 480
},
{
"epoch": 0.4198800342759212,
"grad_norm": 19.912630081176758,
"learning_rate": 1.2463354838400534e-05,
"loss": 2.0175,
"step": 490
},
{
"epoch": 0.4284490145672665,
"grad_norm": 11.050692558288574,
"learning_rate": 1.2421962095662571e-05,
"loss": 2.0227,
"step": 500
},
{
"epoch": 0.4370179948586118,
"grad_norm": 13.648895263671875,
"learning_rate": 1.2380569352924609e-05,
"loss": 2.1153,
"step": 510
},
{
"epoch": 0.44558697514995715,
"grad_norm": 13.396595001220703,
"learning_rate": 1.2339176610186645e-05,
"loss": 2.0524,
"step": 520
},
{
"epoch": 0.4541559554413025,
"grad_norm": 12.05173110961914,
"learning_rate": 1.2297783867448682e-05,
"loss": 2.0801,
"step": 530
},
{
"epoch": 0.46272493573264784,
"grad_norm": 11.418408393859863,
"learning_rate": 1.225639112471072e-05,
"loss": 2.0219,
"step": 540
},
{
"epoch": 0.4712939160239931,
"grad_norm": 14.92882251739502,
"learning_rate": 1.2214998381972757e-05,
"loss": 1.9197,
"step": 550
},
{
"epoch": 0.47986289631533846,
"grad_norm": 13.67534351348877,
"learning_rate": 1.2173605639234796e-05,
"loss": 1.9579,
"step": 560
},
{
"epoch": 0.4884318766066838,
"grad_norm": 16.3277645111084,
"learning_rate": 1.2132212896496834e-05,
"loss": 2.0499,
"step": 570
},
{
"epoch": 0.49700085689802914,
"grad_norm": 12.686991691589355,
"learning_rate": 1.209082015375887e-05,
"loss": 1.8892,
"step": 580
},
{
"epoch": 0.5055698371893744,
"grad_norm": 14.13610553741455,
"learning_rate": 1.2049427411020907e-05,
"loss": 1.9821,
"step": 590
},
{
"epoch": 0.5141388174807198,
"grad_norm": 10.20384693145752,
"learning_rate": 1.2008034668282945e-05,
"loss": 1.8765,
"step": 600
},
{
"epoch": 0.5227077977720651,
"grad_norm": 11.100608825683594,
"learning_rate": 1.1966641925544982e-05,
"loss": 1.928,
"step": 610
},
{
"epoch": 0.5312767780634104,
"grad_norm": 13.737257957458496,
"learning_rate": 1.192524918280702e-05,
"loss": 2.0266,
"step": 620
},
{
"epoch": 0.5398457583547558,
"grad_norm": 13.313102722167969,
"learning_rate": 1.1883856440069057e-05,
"loss": 1.9792,
"step": 630
},
{
"epoch": 0.5484147386461011,
"grad_norm": 16.294010162353516,
"learning_rate": 1.1842463697331093e-05,
"loss": 2.0274,
"step": 640
},
{
"epoch": 0.5569837189374465,
"grad_norm": 14.80037784576416,
"learning_rate": 1.1801070954593132e-05,
"loss": 2.0204,
"step": 650
},
{
"epoch": 0.5655526992287918,
"grad_norm": 16.782167434692383,
"learning_rate": 1.175967821185517e-05,
"loss": 1.896,
"step": 660
},
{
"epoch": 0.5741216795201372,
"grad_norm": 14.986900329589844,
"learning_rate": 1.1718285469117207e-05,
"loss": 1.9126,
"step": 670
},
{
"epoch": 0.5826906598114824,
"grad_norm": 15.64176082611084,
"learning_rate": 1.1676892726379245e-05,
"loss": 1.8983,
"step": 680
},
{
"epoch": 0.5912596401028277,
"grad_norm": 14.483860969543457,
"learning_rate": 1.1635499983641282e-05,
"loss": 2.1211,
"step": 690
},
{
"epoch": 0.5998286203941731,
"grad_norm": 9.888971328735352,
"learning_rate": 1.1594107240903318e-05,
"loss": 1.8572,
"step": 700
},
{
"epoch": 0.6083976006855184,
"grad_norm": 12.11154556274414,
"learning_rate": 1.1552714498165355e-05,
"loss": 1.866,
"step": 710
},
{
"epoch": 0.6169665809768637,
"grad_norm": 14.539010047912598,
"learning_rate": 1.1511321755427393e-05,
"loss": 1.9029,
"step": 720
},
{
"epoch": 0.6255355612682091,
"grad_norm": 17.459091186523438,
"learning_rate": 1.1469929012689432e-05,
"loss": 1.9333,
"step": 730
},
{
"epoch": 0.6341045415595544,
"grad_norm": 14.461770057678223,
"learning_rate": 1.142853626995147e-05,
"loss": 2.0981,
"step": 740
},
{
"epoch": 0.6426735218508998,
"grad_norm": 15.937264442443848,
"learning_rate": 1.1387143527213507e-05,
"loss": 1.8607,
"step": 750
},
{
"epoch": 0.6512425021422451,
"grad_norm": 16.7382869720459,
"learning_rate": 1.1345750784475543e-05,
"loss": 2.1263,
"step": 760
},
{
"epoch": 0.6598114824335904,
"grad_norm": 15.768759727478027,
"learning_rate": 1.130435804173758e-05,
"loss": 1.928,
"step": 770
},
{
"epoch": 0.6683804627249358,
"grad_norm": 18.80556869506836,
"learning_rate": 1.1262965298999618e-05,
"loss": 1.98,
"step": 780
},
{
"epoch": 0.676949443016281,
"grad_norm": 14.333468437194824,
"learning_rate": 1.1221572556261655e-05,
"loss": 1.9067,
"step": 790
},
{
"epoch": 0.6855184233076264,
"grad_norm": 19.20866584777832,
"learning_rate": 1.1180179813523693e-05,
"loss": 2.0511,
"step": 800
},
{
"epoch": 0.6940874035989717,
"grad_norm": 18.691129684448242,
"learning_rate": 1.113878707078573e-05,
"loss": 1.872,
"step": 810
},
{
"epoch": 0.702656383890317,
"grad_norm": 14.753878593444824,
"learning_rate": 1.1097394328047768e-05,
"loss": 1.8993,
"step": 820
},
{
"epoch": 0.7112253641816624,
"grad_norm": 13.987009048461914,
"learning_rate": 1.1056001585309806e-05,
"loss": 1.8868,
"step": 830
},
{
"epoch": 0.7197943444730077,
"grad_norm": 16.61811637878418,
"learning_rate": 1.1014608842571843e-05,
"loss": 1.8531,
"step": 840
},
{
"epoch": 0.7283633247643531,
"grad_norm": 18.929941177368164,
"learning_rate": 1.097321609983388e-05,
"loss": 1.8008,
"step": 850
},
{
"epoch": 0.7369323050556984,
"grad_norm": 18.89458465576172,
"learning_rate": 1.0931823357095918e-05,
"loss": 1.935,
"step": 860
},
{
"epoch": 0.7455012853470437,
"grad_norm": 15.524568557739258,
"learning_rate": 1.0890430614357956e-05,
"loss": 2.0548,
"step": 870
},
{
"epoch": 0.7540702656383891,
"grad_norm": 17.038110733032227,
"learning_rate": 1.0849037871619991e-05,
"loss": 2.0176,
"step": 880
},
{
"epoch": 0.7626392459297343,
"grad_norm": 16.24259376525879,
"learning_rate": 1.0807645128882029e-05,
"loss": 2.0311,
"step": 890
},
{
"epoch": 0.7712082262210797,
"grad_norm": 12.702564239501953,
"learning_rate": 1.0766252386144068e-05,
"loss": 1.9613,
"step": 900
},
{
"epoch": 0.779777206512425,
"grad_norm": 13.47549057006836,
"learning_rate": 1.0724859643406106e-05,
"loss": 1.9554,
"step": 910
},
{
"epoch": 0.7883461868037703,
"grad_norm": 15.315031051635742,
"learning_rate": 1.0683466900668143e-05,
"loss": 1.8628,
"step": 920
},
{
"epoch": 0.7969151670951157,
"grad_norm": 12.436241149902344,
"learning_rate": 1.0642074157930179e-05,
"loss": 1.9767,
"step": 930
},
{
"epoch": 0.805484147386461,
"grad_norm": 17.100671768188477,
"learning_rate": 1.0600681415192216e-05,
"loss": 2.0268,
"step": 940
},
{
"epoch": 0.8140531276778064,
"grad_norm": 14.803923606872559,
"learning_rate": 1.0559288672454254e-05,
"loss": 2.0516,
"step": 950
},
{
"epoch": 0.8226221079691517,
"grad_norm": 17.308460235595703,
"learning_rate": 1.0517895929716291e-05,
"loss": 1.9354,
"step": 960
},
{
"epoch": 0.831191088260497,
"grad_norm": 38.664039611816406,
"learning_rate": 1.0476503186978329e-05,
"loss": 1.9623,
"step": 970
},
{
"epoch": 0.8397600685518424,
"grad_norm": 16.550312042236328,
"learning_rate": 1.0435110444240368e-05,
"loss": 1.888,
"step": 980
},
{
"epoch": 0.8483290488431876,
"grad_norm": 19.344846725463867,
"learning_rate": 1.0393717701502404e-05,
"loss": 1.9992,
"step": 990
},
{
"epoch": 0.856898029134533,
"grad_norm": 18.766752243041992,
"learning_rate": 1.0352324958764441e-05,
"loss": 1.8824,
"step": 1000
},
{
"epoch": 0.8654670094258783,
"grad_norm": 20.725662231445312,
"learning_rate": 1.0310932216026479e-05,
"loss": 1.7351,
"step": 1010
},
{
"epoch": 0.8740359897172236,
"grad_norm": 15.772370338439941,
"learning_rate": 1.0269539473288516e-05,
"loss": 1.9839,
"step": 1020
},
{
"epoch": 0.882604970008569,
"grad_norm": 15.904685974121094,
"learning_rate": 1.0228146730550554e-05,
"loss": 2.1096,
"step": 1030
},
{
"epoch": 0.8911739502999143,
"grad_norm": 19.53571891784668,
"learning_rate": 1.0186753987812591e-05,
"loss": 1.9122,
"step": 1040
},
{
"epoch": 0.8997429305912596,
"grad_norm": 14.403525352478027,
"learning_rate": 1.0145361245074627e-05,
"loss": 1.8495,
"step": 1050
},
{
"epoch": 0.908311910882605,
"grad_norm": 20.06512451171875,
"learning_rate": 1.0103968502336666e-05,
"loss": 1.9467,
"step": 1060
},
{
"epoch": 0.9168808911739503,
"grad_norm": 15.332779884338379,
"learning_rate": 1.0062575759598704e-05,
"loss": 1.7918,
"step": 1070
},
{
"epoch": 0.9254498714652957,
"grad_norm": 18.091779708862305,
"learning_rate": 1.0021183016860741e-05,
"loss": 1.8323,
"step": 1080
},
{
"epoch": 0.934018851756641,
"grad_norm": 17.92203712463379,
"learning_rate": 9.979790274122779e-06,
"loss": 1.9448,
"step": 1090
},
{
"epoch": 0.9425878320479862,
"grad_norm": 11.862146377563477,
"learning_rate": 9.938397531384816e-06,
"loss": 1.9453,
"step": 1100
},
{
"epoch": 0.9511568123393316,
"grad_norm": 16.67616844177246,
"learning_rate": 9.897004788646852e-06,
"loss": 1.9977,
"step": 1110
},
{
"epoch": 0.9597257926306769,
"grad_norm": 17.18949317932129,
"learning_rate": 9.85561204590889e-06,
"loss": 1.8888,
"step": 1120
},
{
"epoch": 0.9682947729220223,
"grad_norm": 19.521203994750977,
"learning_rate": 9.814219303170927e-06,
"loss": 1.948,
"step": 1130
},
{
"epoch": 0.9768637532133676,
"grad_norm": 21.371353149414062,
"learning_rate": 9.772826560432965e-06,
"loss": 1.9285,
"step": 1140
},
{
"epoch": 0.9854327335047129,
"grad_norm": 18.078819274902344,
"learning_rate": 9.731433817695004e-06,
"loss": 1.9144,
"step": 1150
},
{
"epoch": 0.9940017137960583,
"grad_norm": 26.718231201171875,
"learning_rate": 9.690041074957041e-06,
"loss": 1.8113,
"step": 1160
},
{
"epoch": 1.0,
"eval_classification_report": {
"accuracy": 0.3035,
"ar": {
"f1-score": 0.23214285714285715,
"precision": 0.3,
"recall": 0.18932038834951456,
"support": 206.0
},
"cl": {
"f1-score": 0.2225609756097561,
"precision": 0.1994535519125683,
"recall": 0.2517241379310345,
"support": 290.0
},
"co": {
"f1-score": 0.3453038674033149,
"precision": 0.28868360277136257,
"recall": 0.42955326460481097,
"support": 291.0
},
"es": {
"f1-score": 0.3566666666666667,
"precision": 0.3333333333333333,
"recall": 0.3835125448028674,
"support": 279.0
},
"macro avg": {
"f1-score": 0.28981047871583737,
"precision": 0.31679889037420644,
"recall": 0.28794417545744694,
"support": 2000.0
},
"mx": {
"f1-score": 0.2676767676767677,
"precision": 0.5047619047619047,
"recall": 0.18213058419243985,
"support": 291.0
},
"pe": {
"f1-score": 0.27666666666666667,
"precision": 0.2686084142394822,
"recall": 0.2852233676975945,
"support": 291.0
},
"pr": {
"f1-score": 0.6162162162162163,
"precision": 0.6785714285714286,
"recall": 0.5643564356435643,
"support": 101.0
},
"uy": {
"f1-score": 0.2910602910602911,
"precision": 0.2777777777777778,
"recall": 0.3056768558951965,
"support": 229.0
},
"ve": {
"f1-score": 0.0,
"precision": 0.0,
"recall": 0.0,
"support": 22.0
},
"weighted avg": {
"f1-score": 0.2998260603986032,
"precision": 0.3269230233436701,
"recall": 0.3035,
"support": 2000.0
}
},
"eval_f1": 0.28981047871583737,
"eval_loss": 1.8416967391967773,
"eval_runtime": 5.6259,
"eval_samples_per_second": 355.502,
"eval_steps_per_second": 88.875,
"step": 1167
}
],
"logging_steps": 10,
"max_steps": 3501,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 153469167996672.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}