{
"best_metric": 0.3755741665997943,
"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_9/checkpoint-1764",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1764,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011337868480725623,
"grad_norm": 9.890141487121582,
"learning_rate": 9.346480722758123e-07,
"loss": 2.2337,
"step": 10
},
{
"epoch": 0.022675736961451247,
"grad_norm": 10.97945785522461,
"learning_rate": 1.8692961445516245e-06,
"loss": 2.2359,
"step": 20
},
{
"epoch": 0.034013605442176874,
"grad_norm": 10.244830131530762,
"learning_rate": 2.8039442168274367e-06,
"loss": 2.2034,
"step": 30
},
{
"epoch": 0.045351473922902494,
"grad_norm": 11.170487403869629,
"learning_rate": 3.738592289103249e-06,
"loss": 2.2238,
"step": 40
},
{
"epoch": 0.05668934240362812,
"grad_norm": 8.994032859802246,
"learning_rate": 4.673240361379061e-06,
"loss": 2.143,
"step": 50
},
{
"epoch": 0.06802721088435375,
"grad_norm": 11.781458854675293,
"learning_rate": 5.607888433654873e-06,
"loss": 2.1299,
"step": 60
},
{
"epoch": 0.07936507936507936,
"grad_norm": 10.54732894897461,
"learning_rate": 6.542536505930685e-06,
"loss": 2.102,
"step": 70
},
{
"epoch": 0.09070294784580499,
"grad_norm": 8.15542221069336,
"learning_rate": 7.477184578206498e-06,
"loss": 2.0436,
"step": 80
},
{
"epoch": 0.10204081632653061,
"grad_norm": 12.086600303649902,
"learning_rate": 8.41183265048231e-06,
"loss": 2.0417,
"step": 90
},
{
"epoch": 0.11337868480725624,
"grad_norm": 10.285418510437012,
"learning_rate": 9.346480722758123e-06,
"loss": 2.0767,
"step": 100
},
{
"epoch": 0.12471655328798185,
"grad_norm": 10.342191696166992,
"learning_rate": 1.0281128795033934e-05,
"loss": 2.0401,
"step": 110
},
{
"epoch": 0.1360544217687075,
"grad_norm": 9.20302677154541,
"learning_rate": 1.1215776867309747e-05,
"loss": 1.99,
"step": 120
},
{
"epoch": 0.1473922902494331,
"grad_norm": 9.420401573181152,
"learning_rate": 1.215042493958556e-05,
"loss": 1.9867,
"step": 130
},
{
"epoch": 0.15873015873015872,
"grad_norm": 7.75075101852417,
"learning_rate": 1.308507301186137e-05,
"loss": 1.9988,
"step": 140
},
{
"epoch": 0.17006802721088435,
"grad_norm": 8.807808876037598,
"learning_rate": 1.4019721084137183e-05,
"loss": 1.9886,
"step": 150
},
{
"epoch": 0.18140589569160998,
"grad_norm": 10.291740417480469,
"learning_rate": 1.4954369156412996e-05,
"loss": 1.9393,
"step": 160
},
{
"epoch": 0.1927437641723356,
"grad_norm": 8.806387901306152,
"learning_rate": 1.588901722868881e-05,
"loss": 1.9602,
"step": 170
},
{
"epoch": 0.20408163265306123,
"grad_norm": 7.684463977813721,
"learning_rate": 1.682366530096462e-05,
"loss": 2.0044,
"step": 180
},
{
"epoch": 0.21541950113378686,
"grad_norm": 8.645553588867188,
"learning_rate": 1.7758313373240435e-05,
"loss": 2.0519,
"step": 190
},
{
"epoch": 0.22675736961451248,
"grad_norm": 9.162885665893555,
"learning_rate": 1.8692961445516246e-05,
"loss": 2.0516,
"step": 200
},
{
"epoch": 0.23809523809523808,
"grad_norm": 7.536770343780518,
"learning_rate": 1.9627609517792057e-05,
"loss": 2.0299,
"step": 210
},
{
"epoch": 0.2494331065759637,
"grad_norm": 8.47108268737793,
"learning_rate": 2.0562257590067868e-05,
"loss": 2.0816,
"step": 220
},
{
"epoch": 0.26077097505668934,
"grad_norm": 6.5353498458862305,
"learning_rate": 2.1496905662343682e-05,
"loss": 2.0053,
"step": 230
},
{
"epoch": 0.272108843537415,
"grad_norm": 7.390655994415283,
"learning_rate": 2.2431553734619493e-05,
"loss": 1.956,
"step": 240
},
{
"epoch": 0.2834467120181406,
"grad_norm": 11.590116500854492,
"learning_rate": 2.3366201806895304e-05,
"loss": 1.9745,
"step": 250
},
{
"epoch": 0.2947845804988662,
"grad_norm": 7.277239799499512,
"learning_rate": 2.430084987917112e-05,
"loss": 2.0846,
"step": 260
},
{
"epoch": 0.30612244897959184,
"grad_norm": 7.210775375366211,
"learning_rate": 2.523549795144693e-05,
"loss": 1.9379,
"step": 270
},
{
"epoch": 0.31746031746031744,
"grad_norm": 9.101860046386719,
"learning_rate": 2.617014602372274e-05,
"loss": 1.9774,
"step": 280
},
{
"epoch": 0.3287981859410431,
"grad_norm": 8.541576385498047,
"learning_rate": 2.7104794095998556e-05,
"loss": 1.9359,
"step": 290
},
{
"epoch": 0.3401360544217687,
"grad_norm": 9.097405433654785,
"learning_rate": 2.8039442168274367e-05,
"loss": 1.9943,
"step": 300
},
{
"epoch": 0.35147392290249435,
"grad_norm": 10.134627342224121,
"learning_rate": 2.8551341727859275e-05,
"loss": 1.9935,
"step": 310
},
{
"epoch": 0.36281179138321995,
"grad_norm": 9.275653839111328,
"learning_rate": 2.842911851840782e-05,
"loss": 1.9708,
"step": 320
},
{
"epoch": 0.3741496598639456,
"grad_norm": 10.57584285736084,
"learning_rate": 2.830689530895637e-05,
"loss": 1.8931,
"step": 330
},
{
"epoch": 0.3854875283446712,
"grad_norm": 8.968500137329102,
"learning_rate": 2.818467209950492e-05,
"loss": 2.0358,
"step": 340
},
{
"epoch": 0.3968253968253968,
"grad_norm": 7.630152702331543,
"learning_rate": 2.806244889005346e-05,
"loss": 1.9313,
"step": 350
},
{
"epoch": 0.40816326530612246,
"grad_norm": 10.709409713745117,
"learning_rate": 2.794022568060201e-05,
"loss": 1.8677,
"step": 360
},
{
"epoch": 0.41950113378684806,
"grad_norm": 8.903763771057129,
"learning_rate": 2.781800247115056e-05,
"loss": 1.9309,
"step": 370
},
{
"epoch": 0.4308390022675737,
"grad_norm": 11.22157096862793,
"learning_rate": 2.769577926169911e-05,
"loss": 1.9434,
"step": 380
},
{
"epoch": 0.4421768707482993,
"grad_norm": 13.553994178771973,
"learning_rate": 2.7573556052247655e-05,
"loss": 1.9612,
"step": 390
},
{
"epoch": 0.45351473922902497,
"grad_norm": 10.572993278503418,
"learning_rate": 2.7451332842796204e-05,
"loss": 1.9312,
"step": 400
},
{
"epoch": 0.46485260770975056,
"grad_norm": 10.051871299743652,
"learning_rate": 2.7329109633344753e-05,
"loss": 2.0256,
"step": 410
},
{
"epoch": 0.47619047619047616,
"grad_norm": 8.120019912719727,
"learning_rate": 2.7206886423893295e-05,
"loss": 1.8118,
"step": 420
},
{
"epoch": 0.4875283446712018,
"grad_norm": 8.908893585205078,
"learning_rate": 2.7084663214441844e-05,
"loss": 1.9479,
"step": 430
},
{
"epoch": 0.4988662131519274,
"grad_norm": 9.484359741210938,
"learning_rate": 2.6962440004990394e-05,
"loss": 1.9766,
"step": 440
},
{
"epoch": 0.5102040816326531,
"grad_norm": 9.914438247680664,
"learning_rate": 2.684021679553894e-05,
"loss": 1.9492,
"step": 450
},
{
"epoch": 0.5215419501133787,
"grad_norm": 10.76697063446045,
"learning_rate": 2.671799358608749e-05,
"loss": 1.9384,
"step": 460
},
{
"epoch": 0.5328798185941043,
"grad_norm": 11.358717918395996,
"learning_rate": 2.6595770376636038e-05,
"loss": 1.8805,
"step": 470
},
{
"epoch": 0.54421768707483,
"grad_norm": 10.204866409301758,
"learning_rate": 2.6473547167184583e-05,
"loss": 1.7569,
"step": 480
},
{
"epoch": 0.5555555555555556,
"grad_norm": 9.388846397399902,
"learning_rate": 2.635132395773313e-05,
"loss": 1.9267,
"step": 490
},
{
"epoch": 0.5668934240362812,
"grad_norm": 10.731882095336914,
"learning_rate": 2.6229100748281678e-05,
"loss": 1.9189,
"step": 500
},
{
"epoch": 0.5782312925170068,
"grad_norm": 17.54758644104004,
"learning_rate": 2.6106877538830227e-05,
"loss": 1.9435,
"step": 510
},
{
"epoch": 0.5895691609977324,
"grad_norm": 16.29359245300293,
"learning_rate": 2.5984654329378773e-05,
"loss": 1.9415,
"step": 520
},
{
"epoch": 0.6009070294784581,
"grad_norm": 12.303582191467285,
"learning_rate": 2.5862431119927322e-05,
"loss": 1.9217,
"step": 530
},
{
"epoch": 0.6122448979591837,
"grad_norm": 11.611120223999023,
"learning_rate": 2.574020791047587e-05,
"loss": 1.9101,
"step": 540
},
{
"epoch": 0.6235827664399093,
"grad_norm": 8.830730438232422,
"learning_rate": 2.5617984701024417e-05,
"loss": 1.7771,
"step": 550
},
{
"epoch": 0.6349206349206349,
"grad_norm": 17.929471969604492,
"learning_rate": 2.5495761491572966e-05,
"loss": 1.9179,
"step": 560
},
{
"epoch": 0.6462585034013606,
"grad_norm": 10.709088325500488,
"learning_rate": 2.5373538282121512e-05,
"loss": 1.9077,
"step": 570
},
{
"epoch": 0.6575963718820862,
"grad_norm": 11.527586936950684,
"learning_rate": 2.5251315072670058e-05,
"loss": 1.8224,
"step": 580
},
{
"epoch": 0.6689342403628118,
"grad_norm": 10.246384620666504,
"learning_rate": 2.5129091863218607e-05,
"loss": 1.8023,
"step": 590
},
{
"epoch": 0.6802721088435374,
"grad_norm": 16.176572799682617,
"learning_rate": 2.5006868653767156e-05,
"loss": 1.9447,
"step": 600
},
{
"epoch": 0.691609977324263,
"grad_norm": 13.913769721984863,
"learning_rate": 2.4884645444315702e-05,
"loss": 1.8641,
"step": 610
},
{
"epoch": 0.7029478458049887,
"grad_norm": 12.541318893432617,
"learning_rate": 2.476242223486425e-05,
"loss": 1.9395,
"step": 620
},
{
"epoch": 0.7142857142857143,
"grad_norm": 12.410688400268555,
"learning_rate": 2.46401990254128e-05,
"loss": 1.9303,
"step": 630
},
{
"epoch": 0.7256235827664399,
"grad_norm": 9.643453598022461,
"learning_rate": 2.4517975815961346e-05,
"loss": 1.9703,
"step": 640
},
{
"epoch": 0.7369614512471655,
"grad_norm": 19.51215934753418,
"learning_rate": 2.439575260650989e-05,
"loss": 1.8502,
"step": 650
},
{
"epoch": 0.7482993197278912,
"grad_norm": 16.120214462280273,
"learning_rate": 2.427352939705844e-05,
"loss": 1.8644,
"step": 660
},
{
"epoch": 0.7596371882086168,
"grad_norm": 9.631799697875977,
"learning_rate": 2.415130618760699e-05,
"loss": 1.7562,
"step": 670
},
{
"epoch": 0.7709750566893424,
"grad_norm": 11.26856803894043,
"learning_rate": 2.4029082978155535e-05,
"loss": 1.9283,
"step": 680
},
{
"epoch": 0.782312925170068,
"grad_norm": 14.097902297973633,
"learning_rate": 2.3906859768704085e-05,
"loss": 1.8127,
"step": 690
},
{
"epoch": 0.7936507936507936,
"grad_norm": 10.835921287536621,
"learning_rate": 2.3784636559252634e-05,
"loss": 1.6211,
"step": 700
},
{
"epoch": 0.8049886621315193,
"grad_norm": 13.751789093017578,
"learning_rate": 2.3662413349801176e-05,
"loss": 1.7555,
"step": 710
},
{
"epoch": 0.8163265306122449,
"grad_norm": 14.243096351623535,
"learning_rate": 2.3540190140349725e-05,
"loss": 1.6117,
"step": 720
},
{
"epoch": 0.8276643990929705,
"grad_norm": 15.838502883911133,
"learning_rate": 2.3417966930898274e-05,
"loss": 1.7722,
"step": 730
},
{
"epoch": 0.8390022675736961,
"grad_norm": 12.460963249206543,
"learning_rate": 2.329574372144682e-05,
"loss": 1.7567,
"step": 740
},
{
"epoch": 0.8503401360544217,
"grad_norm": 17.053138732910156,
"learning_rate": 2.317352051199537e-05,
"loss": 1.907,
"step": 750
},
{
"epoch": 0.8616780045351474,
"grad_norm": 12.155874252319336,
"learning_rate": 2.305129730254392e-05,
"loss": 1.9573,
"step": 760
},
{
"epoch": 0.873015873015873,
"grad_norm": 9.946329116821289,
"learning_rate": 2.2929074093092468e-05,
"loss": 1.7964,
"step": 770
},
{
"epoch": 0.8843537414965986,
"grad_norm": 13.480246543884277,
"learning_rate": 2.2806850883641013e-05,
"loss": 1.9251,
"step": 780
},
{
"epoch": 0.8956916099773242,
"grad_norm": 9.594596862792969,
"learning_rate": 2.268462767418956e-05,
"loss": 1.6834,
"step": 790
},
{
"epoch": 0.9070294784580499,
"grad_norm": 11.284895896911621,
"learning_rate": 2.2562404464738108e-05,
"loss": 1.7469,
"step": 800
},
{
"epoch": 0.9183673469387755,
"grad_norm": 13.537227630615234,
"learning_rate": 2.2440181255286654e-05,
"loss": 1.6632,
"step": 810
},
{
"epoch": 0.9297052154195011,
"grad_norm": 11.549741744995117,
"learning_rate": 2.2317958045835203e-05,
"loss": 1.788,
"step": 820
},
{
"epoch": 0.9410430839002267,
"grad_norm": 53.76149368286133,
"learning_rate": 2.2195734836383752e-05,
"loss": 1.8974,
"step": 830
},
{
"epoch": 0.9523809523809523,
"grad_norm": 9.953411102294922,
"learning_rate": 2.2073511626932298e-05,
"loss": 1.8805,
"step": 840
},
{
"epoch": 0.963718820861678,
"grad_norm": 18.290058135986328,
"learning_rate": 2.1951288417480847e-05,
"loss": 1.6644,
"step": 850
},
{
"epoch": 0.9750566893424036,
"grad_norm": 12.544032096862793,
"learning_rate": 2.1829065208029393e-05,
"loss": 1.6205,
"step": 860
},
{
"epoch": 0.9863945578231292,
"grad_norm": 20.213836669921875,
"learning_rate": 2.170684199857794e-05,
"loss": 1.8964,
"step": 870
},
{
"epoch": 0.9977324263038548,
"grad_norm": 17.492652893066406,
"learning_rate": 2.1584618789126488e-05,
"loss": 1.7601,
"step": 880
},
{
"epoch": 1.0,
"eval_classification_report": {
"accuracy": 0.3479986768111148,
"ar": {
"f1-score": 0.36553524804177545,
"precision": 0.358974358974359,
"recall": 0.3723404255319149,
"support": 376.0
},
"cl": {
"f1-score": 0.2568250758341759,
"precision": 0.3075060532687651,
"recall": 0.2204861111111111,
"support": 576.0
},
"co": {
"f1-score": 0.25892857142857145,
"precision": 0.5576923076923077,
"recall": 0.1686046511627907,
"support": 344.0
},
"es": {
"f1-score": 0.41894353369763204,
"precision": 0.4144144144144144,
"recall": 0.42357274401473294,
"support": 543.0
},
"macro avg": {
"f1-score": 0.3142646730168648,
"precision": 0.3640216920469342,
"recall": 0.3175752127167582,
"support": 3023.0
},
"mx": {
"f1-score": 0.38181818181818183,
"precision": 0.2786729857819905,
"recall": 0.6061855670103092,
"support": 485.0
},
"pe": {
"f1-score": 0.3090909090909091,
"precision": 0.3269230769230769,
"recall": 0.29310344827586204,
"support": 348.0
},
"pr": {
"f1-score": 0.5769230769230769,
"precision": 0.5607476635514018,
"recall": 0.594059405940594,
"support": 101.0
},
"uy": {
"f1-score": 0.26031746031746034,
"precision": 0.47126436781609193,
"recall": 0.17982456140350878,
"support": 228.0
},
"ve": {
"f1-score": 0.0,
"precision": 0.0,
"recall": 0.0,
"support": 22.0
},
"weighted avg": {
"f1-score": 0.3348651440888197,
"precision": 0.37776395808289953,
"recall": 0.3479986768111148,
"support": 3023.0
}
},
"eval_f1": 0.3142646730168648,
"eval_loss": 1.7349679470062256,
"eval_runtime": 4.5054,
"eval_samples_per_second": 670.979,
"eval_steps_per_second": 83.9,
"step": 882
},
{
"epoch": 1.0090702947845804,
"grad_norm": 15.810832023620605,
"learning_rate": 2.1462395579675037e-05,
"loss": 1.6151,
"step": 890
},
{
"epoch": 1.0204081632653061,
"grad_norm": 13.969820022583008,
"learning_rate": 2.1340172370223586e-05,
"loss": 1.6903,
"step": 900
},
{
"epoch": 1.0317460317460316,
"grad_norm": 10.878263473510742,
"learning_rate": 2.1217949160772132e-05,
"loss": 1.3787,
"step": 910
},
{
"epoch": 1.0430839002267573,
"grad_norm": 13.959477424621582,
"learning_rate": 2.109572595132068e-05,
"loss": 1.6575,
"step": 920
},
{
"epoch": 1.054421768707483,
"grad_norm": 17.30800437927246,
"learning_rate": 2.0973502741869227e-05,
"loss": 1.5016,
"step": 930
},
{
"epoch": 1.0657596371882085,
"grad_norm": 14.826261520385742,
"learning_rate": 2.0851279532417772e-05,
"loss": 1.6254,
"step": 940
},
{
"epoch": 1.0770975056689343,
"grad_norm": 15.56223201751709,
"learning_rate": 2.072905632296632e-05,
"loss": 1.5939,
"step": 950
},
{
"epoch": 1.08843537414966,
"grad_norm": 26.888315200805664,
"learning_rate": 2.060683311351487e-05,
"loss": 1.521,
"step": 960
},
{
"epoch": 1.0997732426303855,
"grad_norm": 15.208369255065918,
"learning_rate": 2.0484609904063416e-05,
"loss": 1.601,
"step": 970
},
{
"epoch": 1.1111111111111112,
"grad_norm": 20.432218551635742,
"learning_rate": 2.0362386694611965e-05,
"loss": 1.5258,
"step": 980
},
{
"epoch": 1.1224489795918366,
"grad_norm": 16.719057083129883,
"learning_rate": 2.0240163485160515e-05,
"loss": 1.4809,
"step": 990
},
{
"epoch": 1.1337868480725624,
"grad_norm": 20.315963745117188,
"learning_rate": 2.011794027570906e-05,
"loss": 1.4214,
"step": 1000
},
{
"epoch": 1.145124716553288,
"grad_norm": 17.45546531677246,
"learning_rate": 1.9995717066257606e-05,
"loss": 1.5853,
"step": 1010
},
{
"epoch": 1.1564625850340136,
"grad_norm": 16.830751419067383,
"learning_rate": 1.9873493856806155e-05,
"loss": 1.3777,
"step": 1020
},
{
"epoch": 1.1678004535147393,
"grad_norm": 19.829317092895508,
"learning_rate": 1.9751270647354704e-05,
"loss": 1.3531,
"step": 1030
},
{
"epoch": 1.179138321995465,
"grad_norm": 23.709720611572266,
"learning_rate": 1.962904743790325e-05,
"loss": 1.485,
"step": 1040
},
{
"epoch": 1.1904761904761905,
"grad_norm": 14.404341697692871,
"learning_rate": 1.95068242284518e-05,
"loss": 1.2469,
"step": 1050
},
{
"epoch": 1.2018140589569162,
"grad_norm": 26.16022300720215,
"learning_rate": 1.938460101900035e-05,
"loss": 1.5754,
"step": 1060
},
{
"epoch": 1.2131519274376417,
"grad_norm": 20.90950584411621,
"learning_rate": 1.9262377809548894e-05,
"loss": 1.7132,
"step": 1070
},
{
"epoch": 1.2244897959183674,
"grad_norm": 20.9376220703125,
"learning_rate": 1.914015460009744e-05,
"loss": 1.5629,
"step": 1080
},
{
"epoch": 1.235827664399093,
"grad_norm": 18.35310935974121,
"learning_rate": 1.901793139064599e-05,
"loss": 1.4152,
"step": 1090
},
{
"epoch": 1.2471655328798186,
"grad_norm": 18.69922637939453,
"learning_rate": 1.8895708181194535e-05,
"loss": 1.3757,
"step": 1100
},
{
"epoch": 1.2585034013605443,
"grad_norm": 15.565815925598145,
"learning_rate": 1.8773484971743084e-05,
"loss": 1.3995,
"step": 1110
},
{
"epoch": 1.2698412698412698,
"grad_norm": 20.754438400268555,
"learning_rate": 1.8651261762291633e-05,
"loss": 1.5485,
"step": 1120
},
{
"epoch": 1.2811791383219955,
"grad_norm": 31.195865631103516,
"learning_rate": 1.852903855284018e-05,
"loss": 1.4945,
"step": 1130
},
{
"epoch": 1.2925170068027212,
"grad_norm": 22.87392234802246,
"learning_rate": 1.8406815343388728e-05,
"loss": 1.5909,
"step": 1140
},
{
"epoch": 1.3038548752834467,
"grad_norm": 16.350553512573242,
"learning_rate": 1.8284592133937274e-05,
"loss": 1.3411,
"step": 1150
},
{
"epoch": 1.3151927437641724,
"grad_norm": 18.95813751220703,
"learning_rate": 1.8162368924485823e-05,
"loss": 1.5416,
"step": 1160
},
{
"epoch": 1.3265306122448979,
"grad_norm": 21.95383071899414,
"learning_rate": 1.804014571503437e-05,
"loss": 1.4762,
"step": 1170
},
{
"epoch": 1.3378684807256236,
"grad_norm": 22.101272583007812,
"learning_rate": 1.7917922505582918e-05,
"loss": 1.4737,
"step": 1180
},
{
"epoch": 1.3492063492063493,
"grad_norm": 21.4808406829834,
"learning_rate": 1.7795699296131467e-05,
"loss": 1.2548,
"step": 1190
},
{
"epoch": 1.3605442176870748,
"grad_norm": 17.070913314819336,
"learning_rate": 1.7673476086680013e-05,
"loss": 1.438,
"step": 1200
},
{
"epoch": 1.3718820861678005,
"grad_norm": 23.818998336791992,
"learning_rate": 1.7551252877228562e-05,
"loss": 1.4455,
"step": 1210
},
{
"epoch": 1.383219954648526,
"grad_norm": 25.911645889282227,
"learning_rate": 1.742902966777711e-05,
"loss": 1.4403,
"step": 1220
},
{
"epoch": 1.3945578231292517,
"grad_norm": 16.849903106689453,
"learning_rate": 1.7306806458325653e-05,
"loss": 1.3663,
"step": 1230
},
{
"epoch": 1.4058956916099774,
"grad_norm": 16.710933685302734,
"learning_rate": 1.7184583248874202e-05,
"loss": 1.3915,
"step": 1240
},
{
"epoch": 1.417233560090703,
"grad_norm": 22.40735626220703,
"learning_rate": 1.706236003942275e-05,
"loss": 1.472,
"step": 1250
},
{
"epoch": 1.4285714285714286,
"grad_norm": 23.049968719482422,
"learning_rate": 1.6940136829971297e-05,
"loss": 1.3523,
"step": 1260
},
{
"epoch": 1.439909297052154,
"grad_norm": 12.89521598815918,
"learning_rate": 1.6817913620519846e-05,
"loss": 1.5376,
"step": 1270
},
{
"epoch": 1.4512471655328798,
"grad_norm": 19.498533248901367,
"learning_rate": 1.6695690411068396e-05,
"loss": 1.5389,
"step": 1280
},
{
"epoch": 1.4625850340136055,
"grad_norm": 18.624237060546875,
"learning_rate": 1.657346720161694e-05,
"loss": 1.4731,
"step": 1290
},
{
"epoch": 1.473922902494331,
"grad_norm": 21.690345764160156,
"learning_rate": 1.6451243992165487e-05,
"loss": 1.3718,
"step": 1300
},
{
"epoch": 1.4852607709750567,
"grad_norm": 21.58686065673828,
"learning_rate": 1.6329020782714036e-05,
"loss": 1.6325,
"step": 1310
},
{
"epoch": 1.4965986394557822,
"grad_norm": 17.337610244750977,
"learning_rate": 1.6206797573262585e-05,
"loss": 1.5196,
"step": 1320
},
{
"epoch": 1.507936507936508,
"grad_norm": 22.74974822998047,
"learning_rate": 1.608457436381113e-05,
"loss": 1.6169,
"step": 1330
},
{
"epoch": 1.5192743764172336,
"grad_norm": 24.127099990844727,
"learning_rate": 1.596235115435968e-05,
"loss": 1.3358,
"step": 1340
},
{
"epoch": 1.5306122448979593,
"grad_norm": 18.630477905273438,
"learning_rate": 1.584012794490823e-05,
"loss": 1.2559,
"step": 1350
},
{
"epoch": 1.5419501133786848,
"grad_norm": 22.353515625,
"learning_rate": 1.5717904735456775e-05,
"loss": 1.3693,
"step": 1360
},
{
"epoch": 1.5532879818594103,
"grad_norm": 25.24136734008789,
"learning_rate": 1.559568152600532e-05,
"loss": 1.4876,
"step": 1370
},
{
"epoch": 1.564625850340136,
"grad_norm": 13.99299144744873,
"learning_rate": 1.547345831655387e-05,
"loss": 1.3684,
"step": 1380
},
{
"epoch": 1.5759637188208617,
"grad_norm": 17.318729400634766,
"learning_rate": 1.5351235107102416e-05,
"loss": 1.2686,
"step": 1390
},
{
"epoch": 1.5873015873015874,
"grad_norm": 16.49215316772461,
"learning_rate": 1.5229011897650965e-05,
"loss": 1.3287,
"step": 1400
},
{
"epoch": 1.598639455782313,
"grad_norm": 17.993852615356445,
"learning_rate": 1.5106788688199514e-05,
"loss": 1.3661,
"step": 1410
},
{
"epoch": 1.6099773242630384,
"grad_norm": 17.669912338256836,
"learning_rate": 1.4984565478748061e-05,
"loss": 1.4371,
"step": 1420
},
{
"epoch": 1.6213151927437641,
"grad_norm": 32.347835540771484,
"learning_rate": 1.4862342269296609e-05,
"loss": 1.5957,
"step": 1430
},
{
"epoch": 1.6326530612244898,
"grad_norm": 15.98144245147705,
"learning_rate": 1.4740119059845155e-05,
"loss": 1.3895,
"step": 1440
},
{
"epoch": 1.6439909297052155,
"grad_norm": 15.446006774902344,
"learning_rate": 1.4617895850393702e-05,
"loss": 1.4985,
"step": 1450
},
{
"epoch": 1.655328798185941,
"grad_norm": 20.650182723999023,
"learning_rate": 1.4495672640942251e-05,
"loss": 1.3036,
"step": 1460
},
{
"epoch": 1.6666666666666665,
"grad_norm": 18.82501220703125,
"learning_rate": 1.4373449431490799e-05,
"loss": 1.3777,
"step": 1470
},
{
"epoch": 1.6780045351473922,
"grad_norm": 24.08733558654785,
"learning_rate": 1.4251226222039346e-05,
"loss": 1.4847,
"step": 1480
},
{
"epoch": 1.689342403628118,
"grad_norm": 15.655111312866211,
"learning_rate": 1.4129003012587895e-05,
"loss": 1.6208,
"step": 1490
},
{
"epoch": 1.7006802721088436,
"grad_norm": 14.29283618927002,
"learning_rate": 1.4006779803136441e-05,
"loss": 1.4783,
"step": 1500
},
{
"epoch": 1.7120181405895691,
"grad_norm": 27.245153427124023,
"learning_rate": 1.388455659368499e-05,
"loss": 1.3621,
"step": 1510
},
{
"epoch": 1.7233560090702946,
"grad_norm": 17.18270492553711,
"learning_rate": 1.3762333384233537e-05,
"loss": 1.4615,
"step": 1520
},
{
"epoch": 1.7346938775510203,
"grad_norm": 30.546113967895508,
"learning_rate": 1.3640110174782085e-05,
"loss": 1.4101,
"step": 1530
},
{
"epoch": 1.746031746031746,
"grad_norm": 27.630264282226562,
"learning_rate": 1.3517886965330632e-05,
"loss": 1.5457,
"step": 1540
},
{
"epoch": 1.7573696145124718,
"grad_norm": 24.351619720458984,
"learning_rate": 1.339566375587918e-05,
"loss": 1.4364,
"step": 1550
},
{
"epoch": 1.7687074829931972,
"grad_norm": 22.478717803955078,
"learning_rate": 1.3273440546427727e-05,
"loss": 1.5078,
"step": 1560
},
{
"epoch": 1.780045351473923,
"grad_norm": 33.06885528564453,
"learning_rate": 1.3151217336976275e-05,
"loss": 1.3344,
"step": 1570
},
{
"epoch": 1.7913832199546484,
"grad_norm": 25.309829711914062,
"learning_rate": 1.3028994127524822e-05,
"loss": 1.4234,
"step": 1580
},
{
"epoch": 1.8027210884353742,
"grad_norm": 26.68161392211914,
"learning_rate": 1.2906770918073371e-05,
"loss": 1.326,
"step": 1590
},
{
"epoch": 1.8140589569160999,
"grad_norm": 24.11896324157715,
"learning_rate": 1.2784547708621919e-05,
"loss": 1.5031,
"step": 1600
},
{
"epoch": 1.8253968253968254,
"grad_norm": 19.29245948791504,
"learning_rate": 1.2662324499170464e-05,
"loss": 1.3132,
"step": 1610
},
{
"epoch": 1.836734693877551,
"grad_norm": 18.402624130249023,
"learning_rate": 1.2540101289719014e-05,
"loss": 1.359,
"step": 1620
},
{
"epoch": 1.8480725623582765,
"grad_norm": 22.64293670654297,
"learning_rate": 1.2417878080267561e-05,
"loss": 1.6493,
"step": 1630
},
{
"epoch": 1.8594104308390023,
"grad_norm": 20.680465698242188,
"learning_rate": 1.2295654870816108e-05,
"loss": 1.3705,
"step": 1640
},
{
"epoch": 1.870748299319728,
"grad_norm": 21.203262329101562,
"learning_rate": 1.2173431661364656e-05,
"loss": 1.4593,
"step": 1650
},
{
"epoch": 1.8820861678004537,
"grad_norm": 19.054349899291992,
"learning_rate": 1.2051208451913203e-05,
"loss": 1.316,
"step": 1660
},
{
"epoch": 1.8934240362811792,
"grad_norm": 18.196651458740234,
"learning_rate": 1.1928985242461752e-05,
"loss": 1.4121,
"step": 1670
},
{
"epoch": 1.9047619047619047,
"grad_norm": 14.884149551391602,
"learning_rate": 1.1806762033010298e-05,
"loss": 1.2392,
"step": 1680
},
{
"epoch": 1.9160997732426304,
"grad_norm": 21.171005249023438,
"learning_rate": 1.1684538823558846e-05,
"loss": 1.3379,
"step": 1690
},
{
"epoch": 1.927437641723356,
"grad_norm": 18.805988311767578,
"learning_rate": 1.1562315614107395e-05,
"loss": 1.3454,
"step": 1700
},
{
"epoch": 1.9387755102040818,
"grad_norm": 19.397449493408203,
"learning_rate": 1.1440092404655942e-05,
"loss": 1.4548,
"step": 1710
},
{
"epoch": 1.9501133786848073,
"grad_norm": 20.647939682006836,
"learning_rate": 1.131786919520449e-05,
"loss": 1.4206,
"step": 1720
},
{
"epoch": 1.9614512471655328,
"grad_norm": 22.62149429321289,
"learning_rate": 1.1195645985753037e-05,
"loss": 1.3777,
"step": 1730
},
{
"epoch": 1.9727891156462585,
"grad_norm": 27.34062385559082,
"learning_rate": 1.1073422776301585e-05,
"loss": 1.3447,
"step": 1740
},
{
"epoch": 1.9841269841269842,
"grad_norm": 29.27070426940918,
"learning_rate": 1.0951199566850134e-05,
"loss": 1.3864,
"step": 1750
},
{
"epoch": 1.99546485260771,
"grad_norm": 16.5633487701416,
"learning_rate": 1.082897635739868e-05,
"loss": 1.2893,
"step": 1760
},
{
"epoch": 2.0,
"eval_classification_report": {
"accuracy": 0.3989414488918293,
"ar": {
"f1-score": 0.3793103448275862,
"precision": 0.4618320610687023,
"recall": 0.32180851063829785,
"support": 376.0
},
"cl": {
"f1-score": 0.37325038880248834,
"precision": 0.3380281690140845,
"recall": 0.4166666666666667,
"support": 576.0
},
"co": {
"f1-score": 0.3398230088495575,
"precision": 0.4343891402714932,
"recall": 0.27906976744186046,
"support": 344.0
},
"es": {
"f1-score": 0.4501323918799647,
"precision": 0.4322033898305085,
"recall": 0.4696132596685083,
"support": 543.0
},
"macro avg": {
"f1-score": 0.3755741665997943,
"precision": 0.4098704182426044,
"recall": 0.3639331480997811,
"support": 3023.0
},
"mx": {
"f1-score": 0.4408888888888889,
"precision": 0.3875,
"recall": 0.511340206185567,
"support": 485.0
},
"pe": {
"f1-score": 0.35097493036211697,
"precision": 0.34054054054054056,
"recall": 0.3620689655172414,
"support": 348.0
},
"pr": {
"f1-score": 0.6666666666666666,
"precision": 0.7159090909090909,
"recall": 0.6237623762376238,
"support": 101.0
},
"uy": {
"f1-score": 0.3076923076923077,
"precision": 0.4117647058823529,
"recall": 0.24561403508771928,
"support": 228.0
},
"ve": {
"f1-score": 0.07142857142857142,
"precision": 0.16666666666666666,
"recall": 0.045454545454545456,
"support": 22.0
},
"weighted avg": {
"f1-score": 0.3949596137532948,
"precision": 0.40647397656072576,
"recall": 0.3989414488918293,
"support": 3023.0
}
},
"eval_f1": 0.3755741665997943,
"eval_loss": 1.6900478601455688,
"eval_runtime": 4.3866,
"eval_samples_per_second": 689.137,
"eval_steps_per_second": 86.171,
"step": 1764
}
],
"logging_steps": 10,
"max_steps": 2646,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 463959726481152.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}