{ "best_metric": 0.3755741665997943, "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_9/checkpoint-1764", "epoch": 2.0, "eval_steps": 500, "global_step": 1764, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011337868480725623, "grad_norm": 9.890141487121582, "learning_rate": 9.346480722758123e-07, "loss": 2.2337, "step": 10 }, { "epoch": 0.022675736961451247, "grad_norm": 10.97945785522461, "learning_rate": 1.8692961445516245e-06, "loss": 2.2359, "step": 20 }, { "epoch": 0.034013605442176874, "grad_norm": 10.244830131530762, "learning_rate": 2.8039442168274367e-06, "loss": 2.2034, "step": 30 }, { "epoch": 0.045351473922902494, "grad_norm": 11.170487403869629, "learning_rate": 3.738592289103249e-06, "loss": 2.2238, "step": 40 }, { "epoch": 0.05668934240362812, "grad_norm": 8.994032859802246, "learning_rate": 4.673240361379061e-06, "loss": 2.143, "step": 50 }, { "epoch": 0.06802721088435375, "grad_norm": 11.781458854675293, "learning_rate": 5.607888433654873e-06, "loss": 2.1299, "step": 60 }, { "epoch": 0.07936507936507936, "grad_norm": 10.54732894897461, "learning_rate": 6.542536505930685e-06, "loss": 2.102, "step": 70 }, { "epoch": 0.09070294784580499, "grad_norm": 8.15542221069336, "learning_rate": 7.477184578206498e-06, "loss": 2.0436, "step": 80 }, { "epoch": 0.10204081632653061, "grad_norm": 12.086600303649902, "learning_rate": 8.41183265048231e-06, "loss": 2.0417, "step": 90 }, { "epoch": 0.11337868480725624, "grad_norm": 10.285418510437012, "learning_rate": 9.346480722758123e-06, "loss": 2.0767, "step": 100 }, { "epoch": 0.12471655328798185, "grad_norm": 10.342191696166992, "learning_rate": 1.0281128795033934e-05, "loss": 2.0401, "step": 110 }, { "epoch": 0.1360544217687075, "grad_norm": 9.20302677154541, "learning_rate": 1.1215776867309747e-05, "loss": 1.99, "step": 120 }, { "epoch": 0.1473922902494331, "grad_norm": 9.420401573181152, "learning_rate": 1.215042493958556e-05, "loss": 1.9867, "step": 130 }, { "epoch": 0.15873015873015872, "grad_norm": 7.75075101852417, "learning_rate": 1.308507301186137e-05, "loss": 1.9988, "step": 140 }, { "epoch": 0.17006802721088435, "grad_norm": 8.807808876037598, "learning_rate": 1.4019721084137183e-05, "loss": 1.9886, "step": 150 }, { "epoch": 0.18140589569160998, "grad_norm": 10.291740417480469, "learning_rate": 1.4954369156412996e-05, "loss": 1.9393, "step": 160 }, { "epoch": 0.1927437641723356, "grad_norm": 8.806387901306152, "learning_rate": 1.588901722868881e-05, "loss": 1.9602, "step": 170 }, { "epoch": 0.20408163265306123, "grad_norm": 7.684463977813721, "learning_rate": 1.682366530096462e-05, "loss": 2.0044, "step": 180 }, { "epoch": 0.21541950113378686, "grad_norm": 8.645553588867188, "learning_rate": 1.7758313373240435e-05, "loss": 2.0519, "step": 190 }, { "epoch": 0.22675736961451248, "grad_norm": 9.162885665893555, "learning_rate": 1.8692961445516246e-05, "loss": 2.0516, "step": 200 }, { "epoch": 0.23809523809523808, "grad_norm": 7.536770343780518, "learning_rate": 1.9627609517792057e-05, "loss": 2.0299, "step": 210 }, { "epoch": 0.2494331065759637, "grad_norm": 8.47108268737793, "learning_rate": 2.0562257590067868e-05, "loss": 2.0816, "step": 220 }, { "epoch": 0.26077097505668934, "grad_norm": 6.5353498458862305, "learning_rate": 2.1496905662343682e-05, "loss": 2.0053, "step": 230 }, { "epoch": 0.272108843537415, "grad_norm": 7.390655994415283, "learning_rate": 2.2431553734619493e-05, "loss": 1.956, "step": 240 }, { "epoch": 0.2834467120181406, "grad_norm": 11.590116500854492, "learning_rate": 2.3366201806895304e-05, "loss": 1.9745, "step": 250 }, { "epoch": 0.2947845804988662, "grad_norm": 7.277239799499512, "learning_rate": 2.430084987917112e-05, "loss": 2.0846, "step": 260 }, { "epoch": 0.30612244897959184, "grad_norm": 7.210775375366211, "learning_rate": 2.523549795144693e-05, "loss": 1.9379, "step": 270 }, { "epoch": 0.31746031746031744, "grad_norm": 9.101860046386719, "learning_rate": 2.617014602372274e-05, "loss": 1.9774, "step": 280 }, { "epoch": 0.3287981859410431, "grad_norm": 8.541576385498047, "learning_rate": 2.7104794095998556e-05, "loss": 1.9359, "step": 290 }, { "epoch": 0.3401360544217687, "grad_norm": 9.097405433654785, "learning_rate": 2.8039442168274367e-05, "loss": 1.9943, "step": 300 }, { "epoch": 0.35147392290249435, "grad_norm": 10.134627342224121, "learning_rate": 2.8551341727859275e-05, "loss": 1.9935, "step": 310 }, { "epoch": 0.36281179138321995, "grad_norm": 9.275653839111328, "learning_rate": 2.842911851840782e-05, "loss": 1.9708, "step": 320 }, { "epoch": 0.3741496598639456, "grad_norm": 10.57584285736084, "learning_rate": 2.830689530895637e-05, "loss": 1.8931, "step": 330 }, { "epoch": 0.3854875283446712, "grad_norm": 8.968500137329102, "learning_rate": 2.818467209950492e-05, "loss": 2.0358, "step": 340 }, { "epoch": 0.3968253968253968, "grad_norm": 7.630152702331543, "learning_rate": 2.806244889005346e-05, "loss": 1.9313, "step": 350 }, { "epoch": 0.40816326530612246, "grad_norm": 10.709409713745117, "learning_rate": 2.794022568060201e-05, "loss": 1.8677, "step": 360 }, { "epoch": 0.41950113378684806, "grad_norm": 8.903763771057129, "learning_rate": 2.781800247115056e-05, "loss": 1.9309, "step": 370 }, { "epoch": 0.4308390022675737, "grad_norm": 11.22157096862793, "learning_rate": 2.769577926169911e-05, "loss": 1.9434, "step": 380 }, { "epoch": 0.4421768707482993, "grad_norm": 13.553994178771973, "learning_rate": 2.7573556052247655e-05, "loss": 1.9612, "step": 390 }, { "epoch": 0.45351473922902497, "grad_norm": 10.572993278503418, "learning_rate": 2.7451332842796204e-05, "loss": 1.9312, "step": 400 }, { "epoch": 0.46485260770975056, "grad_norm": 10.051871299743652, "learning_rate": 2.7329109633344753e-05, "loss": 2.0256, "step": 410 }, { "epoch": 0.47619047619047616, "grad_norm": 8.120019912719727, "learning_rate": 2.7206886423893295e-05, "loss": 1.8118, "step": 420 }, { "epoch": 0.4875283446712018, "grad_norm": 8.908893585205078, "learning_rate": 2.7084663214441844e-05, "loss": 1.9479, "step": 430 }, { "epoch": 0.4988662131519274, "grad_norm": 9.484359741210938, "learning_rate": 2.6962440004990394e-05, "loss": 1.9766, "step": 440 }, { "epoch": 0.5102040816326531, "grad_norm": 9.914438247680664, "learning_rate": 2.684021679553894e-05, "loss": 1.9492, "step": 450 }, { "epoch": 0.5215419501133787, "grad_norm": 10.76697063446045, "learning_rate": 2.671799358608749e-05, "loss": 1.9384, "step": 460 }, { "epoch": 0.5328798185941043, "grad_norm": 11.358717918395996, "learning_rate": 2.6595770376636038e-05, "loss": 1.8805, "step": 470 }, { "epoch": 0.54421768707483, "grad_norm": 10.204866409301758, "learning_rate": 2.6473547167184583e-05, "loss": 1.7569, "step": 480 }, { "epoch": 0.5555555555555556, "grad_norm": 9.388846397399902, "learning_rate": 2.635132395773313e-05, "loss": 1.9267, "step": 490 }, { "epoch": 0.5668934240362812, "grad_norm": 10.731882095336914, "learning_rate": 2.6229100748281678e-05, "loss": 1.9189, "step": 500 }, { "epoch": 0.5782312925170068, "grad_norm": 17.54758644104004, "learning_rate": 2.6106877538830227e-05, "loss": 1.9435, "step": 510 }, { "epoch": 0.5895691609977324, "grad_norm": 16.29359245300293, "learning_rate": 2.5984654329378773e-05, "loss": 1.9415, "step": 520 }, { "epoch": 0.6009070294784581, "grad_norm": 12.303582191467285, "learning_rate": 2.5862431119927322e-05, "loss": 1.9217, "step": 530 }, { "epoch": 0.6122448979591837, "grad_norm": 11.611120223999023, "learning_rate": 2.574020791047587e-05, "loss": 1.9101, "step": 540 }, { "epoch": 0.6235827664399093, "grad_norm": 8.830730438232422, "learning_rate": 2.5617984701024417e-05, "loss": 1.7771, "step": 550 }, { "epoch": 0.6349206349206349, "grad_norm": 17.929471969604492, "learning_rate": 2.5495761491572966e-05, "loss": 1.9179, "step": 560 }, { "epoch": 0.6462585034013606, "grad_norm": 10.709088325500488, "learning_rate": 2.5373538282121512e-05, "loss": 1.9077, "step": 570 }, { "epoch": 0.6575963718820862, "grad_norm": 11.527586936950684, "learning_rate": 2.5251315072670058e-05, "loss": 1.8224, "step": 580 }, { "epoch": 0.6689342403628118, "grad_norm": 10.246384620666504, "learning_rate": 2.5129091863218607e-05, "loss": 1.8023, "step": 590 }, { "epoch": 0.6802721088435374, "grad_norm": 16.176572799682617, "learning_rate": 2.5006868653767156e-05, "loss": 1.9447, "step": 600 }, { "epoch": 0.691609977324263, "grad_norm": 13.913769721984863, "learning_rate": 2.4884645444315702e-05, "loss": 1.8641, "step": 610 }, { "epoch": 0.7029478458049887, "grad_norm": 12.541318893432617, "learning_rate": 2.476242223486425e-05, "loss": 1.9395, "step": 620 }, { "epoch": 0.7142857142857143, "grad_norm": 12.410688400268555, "learning_rate": 2.46401990254128e-05, "loss": 1.9303, "step": 630 }, { "epoch": 0.7256235827664399, "grad_norm": 9.643453598022461, "learning_rate": 2.4517975815961346e-05, "loss": 1.9703, "step": 640 }, { "epoch": 0.7369614512471655, "grad_norm": 19.51215934753418, "learning_rate": 2.439575260650989e-05, "loss": 1.8502, "step": 650 }, { "epoch": 0.7482993197278912, "grad_norm": 16.120214462280273, "learning_rate": 2.427352939705844e-05, "loss": 1.8644, "step": 660 }, { "epoch": 0.7596371882086168, "grad_norm": 9.631799697875977, "learning_rate": 2.415130618760699e-05, "loss": 1.7562, "step": 670 }, { "epoch": 0.7709750566893424, "grad_norm": 11.26856803894043, "learning_rate": 2.4029082978155535e-05, "loss": 1.9283, "step": 680 }, { "epoch": 0.782312925170068, "grad_norm": 14.097902297973633, "learning_rate": 2.3906859768704085e-05, "loss": 1.8127, "step": 690 }, { "epoch": 0.7936507936507936, "grad_norm": 10.835921287536621, "learning_rate": 2.3784636559252634e-05, "loss": 1.6211, "step": 700 }, { "epoch": 0.8049886621315193, "grad_norm": 13.751789093017578, "learning_rate": 2.3662413349801176e-05, "loss": 1.7555, "step": 710 }, { "epoch": 0.8163265306122449, "grad_norm": 14.243096351623535, "learning_rate": 2.3540190140349725e-05, "loss": 1.6117, "step": 720 }, { "epoch": 0.8276643990929705, "grad_norm": 15.838502883911133, "learning_rate": 2.3417966930898274e-05, "loss": 1.7722, "step": 730 }, { "epoch": 0.8390022675736961, "grad_norm": 12.460963249206543, "learning_rate": 2.329574372144682e-05, "loss": 1.7567, "step": 740 }, { "epoch": 0.8503401360544217, "grad_norm": 17.053138732910156, "learning_rate": 2.317352051199537e-05, "loss": 1.907, "step": 750 }, { "epoch": 0.8616780045351474, "grad_norm": 12.155874252319336, "learning_rate": 2.305129730254392e-05, "loss": 1.9573, "step": 760 }, { "epoch": 0.873015873015873, "grad_norm": 9.946329116821289, "learning_rate": 2.2929074093092468e-05, "loss": 1.7964, "step": 770 }, { "epoch": 0.8843537414965986, "grad_norm": 13.480246543884277, "learning_rate": 2.2806850883641013e-05, "loss": 1.9251, "step": 780 }, { "epoch": 0.8956916099773242, "grad_norm": 9.594596862792969, "learning_rate": 2.268462767418956e-05, "loss": 1.6834, "step": 790 }, { "epoch": 0.9070294784580499, "grad_norm": 11.284895896911621, "learning_rate": 2.2562404464738108e-05, "loss": 1.7469, "step": 800 }, { "epoch": 0.9183673469387755, "grad_norm": 13.537227630615234, "learning_rate": 2.2440181255286654e-05, "loss": 1.6632, "step": 810 }, { "epoch": 0.9297052154195011, "grad_norm": 11.549741744995117, "learning_rate": 2.2317958045835203e-05, "loss": 1.788, "step": 820 }, { "epoch": 0.9410430839002267, "grad_norm": 53.76149368286133, "learning_rate": 2.2195734836383752e-05, "loss": 1.8974, "step": 830 }, { "epoch": 0.9523809523809523, "grad_norm": 9.953411102294922, "learning_rate": 2.2073511626932298e-05, "loss": 1.8805, "step": 840 }, { "epoch": 0.963718820861678, "grad_norm": 18.290058135986328, "learning_rate": 2.1951288417480847e-05, "loss": 1.6644, "step": 850 }, { "epoch": 0.9750566893424036, "grad_norm": 12.544032096862793, "learning_rate": 2.1829065208029393e-05, "loss": 1.6205, "step": 860 }, { "epoch": 0.9863945578231292, "grad_norm": 20.213836669921875, "learning_rate": 2.170684199857794e-05, "loss": 1.8964, "step": 870 }, { "epoch": 0.9977324263038548, "grad_norm": 17.492652893066406, "learning_rate": 2.1584618789126488e-05, "loss": 1.7601, "step": 880 }, { "epoch": 1.0, "eval_classification_report": { "accuracy": 0.3479986768111148, "ar": { "f1-score": 0.36553524804177545, "precision": 0.358974358974359, "recall": 0.3723404255319149, "support": 376.0 }, "cl": { "f1-score": 0.2568250758341759, "precision": 0.3075060532687651, "recall": 0.2204861111111111, "support": 576.0 }, "co": { "f1-score": 0.25892857142857145, "precision": 0.5576923076923077, "recall": 0.1686046511627907, "support": 344.0 }, "es": { "f1-score": 0.41894353369763204, "precision": 0.4144144144144144, "recall": 0.42357274401473294, "support": 543.0 }, "macro avg": { "f1-score": 0.3142646730168648, "precision": 0.3640216920469342, "recall": 0.3175752127167582, "support": 3023.0 }, "mx": { "f1-score": 0.38181818181818183, "precision": 0.2786729857819905, "recall": 0.6061855670103092, "support": 485.0 }, "pe": { "f1-score": 0.3090909090909091, "precision": 0.3269230769230769, "recall": 0.29310344827586204, "support": 348.0 }, "pr": { "f1-score": 0.5769230769230769, "precision": 0.5607476635514018, "recall": 0.594059405940594, "support": 101.0 }, "uy": { "f1-score": 0.26031746031746034, "precision": 0.47126436781609193, "recall": 0.17982456140350878, "support": 228.0 }, "ve": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 22.0 }, "weighted avg": { "f1-score": 0.3348651440888197, "precision": 0.37776395808289953, "recall": 0.3479986768111148, "support": 3023.0 } }, "eval_f1": 0.3142646730168648, "eval_loss": 1.7349679470062256, "eval_runtime": 4.5054, "eval_samples_per_second": 670.979, "eval_steps_per_second": 83.9, "step": 882 }, { "epoch": 1.0090702947845804, "grad_norm": 15.810832023620605, "learning_rate": 2.1462395579675037e-05, "loss": 1.6151, "step": 890 }, { "epoch": 1.0204081632653061, "grad_norm": 13.969820022583008, "learning_rate": 2.1340172370223586e-05, "loss": 1.6903, "step": 900 }, { "epoch": 1.0317460317460316, "grad_norm": 10.878263473510742, "learning_rate": 2.1217949160772132e-05, "loss": 1.3787, "step": 910 }, { "epoch": 1.0430839002267573, "grad_norm": 13.959477424621582, "learning_rate": 2.109572595132068e-05, "loss": 1.6575, "step": 920 }, { "epoch": 1.054421768707483, "grad_norm": 17.30800437927246, "learning_rate": 2.0973502741869227e-05, "loss": 1.5016, "step": 930 }, { "epoch": 1.0657596371882085, "grad_norm": 14.826261520385742, "learning_rate": 2.0851279532417772e-05, "loss": 1.6254, "step": 940 }, { "epoch": 1.0770975056689343, "grad_norm": 15.56223201751709, "learning_rate": 2.072905632296632e-05, "loss": 1.5939, "step": 950 }, { "epoch": 1.08843537414966, "grad_norm": 26.888315200805664, "learning_rate": 2.060683311351487e-05, "loss": 1.521, "step": 960 }, { "epoch": 1.0997732426303855, "grad_norm": 15.208369255065918, "learning_rate": 2.0484609904063416e-05, "loss": 1.601, "step": 970 }, { "epoch": 1.1111111111111112, "grad_norm": 20.432218551635742, "learning_rate": 2.0362386694611965e-05, "loss": 1.5258, "step": 980 }, { "epoch": 1.1224489795918366, "grad_norm": 16.719057083129883, "learning_rate": 2.0240163485160515e-05, "loss": 1.4809, "step": 990 }, { "epoch": 1.1337868480725624, "grad_norm": 20.315963745117188, "learning_rate": 2.011794027570906e-05, "loss": 1.4214, "step": 1000 }, { "epoch": 1.145124716553288, "grad_norm": 17.45546531677246, "learning_rate": 1.9995717066257606e-05, "loss": 1.5853, "step": 1010 }, { "epoch": 1.1564625850340136, "grad_norm": 16.830751419067383, "learning_rate": 1.9873493856806155e-05, "loss": 1.3777, "step": 1020 }, { "epoch": 1.1678004535147393, "grad_norm": 19.829317092895508, "learning_rate": 1.9751270647354704e-05, "loss": 1.3531, "step": 1030 }, { "epoch": 1.179138321995465, "grad_norm": 23.709720611572266, "learning_rate": 1.962904743790325e-05, "loss": 1.485, "step": 1040 }, { "epoch": 1.1904761904761905, "grad_norm": 14.404341697692871, "learning_rate": 1.95068242284518e-05, "loss": 1.2469, "step": 1050 }, { "epoch": 1.2018140589569162, "grad_norm": 26.16022300720215, "learning_rate": 1.938460101900035e-05, "loss": 1.5754, "step": 1060 }, { "epoch": 1.2131519274376417, "grad_norm": 20.90950584411621, "learning_rate": 1.9262377809548894e-05, "loss": 1.7132, "step": 1070 }, { "epoch": 1.2244897959183674, "grad_norm": 20.9376220703125, "learning_rate": 1.914015460009744e-05, "loss": 1.5629, "step": 1080 }, { "epoch": 1.235827664399093, "grad_norm": 18.35310935974121, "learning_rate": 1.901793139064599e-05, "loss": 1.4152, "step": 1090 }, { "epoch": 1.2471655328798186, "grad_norm": 18.69922637939453, "learning_rate": 1.8895708181194535e-05, "loss": 1.3757, "step": 1100 }, { "epoch": 1.2585034013605443, "grad_norm": 15.565815925598145, "learning_rate": 1.8773484971743084e-05, "loss": 1.3995, "step": 1110 }, { "epoch": 1.2698412698412698, "grad_norm": 20.754438400268555, "learning_rate": 1.8651261762291633e-05, "loss": 1.5485, "step": 1120 }, { "epoch": 1.2811791383219955, "grad_norm": 31.195865631103516, "learning_rate": 1.852903855284018e-05, "loss": 1.4945, "step": 1130 }, { "epoch": 1.2925170068027212, "grad_norm": 22.87392234802246, "learning_rate": 1.8406815343388728e-05, "loss": 1.5909, "step": 1140 }, { "epoch": 1.3038548752834467, "grad_norm": 16.350553512573242, "learning_rate": 1.8284592133937274e-05, "loss": 1.3411, "step": 1150 }, { "epoch": 1.3151927437641724, "grad_norm": 18.95813751220703, "learning_rate": 1.8162368924485823e-05, "loss": 1.5416, "step": 1160 }, { "epoch": 1.3265306122448979, "grad_norm": 21.95383071899414, "learning_rate": 1.804014571503437e-05, "loss": 1.4762, "step": 1170 }, { "epoch": 1.3378684807256236, "grad_norm": 22.101272583007812, "learning_rate": 1.7917922505582918e-05, "loss": 1.4737, "step": 1180 }, { "epoch": 1.3492063492063493, "grad_norm": 21.4808406829834, "learning_rate": 1.7795699296131467e-05, "loss": 1.2548, "step": 1190 }, { "epoch": 1.3605442176870748, "grad_norm": 17.070913314819336, "learning_rate": 1.7673476086680013e-05, "loss": 1.438, "step": 1200 }, { "epoch": 1.3718820861678005, "grad_norm": 23.818998336791992, "learning_rate": 1.7551252877228562e-05, "loss": 1.4455, "step": 1210 }, { "epoch": 1.383219954648526, "grad_norm": 25.911645889282227, "learning_rate": 1.742902966777711e-05, "loss": 1.4403, "step": 1220 }, { "epoch": 1.3945578231292517, "grad_norm": 16.849903106689453, "learning_rate": 1.7306806458325653e-05, "loss": 1.3663, "step": 1230 }, { "epoch": 1.4058956916099774, "grad_norm": 16.710933685302734, "learning_rate": 1.7184583248874202e-05, "loss": 1.3915, "step": 1240 }, { "epoch": 1.417233560090703, "grad_norm": 22.40735626220703, "learning_rate": 1.706236003942275e-05, "loss": 1.472, "step": 1250 }, { "epoch": 1.4285714285714286, "grad_norm": 23.049968719482422, "learning_rate": 1.6940136829971297e-05, "loss": 1.3523, "step": 1260 }, { "epoch": 1.439909297052154, "grad_norm": 12.89521598815918, "learning_rate": 1.6817913620519846e-05, "loss": 1.5376, "step": 1270 }, { "epoch": 1.4512471655328798, "grad_norm": 19.498533248901367, "learning_rate": 1.6695690411068396e-05, "loss": 1.5389, "step": 1280 }, { "epoch": 1.4625850340136055, "grad_norm": 18.624237060546875, "learning_rate": 1.657346720161694e-05, "loss": 1.4731, "step": 1290 }, { "epoch": 1.473922902494331, "grad_norm": 21.690345764160156, "learning_rate": 1.6451243992165487e-05, "loss": 1.3718, "step": 1300 }, { "epoch": 1.4852607709750567, "grad_norm": 21.58686065673828, "learning_rate": 1.6329020782714036e-05, "loss": 1.6325, "step": 1310 }, { "epoch": 1.4965986394557822, "grad_norm": 17.337610244750977, "learning_rate": 1.6206797573262585e-05, "loss": 1.5196, "step": 1320 }, { "epoch": 1.507936507936508, "grad_norm": 22.74974822998047, "learning_rate": 1.608457436381113e-05, "loss": 1.6169, "step": 1330 }, { "epoch": 1.5192743764172336, "grad_norm": 24.127099990844727, "learning_rate": 1.596235115435968e-05, "loss": 1.3358, "step": 1340 }, { "epoch": 1.5306122448979593, "grad_norm": 18.630477905273438, "learning_rate": 1.584012794490823e-05, "loss": 1.2559, "step": 1350 }, { "epoch": 1.5419501133786848, "grad_norm": 22.353515625, "learning_rate": 1.5717904735456775e-05, "loss": 1.3693, "step": 1360 }, { "epoch": 1.5532879818594103, "grad_norm": 25.24136734008789, "learning_rate": 1.559568152600532e-05, "loss": 1.4876, "step": 1370 }, { "epoch": 1.564625850340136, "grad_norm": 13.99299144744873, "learning_rate": 1.547345831655387e-05, "loss": 1.3684, "step": 1380 }, { "epoch": 1.5759637188208617, "grad_norm": 17.318729400634766, "learning_rate": 1.5351235107102416e-05, "loss": 1.2686, "step": 1390 }, { "epoch": 1.5873015873015874, "grad_norm": 16.49215316772461, "learning_rate": 1.5229011897650965e-05, "loss": 1.3287, "step": 1400 }, { "epoch": 1.598639455782313, "grad_norm": 17.993852615356445, "learning_rate": 1.5106788688199514e-05, "loss": 1.3661, "step": 1410 }, { "epoch": 1.6099773242630384, "grad_norm": 17.669912338256836, "learning_rate": 1.4984565478748061e-05, "loss": 1.4371, "step": 1420 }, { "epoch": 1.6213151927437641, "grad_norm": 32.347835540771484, "learning_rate": 1.4862342269296609e-05, "loss": 1.5957, "step": 1430 }, { "epoch": 1.6326530612244898, "grad_norm": 15.98144245147705, "learning_rate": 1.4740119059845155e-05, "loss": 1.3895, "step": 1440 }, { "epoch": 1.6439909297052155, "grad_norm": 15.446006774902344, "learning_rate": 1.4617895850393702e-05, "loss": 1.4985, "step": 1450 }, { "epoch": 1.655328798185941, "grad_norm": 20.650182723999023, "learning_rate": 1.4495672640942251e-05, "loss": 1.3036, "step": 1460 }, { "epoch": 1.6666666666666665, "grad_norm": 18.82501220703125, "learning_rate": 1.4373449431490799e-05, "loss": 1.3777, "step": 1470 }, { "epoch": 1.6780045351473922, "grad_norm": 24.08733558654785, "learning_rate": 1.4251226222039346e-05, "loss": 1.4847, "step": 1480 }, { "epoch": 1.689342403628118, "grad_norm": 15.655111312866211, "learning_rate": 1.4129003012587895e-05, "loss": 1.6208, "step": 1490 }, { "epoch": 1.7006802721088436, "grad_norm": 14.29283618927002, "learning_rate": 1.4006779803136441e-05, "loss": 1.4783, "step": 1500 }, { "epoch": 1.7120181405895691, "grad_norm": 27.245153427124023, "learning_rate": 1.388455659368499e-05, "loss": 1.3621, "step": 1510 }, { "epoch": 1.7233560090702946, "grad_norm": 17.18270492553711, "learning_rate": 1.3762333384233537e-05, "loss": 1.4615, "step": 1520 }, { "epoch": 1.7346938775510203, "grad_norm": 30.546113967895508, "learning_rate": 1.3640110174782085e-05, "loss": 1.4101, "step": 1530 }, { "epoch": 1.746031746031746, "grad_norm": 27.630264282226562, "learning_rate": 1.3517886965330632e-05, "loss": 1.5457, "step": 1540 }, { "epoch": 1.7573696145124718, "grad_norm": 24.351619720458984, "learning_rate": 1.339566375587918e-05, "loss": 1.4364, "step": 1550 }, { "epoch": 1.7687074829931972, "grad_norm": 22.478717803955078, "learning_rate": 1.3273440546427727e-05, "loss": 1.5078, "step": 1560 }, { "epoch": 1.780045351473923, "grad_norm": 33.06885528564453, "learning_rate": 1.3151217336976275e-05, "loss": 1.3344, "step": 1570 }, { "epoch": 1.7913832199546484, "grad_norm": 25.309829711914062, "learning_rate": 1.3028994127524822e-05, "loss": 1.4234, "step": 1580 }, { "epoch": 1.8027210884353742, "grad_norm": 26.68161392211914, "learning_rate": 1.2906770918073371e-05, "loss": 1.326, "step": 1590 }, { "epoch": 1.8140589569160999, "grad_norm": 24.11896324157715, "learning_rate": 1.2784547708621919e-05, "loss": 1.5031, "step": 1600 }, { "epoch": 1.8253968253968254, "grad_norm": 19.29245948791504, "learning_rate": 1.2662324499170464e-05, "loss": 1.3132, "step": 1610 }, { "epoch": 1.836734693877551, "grad_norm": 18.402624130249023, "learning_rate": 1.2540101289719014e-05, "loss": 1.359, "step": 1620 }, { "epoch": 1.8480725623582765, "grad_norm": 22.64293670654297, "learning_rate": 1.2417878080267561e-05, "loss": 1.6493, "step": 1630 }, { "epoch": 1.8594104308390023, "grad_norm": 20.680465698242188, "learning_rate": 1.2295654870816108e-05, "loss": 1.3705, "step": 1640 }, { "epoch": 1.870748299319728, "grad_norm": 21.203262329101562, "learning_rate": 1.2173431661364656e-05, "loss": 1.4593, "step": 1650 }, { "epoch": 1.8820861678004537, "grad_norm": 19.054349899291992, "learning_rate": 1.2051208451913203e-05, "loss": 1.316, "step": 1660 }, { "epoch": 1.8934240362811792, "grad_norm": 18.196651458740234, "learning_rate": 1.1928985242461752e-05, "loss": 1.4121, "step": 1670 }, { "epoch": 1.9047619047619047, "grad_norm": 14.884149551391602, "learning_rate": 1.1806762033010298e-05, "loss": 1.2392, "step": 1680 }, { "epoch": 1.9160997732426304, "grad_norm": 21.171005249023438, "learning_rate": 1.1684538823558846e-05, "loss": 1.3379, "step": 1690 }, { "epoch": 1.927437641723356, "grad_norm": 18.805988311767578, "learning_rate": 1.1562315614107395e-05, "loss": 1.3454, "step": 1700 }, { "epoch": 1.9387755102040818, "grad_norm": 19.397449493408203, "learning_rate": 1.1440092404655942e-05, "loss": 1.4548, "step": 1710 }, { "epoch": 1.9501133786848073, "grad_norm": 20.647939682006836, "learning_rate": 1.131786919520449e-05, "loss": 1.4206, "step": 1720 }, { "epoch": 1.9614512471655328, "grad_norm": 22.62149429321289, "learning_rate": 1.1195645985753037e-05, "loss": 1.3777, "step": 1730 }, { "epoch": 1.9727891156462585, "grad_norm": 27.34062385559082, "learning_rate": 1.1073422776301585e-05, "loss": 1.3447, "step": 1740 }, { "epoch": 1.9841269841269842, "grad_norm": 29.27070426940918, "learning_rate": 1.0951199566850134e-05, "loss": 1.3864, "step": 1750 }, { "epoch": 1.99546485260771, "grad_norm": 16.5633487701416, "learning_rate": 1.082897635739868e-05, "loss": 1.2893, "step": 1760 }, { "epoch": 2.0, "eval_classification_report": { "accuracy": 0.3989414488918293, "ar": { "f1-score": 0.3793103448275862, "precision": 0.4618320610687023, "recall": 0.32180851063829785, "support": 376.0 }, "cl": { "f1-score": 0.37325038880248834, "precision": 0.3380281690140845, "recall": 0.4166666666666667, "support": 576.0 }, "co": { "f1-score": 0.3398230088495575, "precision": 0.4343891402714932, "recall": 0.27906976744186046, "support": 344.0 }, "es": { "f1-score": 0.4501323918799647, "precision": 0.4322033898305085, "recall": 0.4696132596685083, "support": 543.0 }, "macro avg": { "f1-score": 0.3755741665997943, "precision": 0.4098704182426044, "recall": 0.3639331480997811, "support": 3023.0 }, "mx": { "f1-score": 0.4408888888888889, "precision": 0.3875, "recall": 0.511340206185567, "support": 485.0 }, "pe": { "f1-score": 0.35097493036211697, "precision": 0.34054054054054056, "recall": 0.3620689655172414, "support": 348.0 }, "pr": { "f1-score": 0.6666666666666666, "precision": 0.7159090909090909, "recall": 0.6237623762376238, "support": 101.0 }, "uy": { "f1-score": 0.3076923076923077, "precision": 0.4117647058823529, "recall": 0.24561403508771928, "support": 228.0 }, "ve": { "f1-score": 0.07142857142857142, "precision": 0.16666666666666666, "recall": 0.045454545454545456, "support": 22.0 }, "weighted avg": { "f1-score": 0.3949596137532948, "precision": 0.40647397656072576, "recall": 0.3989414488918293, "support": 3023.0 } }, "eval_f1": 0.3755741665997943, "eval_loss": 1.6900478601455688, "eval_runtime": 4.3866, "eval_samples_per_second": 689.137, "eval_steps_per_second": 86.171, "step": 1764 } ], "logging_steps": 10, "max_steps": 2646, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 463959726481152.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }