{
  "best_metric": 0.3755741665997943,
  "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_9/checkpoint-1764",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 1764,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011337868480725623,
      "grad_norm": 9.890141487121582,
      "learning_rate": 9.346480722758123e-07,
      "loss": 2.2337,
      "step": 10
    },
    {
      "epoch": 0.022675736961451247,
      "grad_norm": 10.97945785522461,
      "learning_rate": 1.8692961445516245e-06,
      "loss": 2.2359,
      "step": 20
    },
    {
      "epoch": 0.034013605442176874,
      "grad_norm": 10.244830131530762,
      "learning_rate": 2.8039442168274367e-06,
      "loss": 2.2034,
      "step": 30
    },
    {
      "epoch": 0.045351473922902494,
      "grad_norm": 11.170487403869629,
      "learning_rate": 3.738592289103249e-06,
      "loss": 2.2238,
      "step": 40
    },
    {
      "epoch": 0.05668934240362812,
      "grad_norm": 8.994032859802246,
      "learning_rate": 4.673240361379061e-06,
      "loss": 2.143,
      "step": 50
    },
    {
      "epoch": 0.06802721088435375,
      "grad_norm": 11.781458854675293,
      "learning_rate": 5.607888433654873e-06,
      "loss": 2.1299,
      "step": 60
    },
    {
      "epoch": 0.07936507936507936,
      "grad_norm": 10.54732894897461,
      "learning_rate": 6.542536505930685e-06,
      "loss": 2.102,
      "step": 70
    },
    {
      "epoch": 0.09070294784580499,
      "grad_norm": 8.15542221069336,
      "learning_rate": 7.477184578206498e-06,
      "loss": 2.0436,
      "step": 80
    },
    {
      "epoch": 0.10204081632653061,
      "grad_norm": 12.086600303649902,
      "learning_rate": 8.41183265048231e-06,
      "loss": 2.0417,
      "step": 90
    },
    {
      "epoch": 0.11337868480725624,
      "grad_norm": 10.285418510437012,
      "learning_rate": 9.346480722758123e-06,
      "loss": 2.0767,
      "step": 100
    },
    {
      "epoch": 0.12471655328798185,
      "grad_norm": 10.342191696166992,
      "learning_rate": 1.0281128795033934e-05,
      "loss": 2.0401,
      "step": 110
    },
    {
      "epoch": 0.1360544217687075,
      "grad_norm": 9.20302677154541,
      "learning_rate": 1.1215776867309747e-05,
      "loss": 1.99,
      "step": 120
    },
    {
      "epoch": 0.1473922902494331,
      "grad_norm": 9.420401573181152,
      "learning_rate": 1.215042493958556e-05,
      "loss": 1.9867,
      "step": 130
    },
    {
      "epoch": 0.15873015873015872,
      "grad_norm": 7.75075101852417,
      "learning_rate": 1.308507301186137e-05,
      "loss": 1.9988,
      "step": 140
    },
    {
      "epoch": 0.17006802721088435,
      "grad_norm": 8.807808876037598,
      "learning_rate": 1.4019721084137183e-05,
      "loss": 1.9886,
      "step": 150
    },
    {
      "epoch": 0.18140589569160998,
      "grad_norm": 10.291740417480469,
      "learning_rate": 1.4954369156412996e-05,
      "loss": 1.9393,
      "step": 160
    },
    {
      "epoch": 0.1927437641723356,
      "grad_norm": 8.806387901306152,
      "learning_rate": 1.588901722868881e-05,
      "loss": 1.9602,
      "step": 170
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 7.684463977813721,
      "learning_rate": 1.682366530096462e-05,
      "loss": 2.0044,
      "step": 180
    },
    {
      "epoch": 0.21541950113378686,
      "grad_norm": 8.645553588867188,
      "learning_rate": 1.7758313373240435e-05,
      "loss": 2.0519,
      "step": 190
    },
    {
      "epoch": 0.22675736961451248,
      "grad_norm": 9.162885665893555,
      "learning_rate": 1.8692961445516246e-05,
      "loss": 2.0516,
      "step": 200
    },
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 7.536770343780518,
      "learning_rate": 1.9627609517792057e-05,
      "loss": 2.0299,
      "step": 210
    },
    {
      "epoch": 0.2494331065759637,
      "grad_norm": 8.47108268737793,
      "learning_rate": 2.0562257590067868e-05,
      "loss": 2.0816,
      "step": 220
    },
    {
      "epoch": 0.26077097505668934,
      "grad_norm": 6.5353498458862305,
      "learning_rate": 2.1496905662343682e-05,
      "loss": 2.0053,
      "step": 230
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 7.390655994415283,
      "learning_rate": 2.2431553734619493e-05,
      "loss": 1.956,
      "step": 240
    },
    {
      "epoch": 0.2834467120181406,
      "grad_norm": 11.590116500854492,
      "learning_rate": 2.3366201806895304e-05,
      "loss": 1.9745,
      "step": 250
    },
    {
      "epoch": 0.2947845804988662,
      "grad_norm": 7.277239799499512,
      "learning_rate": 2.430084987917112e-05,
      "loss": 2.0846,
      "step": 260
    },
    {
      "epoch": 0.30612244897959184,
      "grad_norm": 7.210775375366211,
      "learning_rate": 2.523549795144693e-05,
      "loss": 1.9379,
      "step": 270
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 9.101860046386719,
      "learning_rate": 2.617014602372274e-05,
      "loss": 1.9774,
      "step": 280
    },
    {
      "epoch": 0.3287981859410431,
      "grad_norm": 8.541576385498047,
      "learning_rate": 2.7104794095998556e-05,
      "loss": 1.9359,
      "step": 290
    },
    {
      "epoch": 0.3401360544217687,
      "grad_norm": 9.097405433654785,
      "learning_rate": 2.8039442168274367e-05,
      "loss": 1.9943,
      "step": 300
    },
    {
      "epoch": 0.35147392290249435,
      "grad_norm": 10.134627342224121,
      "learning_rate": 2.8551341727859275e-05,
      "loss": 1.9935,
      "step": 310
    },
    {
      "epoch": 0.36281179138321995,
      "grad_norm": 9.275653839111328,
      "learning_rate": 2.842911851840782e-05,
      "loss": 1.9708,
      "step": 320
    },
    {
      "epoch": 0.3741496598639456,
      "grad_norm": 10.57584285736084,
      "learning_rate": 2.830689530895637e-05,
      "loss": 1.8931,
      "step": 330
    },
    {
      "epoch": 0.3854875283446712,
      "grad_norm": 8.968500137329102,
      "learning_rate": 2.818467209950492e-05,
      "loss": 2.0358,
      "step": 340
    },
    {
      "epoch": 0.3968253968253968,
      "grad_norm": 7.630152702331543,
      "learning_rate": 2.806244889005346e-05,
      "loss": 1.9313,
      "step": 350
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 10.709409713745117,
      "learning_rate": 2.794022568060201e-05,
      "loss": 1.8677,
      "step": 360
    },
    {
      "epoch": 0.41950113378684806,
      "grad_norm": 8.903763771057129,
      "learning_rate": 2.781800247115056e-05,
      "loss": 1.9309,
      "step": 370
    },
    {
      "epoch": 0.4308390022675737,
      "grad_norm": 11.22157096862793,
      "learning_rate": 2.769577926169911e-05,
      "loss": 1.9434,
      "step": 380
    },
    {
      "epoch": 0.4421768707482993,
      "grad_norm": 13.553994178771973,
      "learning_rate": 2.7573556052247655e-05,
      "loss": 1.9612,
      "step": 390
    },
    {
      "epoch": 0.45351473922902497,
      "grad_norm": 10.572993278503418,
      "learning_rate": 2.7451332842796204e-05,
      "loss": 1.9312,
      "step": 400
    },
    {
      "epoch": 0.46485260770975056,
      "grad_norm": 10.051871299743652,
      "learning_rate": 2.7329109633344753e-05,
      "loss": 2.0256,
      "step": 410
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 8.120019912719727,
      "learning_rate": 2.7206886423893295e-05,
      "loss": 1.8118,
      "step": 420
    },
    {
      "epoch": 0.4875283446712018,
      "grad_norm": 8.908893585205078,
      "learning_rate": 2.7084663214441844e-05,
      "loss": 1.9479,
      "step": 430
    },
    {
      "epoch": 0.4988662131519274,
      "grad_norm": 9.484359741210938,
      "learning_rate": 2.6962440004990394e-05,
      "loss": 1.9766,
      "step": 440
    },
    {
      "epoch": 0.5102040816326531,
      "grad_norm": 9.914438247680664,
      "learning_rate": 2.684021679553894e-05,
      "loss": 1.9492,
      "step": 450
    },
    {
      "epoch": 0.5215419501133787,
      "grad_norm": 10.76697063446045,
      "learning_rate": 2.671799358608749e-05,
      "loss": 1.9384,
      "step": 460
    },
    {
      "epoch": 0.5328798185941043,
      "grad_norm": 11.358717918395996,
      "learning_rate": 2.6595770376636038e-05,
      "loss": 1.8805,
      "step": 470
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 10.204866409301758,
      "learning_rate": 2.6473547167184583e-05,
      "loss": 1.7569,
      "step": 480
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 9.388846397399902,
      "learning_rate": 2.635132395773313e-05,
      "loss": 1.9267,
      "step": 490
    },
    {
      "epoch": 0.5668934240362812,
      "grad_norm": 10.731882095336914,
      "learning_rate": 2.6229100748281678e-05,
      "loss": 1.9189,
      "step": 500
    },
    {
      "epoch": 0.5782312925170068,
      "grad_norm": 17.54758644104004,
      "learning_rate": 2.6106877538830227e-05,
      "loss": 1.9435,
      "step": 510
    },
    {
      "epoch": 0.5895691609977324,
      "grad_norm": 16.29359245300293,
      "learning_rate": 2.5984654329378773e-05,
      "loss": 1.9415,
      "step": 520
    },
    {
      "epoch": 0.6009070294784581,
      "grad_norm": 12.303582191467285,
      "learning_rate": 2.5862431119927322e-05,
      "loss": 1.9217,
      "step": 530
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 11.611120223999023,
      "learning_rate": 2.574020791047587e-05,
      "loss": 1.9101,
      "step": 540
    },
    {
      "epoch": 0.6235827664399093,
      "grad_norm": 8.830730438232422,
      "learning_rate": 2.5617984701024417e-05,
      "loss": 1.7771,
      "step": 550
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 17.929471969604492,
      "learning_rate": 2.5495761491572966e-05,
      "loss": 1.9179,
      "step": 560
    },
    {
      "epoch": 0.6462585034013606,
      "grad_norm": 10.709088325500488,
      "learning_rate": 2.5373538282121512e-05,
      "loss": 1.9077,
      "step": 570
    },
    {
      "epoch": 0.6575963718820862,
      "grad_norm": 11.527586936950684,
      "learning_rate": 2.5251315072670058e-05,
      "loss": 1.8224,
      "step": 580
    },
    {
      "epoch": 0.6689342403628118,
      "grad_norm": 10.246384620666504,
      "learning_rate": 2.5129091863218607e-05,
      "loss": 1.8023,
      "step": 590
    },
    {
      "epoch": 0.6802721088435374,
      "grad_norm": 16.176572799682617,
      "learning_rate": 2.5006868653767156e-05,
      "loss": 1.9447,
      "step": 600
    },
    {
      "epoch": 0.691609977324263,
      "grad_norm": 13.913769721984863,
      "learning_rate": 2.4884645444315702e-05,
      "loss": 1.8641,
      "step": 610
    },
    {
      "epoch": 0.7029478458049887,
      "grad_norm": 12.541318893432617,
      "learning_rate": 2.476242223486425e-05,
      "loss": 1.9395,
      "step": 620
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 12.410688400268555,
      "learning_rate": 2.46401990254128e-05,
      "loss": 1.9303,
      "step": 630
    },
    {
      "epoch": 0.7256235827664399,
      "grad_norm": 9.643453598022461,
      "learning_rate": 2.4517975815961346e-05,
      "loss": 1.9703,
      "step": 640
    },
    {
      "epoch": 0.7369614512471655,
      "grad_norm": 19.51215934753418,
      "learning_rate": 2.439575260650989e-05,
      "loss": 1.8502,
      "step": 650
    },
    {
      "epoch": 0.7482993197278912,
      "grad_norm": 16.120214462280273,
      "learning_rate": 2.427352939705844e-05,
      "loss": 1.8644,
      "step": 660
    },
    {
      "epoch": 0.7596371882086168,
      "grad_norm": 9.631799697875977,
      "learning_rate": 2.415130618760699e-05,
      "loss": 1.7562,
      "step": 670
    },
    {
      "epoch": 0.7709750566893424,
      "grad_norm": 11.26856803894043,
      "learning_rate": 2.4029082978155535e-05,
      "loss": 1.9283,
      "step": 680
    },
    {
      "epoch": 0.782312925170068,
      "grad_norm": 14.097902297973633,
      "learning_rate": 2.3906859768704085e-05,
      "loss": 1.8127,
      "step": 690
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 10.835921287536621,
      "learning_rate": 2.3784636559252634e-05,
      "loss": 1.6211,
      "step": 700
    },
    {
      "epoch": 0.8049886621315193,
      "grad_norm": 13.751789093017578,
      "learning_rate": 2.3662413349801176e-05,
      "loss": 1.7555,
      "step": 710
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 14.243096351623535,
      "learning_rate": 2.3540190140349725e-05,
      "loss": 1.6117,
      "step": 720
    },
    {
      "epoch": 0.8276643990929705,
      "grad_norm": 15.838502883911133,
      "learning_rate": 2.3417966930898274e-05,
      "loss": 1.7722,
      "step": 730
    },
    {
      "epoch": 0.8390022675736961,
      "grad_norm": 12.460963249206543,
      "learning_rate": 2.329574372144682e-05,
      "loss": 1.7567,
      "step": 740
    },
    {
      "epoch": 0.8503401360544217,
      "grad_norm": 17.053138732910156,
      "learning_rate": 2.317352051199537e-05,
      "loss": 1.907,
      "step": 750
    },
    {
      "epoch": 0.8616780045351474,
      "grad_norm": 12.155874252319336,
      "learning_rate": 2.305129730254392e-05,
      "loss": 1.9573,
      "step": 760
    },
    {
      "epoch": 0.873015873015873,
      "grad_norm": 9.946329116821289,
      "learning_rate": 2.2929074093092468e-05,
      "loss": 1.7964,
      "step": 770
    },
    {
      "epoch": 0.8843537414965986,
      "grad_norm": 13.480246543884277,
      "learning_rate": 2.2806850883641013e-05,
      "loss": 1.9251,
      "step": 780
    },
    {
      "epoch": 0.8956916099773242,
      "grad_norm": 9.594596862792969,
      "learning_rate": 2.268462767418956e-05,
      "loss": 1.6834,
      "step": 790
    },
    {
      "epoch": 0.9070294784580499,
      "grad_norm": 11.284895896911621,
      "learning_rate": 2.2562404464738108e-05,
      "loss": 1.7469,
      "step": 800
    },
    {
      "epoch": 0.9183673469387755,
      "grad_norm": 13.537227630615234,
      "learning_rate": 2.2440181255286654e-05,
      "loss": 1.6632,
      "step": 810
    },
    {
      "epoch": 0.9297052154195011,
      "grad_norm": 11.549741744995117,
      "learning_rate": 2.2317958045835203e-05,
      "loss": 1.788,
      "step": 820
    },
    {
      "epoch": 0.9410430839002267,
      "grad_norm": 53.76149368286133,
      "learning_rate": 2.2195734836383752e-05,
      "loss": 1.8974,
      "step": 830
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 9.953411102294922,
      "learning_rate": 2.2073511626932298e-05,
      "loss": 1.8805,
      "step": 840
    },
    {
      "epoch": 0.963718820861678,
      "grad_norm": 18.290058135986328,
      "learning_rate": 2.1951288417480847e-05,
      "loss": 1.6644,
      "step": 850
    },
    {
      "epoch": 0.9750566893424036,
      "grad_norm": 12.544032096862793,
      "learning_rate": 2.1829065208029393e-05,
      "loss": 1.6205,
      "step": 860
    },
    {
      "epoch": 0.9863945578231292,
      "grad_norm": 20.213836669921875,
      "learning_rate": 2.170684199857794e-05,
      "loss": 1.8964,
      "step": 870
    },
    {
      "epoch": 0.9977324263038548,
      "grad_norm": 17.492652893066406,
      "learning_rate": 2.1584618789126488e-05,
      "loss": 1.7601,
      "step": 880
    },
    {
      "epoch": 1.0,
      "eval_classification_report": {
        "accuracy": 0.3479986768111148,
        "ar": {
          "f1-score": 0.36553524804177545,
          "precision": 0.358974358974359,
          "recall": 0.3723404255319149,
          "support": 376.0
        },
        "cl": {
          "f1-score": 0.2568250758341759,
          "precision": 0.3075060532687651,
          "recall": 0.2204861111111111,
          "support": 576.0
        },
        "co": {
          "f1-score": 0.25892857142857145,
          "precision": 0.5576923076923077,
          "recall": 0.1686046511627907,
          "support": 344.0
        },
        "es": {
          "f1-score": 0.41894353369763204,
          "precision": 0.4144144144144144,
          "recall": 0.42357274401473294,
          "support": 543.0
        },
        "macro avg": {
          "f1-score": 0.3142646730168648,
          "precision": 0.3640216920469342,
          "recall": 0.3175752127167582,
          "support": 3023.0
        },
        "mx": {
          "f1-score": 0.38181818181818183,
          "precision": 0.2786729857819905,
          "recall": 0.6061855670103092,
          "support": 485.0
        },
        "pe": {
          "f1-score": 0.3090909090909091,
          "precision": 0.3269230769230769,
          "recall": 0.29310344827586204,
          "support": 348.0
        },
        "pr": {
          "f1-score": 0.5769230769230769,
          "precision": 0.5607476635514018,
          "recall": 0.594059405940594,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.26031746031746034,
          "precision": 0.47126436781609193,
          "recall": 0.17982456140350878,
          "support": 228.0
        },
        "ve": {
          "f1-score": 0.0,
          "precision": 0.0,
          "recall": 0.0,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.3348651440888197,
          "precision": 0.37776395808289953,
          "recall": 0.3479986768111148,
          "support": 3023.0
        }
      },
      "eval_f1": 0.3142646730168648,
      "eval_loss": 1.7349679470062256,
      "eval_runtime": 4.5054,
      "eval_samples_per_second": 670.979,
      "eval_steps_per_second": 83.9,
      "step": 882
    },
    {
      "epoch": 1.0090702947845804,
      "grad_norm": 15.810832023620605,
      "learning_rate": 2.1462395579675037e-05,
      "loss": 1.6151,
      "step": 890
    },
    {
      "epoch": 1.0204081632653061,
      "grad_norm": 13.969820022583008,
      "learning_rate": 2.1340172370223586e-05,
      "loss": 1.6903,
      "step": 900
    },
    {
      "epoch": 1.0317460317460316,
      "grad_norm": 10.878263473510742,
      "learning_rate": 2.1217949160772132e-05,
      "loss": 1.3787,
      "step": 910
    },
    {
      "epoch": 1.0430839002267573,
      "grad_norm": 13.959477424621582,
      "learning_rate": 2.109572595132068e-05,
      "loss": 1.6575,
      "step": 920
    },
    {
      "epoch": 1.054421768707483,
      "grad_norm": 17.30800437927246,
      "learning_rate": 2.0973502741869227e-05,
      "loss": 1.5016,
      "step": 930
    },
    {
      "epoch": 1.0657596371882085,
      "grad_norm": 14.826261520385742,
      "learning_rate": 2.0851279532417772e-05,
      "loss": 1.6254,
      "step": 940
    },
    {
      "epoch": 1.0770975056689343,
      "grad_norm": 15.56223201751709,
      "learning_rate": 2.072905632296632e-05,
      "loss": 1.5939,
      "step": 950
    },
    {
      "epoch": 1.08843537414966,
      "grad_norm": 26.888315200805664,
      "learning_rate": 2.060683311351487e-05,
      "loss": 1.521,
      "step": 960
    },
    {
      "epoch": 1.0997732426303855,
      "grad_norm": 15.208369255065918,
      "learning_rate": 2.0484609904063416e-05,
      "loss": 1.601,
      "step": 970
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 20.432218551635742,
      "learning_rate": 2.0362386694611965e-05,
      "loss": 1.5258,
      "step": 980
    },
    {
      "epoch": 1.1224489795918366,
      "grad_norm": 16.719057083129883,
      "learning_rate": 2.0240163485160515e-05,
      "loss": 1.4809,
      "step": 990
    },
    {
      "epoch": 1.1337868480725624,
      "grad_norm": 20.315963745117188,
      "learning_rate": 2.011794027570906e-05,
      "loss": 1.4214,
      "step": 1000
    },
    {
      "epoch": 1.145124716553288,
      "grad_norm": 17.45546531677246,
      "learning_rate": 1.9995717066257606e-05,
      "loss": 1.5853,
      "step": 1010
    },
    {
      "epoch": 1.1564625850340136,
      "grad_norm": 16.830751419067383,
      "learning_rate": 1.9873493856806155e-05,
      "loss": 1.3777,
      "step": 1020
    },
    {
      "epoch": 1.1678004535147393,
      "grad_norm": 19.829317092895508,
      "learning_rate": 1.9751270647354704e-05,
      "loss": 1.3531,
      "step": 1030
    },
    {
      "epoch": 1.179138321995465,
      "grad_norm": 23.709720611572266,
      "learning_rate": 1.962904743790325e-05,
      "loss": 1.485,
      "step": 1040
    },
    {
      "epoch": 1.1904761904761905,
      "grad_norm": 14.404341697692871,
      "learning_rate": 1.95068242284518e-05,
      "loss": 1.2469,
      "step": 1050
    },
    {
      "epoch": 1.2018140589569162,
      "grad_norm": 26.16022300720215,
      "learning_rate": 1.938460101900035e-05,
      "loss": 1.5754,
      "step": 1060
    },
    {
      "epoch": 1.2131519274376417,
      "grad_norm": 20.90950584411621,
      "learning_rate": 1.9262377809548894e-05,
      "loss": 1.7132,
      "step": 1070
    },
    {
      "epoch": 1.2244897959183674,
      "grad_norm": 20.9376220703125,
      "learning_rate": 1.914015460009744e-05,
      "loss": 1.5629,
      "step": 1080
    },
    {
      "epoch": 1.235827664399093,
      "grad_norm": 18.35310935974121,
      "learning_rate": 1.901793139064599e-05,
      "loss": 1.4152,
      "step": 1090
    },
    {
      "epoch": 1.2471655328798186,
      "grad_norm": 18.69922637939453,
      "learning_rate": 1.8895708181194535e-05,
      "loss": 1.3757,
      "step": 1100
    },
    {
      "epoch": 1.2585034013605443,
      "grad_norm": 15.565815925598145,
      "learning_rate": 1.8773484971743084e-05,
      "loss": 1.3995,
      "step": 1110
    },
    {
      "epoch": 1.2698412698412698,
      "grad_norm": 20.754438400268555,
      "learning_rate": 1.8651261762291633e-05,
      "loss": 1.5485,
      "step": 1120
    },
    {
      "epoch": 1.2811791383219955,
      "grad_norm": 31.195865631103516,
      "learning_rate": 1.852903855284018e-05,
      "loss": 1.4945,
      "step": 1130
    },
    {
      "epoch": 1.2925170068027212,
      "grad_norm": 22.87392234802246,
      "learning_rate": 1.8406815343388728e-05,
      "loss": 1.5909,
      "step": 1140
    },
    {
      "epoch": 1.3038548752834467,
      "grad_norm": 16.350553512573242,
      "learning_rate": 1.8284592133937274e-05,
      "loss": 1.3411,
      "step": 1150
    },
    {
      "epoch": 1.3151927437641724,
      "grad_norm": 18.95813751220703,
      "learning_rate": 1.8162368924485823e-05,
      "loss": 1.5416,
      "step": 1160
    },
    {
      "epoch": 1.3265306122448979,
      "grad_norm": 21.95383071899414,
      "learning_rate": 1.804014571503437e-05,
      "loss": 1.4762,
      "step": 1170
    },
    {
      "epoch": 1.3378684807256236,
      "grad_norm": 22.101272583007812,
      "learning_rate": 1.7917922505582918e-05,
      "loss": 1.4737,
      "step": 1180
    },
    {
      "epoch": 1.3492063492063493,
      "grad_norm": 21.4808406829834,
      "learning_rate": 1.7795699296131467e-05,
      "loss": 1.2548,
      "step": 1190
    },
    {
      "epoch": 1.3605442176870748,
      "grad_norm": 17.070913314819336,
      "learning_rate": 1.7673476086680013e-05,
      "loss": 1.438,
      "step": 1200
    },
    {
      "epoch": 1.3718820861678005,
      "grad_norm": 23.818998336791992,
      "learning_rate": 1.7551252877228562e-05,
      "loss": 1.4455,
      "step": 1210
    },
    {
      "epoch": 1.383219954648526,
      "grad_norm": 25.911645889282227,
      "learning_rate": 1.742902966777711e-05,
      "loss": 1.4403,
      "step": 1220
    },
    {
      "epoch": 1.3945578231292517,
      "grad_norm": 16.849903106689453,
      "learning_rate": 1.7306806458325653e-05,
      "loss": 1.3663,
      "step": 1230
    },
    {
      "epoch": 1.4058956916099774,
      "grad_norm": 16.710933685302734,
      "learning_rate": 1.7184583248874202e-05,
      "loss": 1.3915,
      "step": 1240
    },
    {
      "epoch": 1.417233560090703,
      "grad_norm": 22.40735626220703,
      "learning_rate": 1.706236003942275e-05,
      "loss": 1.472,
      "step": 1250
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 23.049968719482422,
      "learning_rate": 1.6940136829971297e-05,
      "loss": 1.3523,
      "step": 1260
    },
    {
      "epoch": 1.439909297052154,
      "grad_norm": 12.89521598815918,
      "learning_rate": 1.6817913620519846e-05,
      "loss": 1.5376,
      "step": 1270
    },
    {
      "epoch": 1.4512471655328798,
      "grad_norm": 19.498533248901367,
      "learning_rate": 1.6695690411068396e-05,
      "loss": 1.5389,
      "step": 1280
    },
    {
      "epoch": 1.4625850340136055,
      "grad_norm": 18.624237060546875,
      "learning_rate": 1.657346720161694e-05,
      "loss": 1.4731,
      "step": 1290
    },
    {
      "epoch": 1.473922902494331,
      "grad_norm": 21.690345764160156,
      "learning_rate": 1.6451243992165487e-05,
      "loss": 1.3718,
      "step": 1300
    },
    {
      "epoch": 1.4852607709750567,
      "grad_norm": 21.58686065673828,
      "learning_rate": 1.6329020782714036e-05,
      "loss": 1.6325,
      "step": 1310
    },
    {
      "epoch": 1.4965986394557822,
      "grad_norm": 17.337610244750977,
      "learning_rate": 1.6206797573262585e-05,
      "loss": 1.5196,
      "step": 1320
    },
    {
      "epoch": 1.507936507936508,
      "grad_norm": 22.74974822998047,
      "learning_rate": 1.608457436381113e-05,
      "loss": 1.6169,
      "step": 1330
    },
    {
      "epoch": 1.5192743764172336,
      "grad_norm": 24.127099990844727,
      "learning_rate": 1.596235115435968e-05,
      "loss": 1.3358,
      "step": 1340
    },
    {
      "epoch": 1.5306122448979593,
      "grad_norm": 18.630477905273438,
      "learning_rate": 1.584012794490823e-05,
      "loss": 1.2559,
      "step": 1350
    },
    {
      "epoch": 1.5419501133786848,
      "grad_norm": 22.353515625,
      "learning_rate": 1.5717904735456775e-05,
      "loss": 1.3693,
      "step": 1360
    },
    {
      "epoch": 1.5532879818594103,
      "grad_norm": 25.24136734008789,
      "learning_rate": 1.559568152600532e-05,
      "loss": 1.4876,
      "step": 1370
    },
    {
      "epoch": 1.564625850340136,
      "grad_norm": 13.99299144744873,
      "learning_rate": 1.547345831655387e-05,
      "loss": 1.3684,
      "step": 1380
    },
    {
      "epoch": 1.5759637188208617,
      "grad_norm": 17.318729400634766,
      "learning_rate": 1.5351235107102416e-05,
      "loss": 1.2686,
      "step": 1390
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 16.49215316772461,
      "learning_rate": 1.5229011897650965e-05,
      "loss": 1.3287,
      "step": 1400
    },
    {
      "epoch": 1.598639455782313,
      "grad_norm": 17.993852615356445,
      "learning_rate": 1.5106788688199514e-05,
      "loss": 1.3661,
      "step": 1410
    },
    {
      "epoch": 1.6099773242630384,
      "grad_norm": 17.669912338256836,
      "learning_rate": 1.4984565478748061e-05,
      "loss": 1.4371,
      "step": 1420
    },
    {
      "epoch": 1.6213151927437641,
      "grad_norm": 32.347835540771484,
      "learning_rate": 1.4862342269296609e-05,
      "loss": 1.5957,
      "step": 1430
    },
    {
      "epoch": 1.6326530612244898,
      "grad_norm": 15.98144245147705,
      "learning_rate": 1.4740119059845155e-05,
      "loss": 1.3895,
      "step": 1440
    },
    {
      "epoch": 1.6439909297052155,
      "grad_norm": 15.446006774902344,
      "learning_rate": 1.4617895850393702e-05,
      "loss": 1.4985,
      "step": 1450
    },
    {
      "epoch": 1.655328798185941,
      "grad_norm": 20.650182723999023,
      "learning_rate": 1.4495672640942251e-05,
      "loss": 1.3036,
      "step": 1460
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 18.82501220703125,
      "learning_rate": 1.4373449431490799e-05,
      "loss": 1.3777,
      "step": 1470
    },
    {
      "epoch": 1.6780045351473922,
      "grad_norm": 24.08733558654785,
      "learning_rate": 1.4251226222039346e-05,
      "loss": 1.4847,
      "step": 1480
    },
    {
      "epoch": 1.689342403628118,
      "grad_norm": 15.655111312866211,
      "learning_rate": 1.4129003012587895e-05,
      "loss": 1.6208,
      "step": 1490
    },
    {
      "epoch": 1.7006802721088436,
      "grad_norm": 14.29283618927002,
      "learning_rate": 1.4006779803136441e-05,
      "loss": 1.4783,
      "step": 1500
    },
    {
      "epoch": 1.7120181405895691,
      "grad_norm": 27.245153427124023,
      "learning_rate": 1.388455659368499e-05,
      "loss": 1.3621,
      "step": 1510
    },
    {
      "epoch": 1.7233560090702946,
      "grad_norm": 17.18270492553711,
      "learning_rate": 1.3762333384233537e-05,
      "loss": 1.4615,
      "step": 1520
    },
    {
      "epoch": 1.7346938775510203,
      "grad_norm": 30.546113967895508,
      "learning_rate": 1.3640110174782085e-05,
      "loss": 1.4101,
      "step": 1530
    },
    {
      "epoch": 1.746031746031746,
      "grad_norm": 27.630264282226562,
      "learning_rate": 1.3517886965330632e-05,
      "loss": 1.5457,
      "step": 1540
    },
    {
      "epoch": 1.7573696145124718,
      "grad_norm": 24.351619720458984,
      "learning_rate": 1.339566375587918e-05,
      "loss": 1.4364,
      "step": 1550
    },
    {
      "epoch": 1.7687074829931972,
      "grad_norm": 22.478717803955078,
      "learning_rate": 1.3273440546427727e-05,
      "loss": 1.5078,
      "step": 1560
    },
    {
      "epoch": 1.780045351473923,
      "grad_norm": 33.06885528564453,
      "learning_rate": 1.3151217336976275e-05,
      "loss": 1.3344,
      "step": 1570
    },
    {
      "epoch": 1.7913832199546484,
      "grad_norm": 25.309829711914062,
      "learning_rate": 1.3028994127524822e-05,
      "loss": 1.4234,
      "step": 1580
    },
    {
      "epoch": 1.8027210884353742,
      "grad_norm": 26.68161392211914,
      "learning_rate": 1.2906770918073371e-05,
      "loss": 1.326,
      "step": 1590
    },
    {
      "epoch": 1.8140589569160999,
      "grad_norm": 24.11896324157715,
      "learning_rate": 1.2784547708621919e-05,
      "loss": 1.5031,
      "step": 1600
    },
    {
      "epoch": 1.8253968253968254,
      "grad_norm": 19.29245948791504,
      "learning_rate": 1.2662324499170464e-05,
      "loss": 1.3132,
      "step": 1610
    },
    {
      "epoch": 1.836734693877551,
      "grad_norm": 18.402624130249023,
      "learning_rate": 1.2540101289719014e-05,
      "loss": 1.359,
      "step": 1620
    },
    {
      "epoch": 1.8480725623582765,
      "grad_norm": 22.64293670654297,
      "learning_rate": 1.2417878080267561e-05,
      "loss": 1.6493,
      "step": 1630
    },
    {
      "epoch": 1.8594104308390023,
      "grad_norm": 20.680465698242188,
      "learning_rate": 1.2295654870816108e-05,
      "loss": 1.3705,
      "step": 1640
    },
    {
      "epoch": 1.870748299319728,
      "grad_norm": 21.203262329101562,
      "learning_rate": 1.2173431661364656e-05,
      "loss": 1.4593,
      "step": 1650
    },
    {
      "epoch": 1.8820861678004537,
      "grad_norm": 19.054349899291992,
      "learning_rate": 1.2051208451913203e-05,
      "loss": 1.316,
      "step": 1660
    },
    {
      "epoch": 1.8934240362811792,
      "grad_norm": 18.196651458740234,
      "learning_rate": 1.1928985242461752e-05,
      "loss": 1.4121,
      "step": 1670
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 14.884149551391602,
      "learning_rate": 1.1806762033010298e-05,
      "loss": 1.2392,
      "step": 1680
    },
    {
      "epoch": 1.9160997732426304,
      "grad_norm": 21.171005249023438,
      "learning_rate": 1.1684538823558846e-05,
      "loss": 1.3379,
      "step": 1690
    },
    {
      "epoch": 1.927437641723356,
      "grad_norm": 18.805988311767578,
      "learning_rate": 1.1562315614107395e-05,
      "loss": 1.3454,
      "step": 1700
    },
    {
      "epoch": 1.9387755102040818,
      "grad_norm": 19.397449493408203,
      "learning_rate": 1.1440092404655942e-05,
      "loss": 1.4548,
      "step": 1710
    },
    {
      "epoch": 1.9501133786848073,
      "grad_norm": 20.647939682006836,
      "learning_rate": 1.131786919520449e-05,
      "loss": 1.4206,
      "step": 1720
    },
    {
      "epoch": 1.9614512471655328,
      "grad_norm": 22.62149429321289,
      "learning_rate": 1.1195645985753037e-05,
      "loss": 1.3777,
      "step": 1730
    },
    {
      "epoch": 1.9727891156462585,
      "grad_norm": 27.34062385559082,
      "learning_rate": 1.1073422776301585e-05,
      "loss": 1.3447,
      "step": 1740
    },
    {
      "epoch": 1.9841269841269842,
      "grad_norm": 29.27070426940918,
      "learning_rate": 1.0951199566850134e-05,
      "loss": 1.3864,
      "step": 1750
    },
    {
      "epoch": 1.99546485260771,
      "grad_norm": 16.5633487701416,
      "learning_rate": 1.082897635739868e-05,
      "loss": 1.2893,
      "step": 1760
    },
    {
      "epoch": 2.0,
      "eval_classification_report": {
        "accuracy": 0.3989414488918293,
        "ar": {
          "f1-score": 0.3793103448275862,
          "precision": 0.4618320610687023,
          "recall": 0.32180851063829785,
          "support": 376.0
        },
        "cl": {
          "f1-score": 0.37325038880248834,
          "precision": 0.3380281690140845,
          "recall": 0.4166666666666667,
          "support": 576.0
        },
        "co": {
          "f1-score": 0.3398230088495575,
          "precision": 0.4343891402714932,
          "recall": 0.27906976744186046,
          "support": 344.0
        },
        "es": {
          "f1-score": 0.4501323918799647,
          "precision": 0.4322033898305085,
          "recall": 0.4696132596685083,
          "support": 543.0
        },
        "macro avg": {
          "f1-score": 0.3755741665997943,
          "precision": 0.4098704182426044,
          "recall": 0.3639331480997811,
          "support": 3023.0
        },
        "mx": {
          "f1-score": 0.4408888888888889,
          "precision": 0.3875,
          "recall": 0.511340206185567,
          "support": 485.0
        },
        "pe": {
          "f1-score": 0.35097493036211697,
          "precision": 0.34054054054054056,
          "recall": 0.3620689655172414,
          "support": 348.0
        },
        "pr": {
          "f1-score": 0.6666666666666666,
          "precision": 0.7159090909090909,
          "recall": 0.6237623762376238,
          "support": 101.0
        },
        "uy": {
          "f1-score": 0.3076923076923077,
          "precision": 0.4117647058823529,
          "recall": 0.24561403508771928,
          "support": 228.0
        },
        "ve": {
          "f1-score": 0.07142857142857142,
          "precision": 0.16666666666666666,
          "recall": 0.045454545454545456,
          "support": 22.0
        },
        "weighted avg": {
          "f1-score": 0.3949596137532948,
          "precision": 0.40647397656072576,
          "recall": 0.3989414488918293,
          "support": 3023.0
        }
      },
      "eval_f1": 0.3755741665997943,
      "eval_loss": 1.6900478601455688,
      "eval_runtime": 4.3866,
      "eval_samples_per_second": 689.137,
      "eval_steps_per_second": 86.171,
      "step": 1764
    }
  ],
  "logging_steps": 10,
  "max_steps": 2646,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 463959726481152.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}