{ "best_metric": 0.3142646730168648, "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_9/checkpoint-882", "epoch": 1.0, "eval_steps": 500, "global_step": 882, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011337868480725623, "grad_norm": 9.890141487121582, "learning_rate": 9.346480722758123e-07, "loss": 2.2337, "step": 10 }, { "epoch": 0.022675736961451247, "grad_norm": 10.97945785522461, "learning_rate": 1.8692961445516245e-06, "loss": 2.2359, "step": 20 }, { "epoch": 0.034013605442176874, "grad_norm": 10.244830131530762, "learning_rate": 2.8039442168274367e-06, "loss": 2.2034, "step": 30 }, { "epoch": 0.045351473922902494, "grad_norm": 11.170487403869629, "learning_rate": 3.738592289103249e-06, "loss": 2.2238, "step": 40 }, { "epoch": 0.05668934240362812, "grad_norm": 8.994032859802246, "learning_rate": 4.673240361379061e-06, "loss": 2.143, "step": 50 }, { "epoch": 0.06802721088435375, "grad_norm": 11.781458854675293, "learning_rate": 5.607888433654873e-06, "loss": 2.1299, "step": 60 }, { "epoch": 0.07936507936507936, "grad_norm": 10.54732894897461, "learning_rate": 6.542536505930685e-06, "loss": 2.102, "step": 70 }, { "epoch": 0.09070294784580499, "grad_norm": 8.15542221069336, "learning_rate": 7.477184578206498e-06, "loss": 2.0436, "step": 80 }, { "epoch": 0.10204081632653061, "grad_norm": 12.086600303649902, "learning_rate": 8.41183265048231e-06, "loss": 2.0417, "step": 90 }, { "epoch": 0.11337868480725624, "grad_norm": 10.285418510437012, "learning_rate": 9.346480722758123e-06, "loss": 2.0767, "step": 100 }, { "epoch": 0.12471655328798185, "grad_norm": 10.342191696166992, "learning_rate": 1.0281128795033934e-05, "loss": 2.0401, "step": 110 }, { "epoch": 0.1360544217687075, "grad_norm": 9.20302677154541, "learning_rate": 1.1215776867309747e-05, "loss": 1.99, "step": 120 }, { "epoch": 0.1473922902494331, "grad_norm": 9.420401573181152, "learning_rate": 1.215042493958556e-05, "loss": 1.9867, "step": 130 }, { "epoch": 0.15873015873015872, "grad_norm": 7.75075101852417, "learning_rate": 1.308507301186137e-05, "loss": 1.9988, "step": 140 }, { "epoch": 0.17006802721088435, "grad_norm": 8.807808876037598, "learning_rate": 1.4019721084137183e-05, "loss": 1.9886, "step": 150 }, { "epoch": 0.18140589569160998, "grad_norm": 10.291740417480469, "learning_rate": 1.4954369156412996e-05, "loss": 1.9393, "step": 160 }, { "epoch": 0.1927437641723356, "grad_norm": 8.806387901306152, "learning_rate": 1.588901722868881e-05, "loss": 1.9602, "step": 170 }, { "epoch": 0.20408163265306123, "grad_norm": 7.684463977813721, "learning_rate": 1.682366530096462e-05, "loss": 2.0044, "step": 180 }, { "epoch": 0.21541950113378686, "grad_norm": 8.645553588867188, "learning_rate": 1.7758313373240435e-05, "loss": 2.0519, "step": 190 }, { "epoch": 0.22675736961451248, "grad_norm": 9.162885665893555, "learning_rate": 1.8692961445516246e-05, "loss": 2.0516, "step": 200 }, { "epoch": 0.23809523809523808, "grad_norm": 7.536770343780518, "learning_rate": 1.9627609517792057e-05, "loss": 2.0299, "step": 210 }, { "epoch": 0.2494331065759637, "grad_norm": 8.47108268737793, "learning_rate": 2.0562257590067868e-05, "loss": 2.0816, "step": 220 }, { "epoch": 0.26077097505668934, "grad_norm": 6.5353498458862305, "learning_rate": 2.1496905662343682e-05, "loss": 2.0053, "step": 230 }, { "epoch": 0.272108843537415, "grad_norm": 7.390655994415283, "learning_rate": 2.2431553734619493e-05, "loss": 1.956, "step": 240 }, { "epoch": 0.2834467120181406, "grad_norm": 11.590116500854492, "learning_rate": 2.3366201806895304e-05, "loss": 1.9745, "step": 250 }, { "epoch": 0.2947845804988662, "grad_norm": 7.277239799499512, "learning_rate": 2.430084987917112e-05, "loss": 2.0846, "step": 260 }, { "epoch": 0.30612244897959184, "grad_norm": 7.210775375366211, "learning_rate": 2.523549795144693e-05, "loss": 1.9379, "step": 270 }, { "epoch": 0.31746031746031744, "grad_norm": 9.101860046386719, "learning_rate": 2.617014602372274e-05, "loss": 1.9774, "step": 280 }, { "epoch": 0.3287981859410431, "grad_norm": 8.541576385498047, "learning_rate": 2.7104794095998556e-05, "loss": 1.9359, "step": 290 }, { "epoch": 0.3401360544217687, "grad_norm": 9.097405433654785, "learning_rate": 2.8039442168274367e-05, "loss": 1.9943, "step": 300 }, { "epoch": 0.35147392290249435, "grad_norm": 10.134627342224121, "learning_rate": 2.8551341727859275e-05, "loss": 1.9935, "step": 310 }, { "epoch": 0.36281179138321995, "grad_norm": 9.275653839111328, "learning_rate": 2.842911851840782e-05, "loss": 1.9708, "step": 320 }, { "epoch": 0.3741496598639456, "grad_norm": 10.57584285736084, "learning_rate": 2.830689530895637e-05, "loss": 1.8931, "step": 330 }, { "epoch": 0.3854875283446712, "grad_norm": 8.968500137329102, "learning_rate": 2.818467209950492e-05, "loss": 2.0358, "step": 340 }, { "epoch": 0.3968253968253968, "grad_norm": 7.630152702331543, "learning_rate": 2.806244889005346e-05, "loss": 1.9313, "step": 350 }, { "epoch": 0.40816326530612246, "grad_norm": 10.709409713745117, "learning_rate": 2.794022568060201e-05, "loss": 1.8677, "step": 360 }, { "epoch": 0.41950113378684806, "grad_norm": 8.903763771057129, "learning_rate": 2.781800247115056e-05, "loss": 1.9309, "step": 370 }, { "epoch": 0.4308390022675737, "grad_norm": 11.22157096862793, "learning_rate": 2.769577926169911e-05, "loss": 1.9434, "step": 380 }, { "epoch": 0.4421768707482993, "grad_norm": 13.553994178771973, "learning_rate": 2.7573556052247655e-05, "loss": 1.9612, "step": 390 }, { "epoch": 0.45351473922902497, "grad_norm": 10.572993278503418, "learning_rate": 2.7451332842796204e-05, "loss": 1.9312, "step": 400 }, { "epoch": 0.46485260770975056, "grad_norm": 10.051871299743652, "learning_rate": 2.7329109633344753e-05, "loss": 2.0256, "step": 410 }, { "epoch": 0.47619047619047616, "grad_norm": 8.120019912719727, "learning_rate": 2.7206886423893295e-05, "loss": 1.8118, "step": 420 }, { "epoch": 0.4875283446712018, "grad_norm": 8.908893585205078, "learning_rate": 2.7084663214441844e-05, "loss": 1.9479, "step": 430 }, { "epoch": 0.4988662131519274, "grad_norm": 9.484359741210938, "learning_rate": 2.6962440004990394e-05, "loss": 1.9766, "step": 440 }, { "epoch": 0.5102040816326531, "grad_norm": 9.914438247680664, "learning_rate": 2.684021679553894e-05, "loss": 1.9492, "step": 450 }, { "epoch": 0.5215419501133787, "grad_norm": 10.76697063446045, "learning_rate": 2.671799358608749e-05, "loss": 1.9384, "step": 460 }, { "epoch": 0.5328798185941043, "grad_norm": 11.358717918395996, "learning_rate": 2.6595770376636038e-05, "loss": 1.8805, "step": 470 }, { "epoch": 0.54421768707483, "grad_norm": 10.204866409301758, "learning_rate": 2.6473547167184583e-05, "loss": 1.7569, "step": 480 }, { "epoch": 0.5555555555555556, "grad_norm": 9.388846397399902, "learning_rate": 2.635132395773313e-05, "loss": 1.9267, "step": 490 }, { "epoch": 0.5668934240362812, "grad_norm": 10.731882095336914, "learning_rate": 2.6229100748281678e-05, "loss": 1.9189, "step": 500 }, { "epoch": 0.5782312925170068, "grad_norm": 17.54758644104004, "learning_rate": 2.6106877538830227e-05, "loss": 1.9435, "step": 510 }, { "epoch": 0.5895691609977324, "grad_norm": 16.29359245300293, "learning_rate": 2.5984654329378773e-05, "loss": 1.9415, "step": 520 }, { "epoch": 0.6009070294784581, "grad_norm": 12.303582191467285, "learning_rate": 2.5862431119927322e-05, "loss": 1.9217, "step": 530 }, { "epoch": 0.6122448979591837, "grad_norm": 11.611120223999023, "learning_rate": 2.574020791047587e-05, "loss": 1.9101, "step": 540 }, { "epoch": 0.6235827664399093, "grad_norm": 8.830730438232422, "learning_rate": 2.5617984701024417e-05, "loss": 1.7771, "step": 550 }, { "epoch": 0.6349206349206349, "grad_norm": 17.929471969604492, "learning_rate": 2.5495761491572966e-05, "loss": 1.9179, "step": 560 }, { "epoch": 0.6462585034013606, "grad_norm": 10.709088325500488, "learning_rate": 2.5373538282121512e-05, "loss": 1.9077, "step": 570 }, { "epoch": 0.6575963718820862, "grad_norm": 11.527586936950684, "learning_rate": 2.5251315072670058e-05, "loss": 1.8224, "step": 580 }, { "epoch": 0.6689342403628118, "grad_norm": 10.246384620666504, "learning_rate": 2.5129091863218607e-05, "loss": 1.8023, "step": 590 }, { "epoch": 0.6802721088435374, "grad_norm": 16.176572799682617, "learning_rate": 2.5006868653767156e-05, "loss": 1.9447, "step": 600 }, { "epoch": 0.691609977324263, "grad_norm": 13.913769721984863, "learning_rate": 2.4884645444315702e-05, "loss": 1.8641, "step": 610 }, { "epoch": 0.7029478458049887, "grad_norm": 12.541318893432617, "learning_rate": 2.476242223486425e-05, "loss": 1.9395, "step": 620 }, { "epoch": 0.7142857142857143, "grad_norm": 12.410688400268555, "learning_rate": 2.46401990254128e-05, "loss": 1.9303, "step": 630 }, { "epoch": 0.7256235827664399, "grad_norm": 9.643453598022461, "learning_rate": 2.4517975815961346e-05, "loss": 1.9703, "step": 640 }, { "epoch": 0.7369614512471655, "grad_norm": 19.51215934753418, "learning_rate": 2.439575260650989e-05, "loss": 1.8502, "step": 650 }, { "epoch": 0.7482993197278912, "grad_norm": 16.120214462280273, "learning_rate": 2.427352939705844e-05, "loss": 1.8644, "step": 660 }, { "epoch": 0.7596371882086168, "grad_norm": 9.631799697875977, "learning_rate": 2.415130618760699e-05, "loss": 1.7562, "step": 670 }, { "epoch": 0.7709750566893424, "grad_norm": 11.26856803894043, "learning_rate": 2.4029082978155535e-05, "loss": 1.9283, "step": 680 }, { "epoch": 0.782312925170068, "grad_norm": 14.097902297973633, "learning_rate": 2.3906859768704085e-05, "loss": 1.8127, "step": 690 }, { "epoch": 0.7936507936507936, "grad_norm": 10.835921287536621, "learning_rate": 2.3784636559252634e-05, "loss": 1.6211, "step": 700 }, { "epoch": 0.8049886621315193, "grad_norm": 13.751789093017578, "learning_rate": 2.3662413349801176e-05, "loss": 1.7555, "step": 710 }, { "epoch": 0.8163265306122449, "grad_norm": 14.243096351623535, "learning_rate": 2.3540190140349725e-05, "loss": 1.6117, "step": 720 }, { "epoch": 0.8276643990929705, "grad_norm": 15.838502883911133, "learning_rate": 2.3417966930898274e-05, "loss": 1.7722, "step": 730 }, { "epoch": 0.8390022675736961, "grad_norm": 12.460963249206543, "learning_rate": 2.329574372144682e-05, "loss": 1.7567, "step": 740 }, { "epoch": 0.8503401360544217, "grad_norm": 17.053138732910156, "learning_rate": 2.317352051199537e-05, "loss": 1.907, "step": 750 }, { "epoch": 0.8616780045351474, "grad_norm": 12.155874252319336, "learning_rate": 2.305129730254392e-05, "loss": 1.9573, "step": 760 }, { "epoch": 0.873015873015873, "grad_norm": 9.946329116821289, "learning_rate": 2.2929074093092468e-05, "loss": 1.7964, "step": 770 }, { "epoch": 0.8843537414965986, "grad_norm": 13.480246543884277, "learning_rate": 2.2806850883641013e-05, "loss": 1.9251, "step": 780 }, { "epoch": 0.8956916099773242, "grad_norm": 9.594596862792969, "learning_rate": 2.268462767418956e-05, "loss": 1.6834, "step": 790 }, { "epoch": 0.9070294784580499, "grad_norm": 11.284895896911621, "learning_rate": 2.2562404464738108e-05, "loss": 1.7469, "step": 800 }, { "epoch": 0.9183673469387755, "grad_norm": 13.537227630615234, "learning_rate": 2.2440181255286654e-05, "loss": 1.6632, "step": 810 }, { "epoch": 0.9297052154195011, "grad_norm": 11.549741744995117, "learning_rate": 2.2317958045835203e-05, "loss": 1.788, "step": 820 }, { "epoch": 0.9410430839002267, "grad_norm": 53.76149368286133, "learning_rate": 2.2195734836383752e-05, "loss": 1.8974, "step": 830 }, { "epoch": 0.9523809523809523, "grad_norm": 9.953411102294922, "learning_rate": 2.2073511626932298e-05, "loss": 1.8805, "step": 840 }, { "epoch": 0.963718820861678, "grad_norm": 18.290058135986328, "learning_rate": 2.1951288417480847e-05, "loss": 1.6644, "step": 850 }, { "epoch": 0.9750566893424036, "grad_norm": 12.544032096862793, "learning_rate": 2.1829065208029393e-05, "loss": 1.6205, "step": 860 }, { "epoch": 0.9863945578231292, "grad_norm": 20.213836669921875, "learning_rate": 2.170684199857794e-05, "loss": 1.8964, "step": 870 }, { "epoch": 0.9977324263038548, "grad_norm": 17.492652893066406, "learning_rate": 2.1584618789126488e-05, "loss": 1.7601, "step": 880 }, { "epoch": 1.0, "eval_classification_report": { "accuracy": 0.3479986768111148, "ar": { "f1-score": 0.36553524804177545, "precision": 0.358974358974359, "recall": 0.3723404255319149, "support": 376.0 }, "cl": { "f1-score": 0.2568250758341759, "precision": 0.3075060532687651, "recall": 0.2204861111111111, "support": 576.0 }, "co": { "f1-score": 0.25892857142857145, "precision": 0.5576923076923077, "recall": 0.1686046511627907, "support": 344.0 }, "es": { "f1-score": 0.41894353369763204, "precision": 0.4144144144144144, "recall": 0.42357274401473294, "support": 543.0 }, "macro avg": { "f1-score": 0.3142646730168648, "precision": 0.3640216920469342, "recall": 0.3175752127167582, "support": 3023.0 }, "mx": { "f1-score": 0.38181818181818183, "precision": 0.2786729857819905, "recall": 0.6061855670103092, "support": 485.0 }, "pe": { "f1-score": 0.3090909090909091, "precision": 0.3269230769230769, "recall": 0.29310344827586204, "support": 348.0 }, "pr": { "f1-score": 0.5769230769230769, "precision": 0.5607476635514018, "recall": 0.594059405940594, "support": 101.0 }, "uy": { "f1-score": 0.26031746031746034, "precision": 0.47126436781609193, "recall": 0.17982456140350878, "support": 228.0 }, "ve": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 22.0 }, "weighted avg": { "f1-score": 0.3348651440888197, "precision": 0.37776395808289953, "recall": 0.3479986768111148, "support": 3023.0 } }, "eval_f1": 0.3142646730168648, "eval_loss": 1.7349679470062256, "eval_runtime": 4.5054, "eval_samples_per_second": 670.979, "eval_steps_per_second": 83.9, "step": 882 } ], "logging_steps": 10, "max_steps": 2646, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 231979863240576.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }