{ "best_metric": 0.3353596555098448, "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_2/checkpoint-1168", "epoch": 2.0, "eval_steps": 500, "global_step": 1168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017123287671232876, "grad_norm": 11.892024993896484, "learning_rate": 7.592988034631688e-07, "loss": 2.215, "step": 10 }, { "epoch": 0.03424657534246575, "grad_norm": 11.759550094604492, "learning_rate": 1.5185976069263376e-06, "loss": 2.2173, "step": 20 }, { "epoch": 0.05136986301369863, "grad_norm": 11.668200492858887, "learning_rate": 2.2778964103895065e-06, "loss": 2.2151, "step": 30 }, { "epoch": 0.0684931506849315, "grad_norm": 10.476677894592285, "learning_rate": 3.0371952138526753e-06, "loss": 2.1641, "step": 40 }, { "epoch": 0.08561643835616438, "grad_norm": 9.34625244140625, "learning_rate": 3.7964940173158444e-06, "loss": 2.1653, "step": 50 }, { "epoch": 0.10273972602739725, "grad_norm": 9.768696784973145, "learning_rate": 4.555792820779013e-06, "loss": 2.1354, "step": 60 }, { "epoch": 0.11986301369863013, "grad_norm": 8.377198219299316, "learning_rate": 5.315091624242182e-06, "loss": 2.1244, "step": 70 }, { "epoch": 0.136986301369863, "grad_norm": 11.591525077819824, "learning_rate": 6.0743904277053505e-06, "loss": 2.0764, "step": 80 }, { "epoch": 0.1541095890410959, "grad_norm": 9.914732933044434, "learning_rate": 6.83368923116852e-06, "loss": 2.1816, "step": 90 }, { "epoch": 0.17123287671232876, "grad_norm": 11.065092086791992, "learning_rate": 7.592988034631689e-06, "loss": 2.0771, "step": 100 }, { "epoch": 0.18835616438356165, "grad_norm": 11.352612495422363, "learning_rate": 8.352286838094857e-06, "loss": 2.0857, "step": 110 }, { "epoch": 0.2054794520547945, "grad_norm": 11.720881462097168, "learning_rate": 9.111585641558026e-06, "loss": 2.0766, "step": 120 }, { "epoch": 0.2226027397260274, "grad_norm": 10.508756637573242, "learning_rate": 9.870884445021195e-06, "loss": 2.1546, "step": 130 }, { "epoch": 0.23972602739726026, "grad_norm": 11.728009223937988, "learning_rate": 1.0630183248484364e-05, "loss": 2.0462, "step": 140 }, { "epoch": 0.2568493150684932, "grad_norm": 9.81351089477539, "learning_rate": 1.1389482051947532e-05, "loss": 2.0667, "step": 150 }, { "epoch": 0.273972602739726, "grad_norm": 8.76012134552002, "learning_rate": 1.2148780855410701e-05, "loss": 2.0924, "step": 160 }, { "epoch": 0.2910958904109589, "grad_norm": 9.990751266479492, "learning_rate": 1.290807965887387e-05, "loss": 2.0672, "step": 170 }, { "epoch": 0.3082191780821918, "grad_norm": 10.576468467712402, "learning_rate": 1.366737846233704e-05, "loss": 2.0109, "step": 180 }, { "epoch": 0.3253424657534247, "grad_norm": 9.048011779785156, "learning_rate": 1.4426677265800208e-05, "loss": 2.0392, "step": 190 }, { "epoch": 0.3424657534246575, "grad_norm": 11.787674903869629, "learning_rate": 1.5185976069263378e-05, "loss": 2.034, "step": 200 }, { "epoch": 0.3595890410958904, "grad_norm": 10.221675872802734, "learning_rate": 1.5945274872726547e-05, "loss": 1.9762, "step": 210 }, { "epoch": 0.3767123287671233, "grad_norm": 9.597102165222168, "learning_rate": 1.6704573676189714e-05, "loss": 2.0765, "step": 220 }, { "epoch": 0.3938356164383562, "grad_norm": 11.017401695251465, "learning_rate": 1.7463872479652885e-05, "loss": 2.0997, "step": 230 }, { "epoch": 0.410958904109589, "grad_norm": 10.641432762145996, "learning_rate": 1.8223171283116052e-05, "loss": 2.0985, "step": 240 }, { "epoch": 0.4280821917808219, "grad_norm": 8.237184524536133, "learning_rate": 1.8982470086579223e-05, "loss": 2.056, "step": 250 }, { "epoch": 0.4452054794520548, "grad_norm": 8.095678329467773, "learning_rate": 1.974176889004239e-05, "loss": 2.0807, "step": 260 }, { "epoch": 0.4623287671232877, "grad_norm": 9.02453899383545, "learning_rate": 2.050106769350556e-05, "loss": 2.102, "step": 270 }, { "epoch": 0.4794520547945205, "grad_norm": 9.920637130737305, "learning_rate": 2.126036649696873e-05, "loss": 1.9904, "step": 280 }, { "epoch": 0.4965753424657534, "grad_norm": 10.004680633544922, "learning_rate": 2.2019665300431896e-05, "loss": 2.0086, "step": 290 }, { "epoch": 0.5136986301369864, "grad_norm": 8.83816909790039, "learning_rate": 2.2140715952718072e-05, "loss": 1.9748, "step": 300 }, { "epoch": 0.5308219178082192, "grad_norm": 8.490972518920898, "learning_rate": 2.1988231683071254e-05, "loss": 2.0026, "step": 310 }, { "epoch": 0.547945205479452, "grad_norm": 9.475632667541504, "learning_rate": 2.1835747413424435e-05, "loss": 2.0512, "step": 320 }, { "epoch": 0.565068493150685, "grad_norm": 9.864670753479004, "learning_rate": 2.1683263143777617e-05, "loss": 2.0304, "step": 330 }, { "epoch": 0.5821917808219178, "grad_norm": 10.569999694824219, "learning_rate": 2.15307788741308e-05, "loss": 1.9682, "step": 340 }, { "epoch": 0.5993150684931506, "grad_norm": 9.928328514099121, "learning_rate": 2.137829460448398e-05, "loss": 2.0027, "step": 350 }, { "epoch": 0.6164383561643836, "grad_norm": 8.040825843811035, "learning_rate": 2.1225810334837162e-05, "loss": 1.9177, "step": 360 }, { "epoch": 0.6335616438356164, "grad_norm": 10.714200019836426, "learning_rate": 2.1073326065190344e-05, "loss": 2.0449, "step": 370 }, { "epoch": 0.6506849315068494, "grad_norm": 8.81334114074707, "learning_rate": 2.0920841795543526e-05, "loss": 2.0512, "step": 380 }, { "epoch": 0.6678082191780822, "grad_norm": 10.270297050476074, "learning_rate": 2.0768357525896707e-05, "loss": 1.9713, "step": 390 }, { "epoch": 0.684931506849315, "grad_norm": 10.366915702819824, "learning_rate": 2.061587325624989e-05, "loss": 1.9791, "step": 400 }, { "epoch": 0.702054794520548, "grad_norm": 10.001801490783691, "learning_rate": 2.0463388986603067e-05, "loss": 1.9183, "step": 410 }, { "epoch": 0.7191780821917808, "grad_norm": 11.41006088256836, "learning_rate": 2.031090471695625e-05, "loss": 1.9395, "step": 420 }, { "epoch": 0.7363013698630136, "grad_norm": 9.574875831604004, "learning_rate": 2.015842044730943e-05, "loss": 1.9356, "step": 430 }, { "epoch": 0.7534246575342466, "grad_norm": 11.301505088806152, "learning_rate": 2.0005936177662612e-05, "loss": 2.059, "step": 440 }, { "epoch": 0.7705479452054794, "grad_norm": 11.491235733032227, "learning_rate": 1.9853451908015794e-05, "loss": 2.0022, "step": 450 }, { "epoch": 0.7876712328767124, "grad_norm": 9.770740509033203, "learning_rate": 1.9700967638368976e-05, "loss": 1.9812, "step": 460 }, { "epoch": 0.8047945205479452, "grad_norm": 9.801970481872559, "learning_rate": 1.9548483368722154e-05, "loss": 2.0287, "step": 470 }, { "epoch": 0.821917808219178, "grad_norm": 10.885910034179688, "learning_rate": 1.9395999099075336e-05, "loss": 2.0574, "step": 480 }, { "epoch": 0.839041095890411, "grad_norm": 10.215595245361328, "learning_rate": 1.9243514829428517e-05, "loss": 2.0379, "step": 490 }, { "epoch": 0.8561643835616438, "grad_norm": 10.391200065612793, "learning_rate": 1.9091030559781702e-05, "loss": 1.9701, "step": 500 }, { "epoch": 0.8732876712328768, "grad_norm": 10.24344253540039, "learning_rate": 1.893854629013488e-05, "loss": 1.9256, "step": 510 }, { "epoch": 0.8904109589041096, "grad_norm": 11.228612899780273, "learning_rate": 1.8786062020488062e-05, "loss": 2.029, "step": 520 }, { "epoch": 0.9075342465753424, "grad_norm": 13.008833885192871, "learning_rate": 1.8633577750841244e-05, "loss": 1.9837, "step": 530 }, { "epoch": 0.9246575342465754, "grad_norm": 10.637398719787598, "learning_rate": 1.8481093481194426e-05, "loss": 1.8445, "step": 540 }, { "epoch": 0.9417808219178082, "grad_norm": 13.296565055847168, "learning_rate": 1.8328609211547607e-05, "loss": 1.9631, "step": 550 }, { "epoch": 0.958904109589041, "grad_norm": 11.811957359313965, "learning_rate": 1.817612494190079e-05, "loss": 1.961, "step": 560 }, { "epoch": 0.976027397260274, "grad_norm": 11.553853988647461, "learning_rate": 1.8023640672253967e-05, "loss": 1.9842, "step": 570 }, { "epoch": 0.9931506849315068, "grad_norm": 12.016478538513184, "learning_rate": 1.787115640260715e-05, "loss": 1.9183, "step": 580 }, { "epoch": 1.0, "eval_classification_report": { "accuracy": 0.273, "ar": { "f1-score": 0.22297297297297297, "precision": 0.36666666666666664, "recall": 0.16019417475728157, "support": 206.0 }, "cl": { "f1-score": 0.06876790830945559, "precision": 0.2033898305084746, "recall": 0.041379310344827586, "support": 290.0 }, "co": { "f1-score": 0.3118081180811808, "precision": 0.21311475409836064, "recall": 0.5807560137457045, "support": 291.0 }, "es": { "f1-score": 0.33212996389891697, "precision": 0.33454545454545453, "recall": 0.32974910394265233, "support": 279.0 }, "macro avg": { "f1-score": 0.2506678702739225, "precision": 0.29977219111270315, "recall": 0.2607198556236934, "support": 2000.0 }, "mx": { "f1-score": 0.26535626535626533, "precision": 0.46551724137931033, "recall": 0.18556701030927836, "support": 291.0 }, "pe": { "f1-score": 0.21484375, "precision": 0.248868778280543, "recall": 0.18900343642611683, "support": 291.0 }, "pr": { "f1-score": 0.574585635359116, "precision": 0.65, "recall": 0.5148514851485149, "support": 101.0 }, "uy": { "f1-score": 0.26554621848739496, "precision": 0.21584699453551912, "recall": 0.34497816593886466, "support": 229.0 }, "ve": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 22.0 }, "weighted avg": { "f1-score": 0.2539284929025767, "precision": 0.30641812645562344, "recall": 0.273, "support": 2000.0 } }, "eval_f1": 0.2506678702739225, "eval_loss": 1.8817169666290283, "eval_runtime": 2.8277, "eval_samples_per_second": 707.278, "eval_steps_per_second": 88.41, "step": 584 }, { "epoch": 1.0102739726027397, "grad_norm": 11.690625190734863, "learning_rate": 1.771867213296033e-05, "loss": 1.8131, "step": 590 }, { "epoch": 1.0273972602739727, "grad_norm": 12.48835563659668, "learning_rate": 1.7566187863313512e-05, "loss": 1.7617, "step": 600 }, { "epoch": 1.0445205479452055, "grad_norm": 15.49386978149414, "learning_rate": 1.7413703593666694e-05, "loss": 1.7815, "step": 610 }, { "epoch": 1.0616438356164384, "grad_norm": 17.888931274414062, "learning_rate": 1.7261219324019876e-05, "loss": 1.7337, "step": 620 }, { "epoch": 1.0787671232876712, "grad_norm": 15.020529747009277, "learning_rate": 1.7108735054373058e-05, "loss": 1.6467, "step": 630 }, { "epoch": 1.095890410958904, "grad_norm": 13.33208179473877, "learning_rate": 1.695625078472624e-05, "loss": 1.8363, "step": 640 }, { "epoch": 1.1130136986301369, "grad_norm": 18.24432373046875, "learning_rate": 1.680376651507942e-05, "loss": 1.9009, "step": 650 }, { "epoch": 1.13013698630137, "grad_norm": 13.63119125366211, "learning_rate": 1.6651282245432603e-05, "loss": 1.7742, "step": 660 }, { "epoch": 1.1472602739726028, "grad_norm": 15.010072708129883, "learning_rate": 1.6498797975785784e-05, "loss": 1.6258, "step": 670 }, { "epoch": 1.1643835616438356, "grad_norm": 11.895198822021484, "learning_rate": 1.6346313706138963e-05, "loss": 1.8729, "step": 680 }, { "epoch": 1.1815068493150684, "grad_norm": 14.77038288116455, "learning_rate": 1.6193829436492144e-05, "loss": 1.7128, "step": 690 }, { "epoch": 1.1986301369863013, "grad_norm": 11.579257011413574, "learning_rate": 1.6041345166845326e-05, "loss": 1.8186, "step": 700 }, { "epoch": 1.2157534246575343, "grad_norm": 13.447906494140625, "learning_rate": 1.5888860897198508e-05, "loss": 1.7262, "step": 710 }, { "epoch": 1.2328767123287672, "grad_norm": 16.56707763671875, "learning_rate": 1.573637662755169e-05, "loss": 1.8165, "step": 720 }, { "epoch": 1.25, "grad_norm": 12.793381690979004, "learning_rate": 1.558389235790487e-05, "loss": 1.7352, "step": 730 }, { "epoch": 1.2671232876712328, "grad_norm": 14.821900367736816, "learning_rate": 1.543140808825805e-05, "loss": 1.8093, "step": 740 }, { "epoch": 1.2842465753424657, "grad_norm": 13.674068450927734, "learning_rate": 1.527892381861123e-05, "loss": 1.7997, "step": 750 }, { "epoch": 1.3013698630136985, "grad_norm": 21.423152923583984, "learning_rate": 1.5126439548964413e-05, "loss": 1.8501, "step": 760 }, { "epoch": 1.3184931506849316, "grad_norm": 13.842451095581055, "learning_rate": 1.4973955279317594e-05, "loss": 1.6396, "step": 770 }, { "epoch": 1.3356164383561644, "grad_norm": 12.872172355651855, "learning_rate": 1.4821471009670778e-05, "loss": 1.7377, "step": 780 }, { "epoch": 1.3527397260273972, "grad_norm": 18.74566650390625, "learning_rate": 1.466898674002396e-05, "loss": 1.7576, "step": 790 }, { "epoch": 1.36986301369863, "grad_norm": 15.825615882873535, "learning_rate": 1.451650247037714e-05, "loss": 1.738, "step": 800 }, { "epoch": 1.3869863013698631, "grad_norm": 13.375591278076172, "learning_rate": 1.4364018200730321e-05, "loss": 1.6459, "step": 810 }, { "epoch": 1.404109589041096, "grad_norm": 17.51374053955078, "learning_rate": 1.4211533931083503e-05, "loss": 1.7292, "step": 820 }, { "epoch": 1.4212328767123288, "grad_norm": 15.895259857177734, "learning_rate": 1.4059049661436683e-05, "loss": 1.8173, "step": 830 }, { "epoch": 1.4383561643835616, "grad_norm": 24.653060913085938, "learning_rate": 1.3906565391789864e-05, "loss": 1.7303, "step": 840 }, { "epoch": 1.4554794520547945, "grad_norm": 19.205734252929688, "learning_rate": 1.3754081122143046e-05, "loss": 1.5581, "step": 850 }, { "epoch": 1.4726027397260273, "grad_norm": 13.14504337310791, "learning_rate": 1.3601596852496226e-05, "loss": 1.7028, "step": 860 }, { "epoch": 1.4897260273972603, "grad_norm": 19.409351348876953, "learning_rate": 1.3449112582849408e-05, "loss": 1.7864, "step": 870 }, { "epoch": 1.5068493150684932, "grad_norm": 20.153881072998047, "learning_rate": 1.329662831320259e-05, "loss": 1.5898, "step": 880 }, { "epoch": 1.523972602739726, "grad_norm": 13.584912300109863, "learning_rate": 1.314414404355577e-05, "loss": 1.6459, "step": 890 }, { "epoch": 1.541095890410959, "grad_norm": 20.62996482849121, "learning_rate": 1.2991659773908951e-05, "loss": 1.8207, "step": 900 }, { "epoch": 1.558219178082192, "grad_norm": 16.82609748840332, "learning_rate": 1.2839175504262133e-05, "loss": 1.6256, "step": 910 }, { "epoch": 1.5753424657534247, "grad_norm": 19.674978256225586, "learning_rate": 1.2686691234615316e-05, "loss": 1.6743, "step": 920 }, { "epoch": 1.5924657534246576, "grad_norm": 23.367334365844727, "learning_rate": 1.2534206964968498e-05, "loss": 1.602, "step": 930 }, { "epoch": 1.6095890410958904, "grad_norm": 19.46713638305664, "learning_rate": 1.2381722695321678e-05, "loss": 1.6916, "step": 940 }, { "epoch": 1.6267123287671232, "grad_norm": 24.35736083984375, "learning_rate": 1.222923842567486e-05, "loss": 1.6058, "step": 950 }, { "epoch": 1.643835616438356, "grad_norm": 24.921846389770508, "learning_rate": 1.2076754156028041e-05, "loss": 1.7183, "step": 960 }, { "epoch": 1.660958904109589, "grad_norm": 15.05435848236084, "learning_rate": 1.1924269886381221e-05, "loss": 1.6577, "step": 970 }, { "epoch": 1.678082191780822, "grad_norm": 15.788996696472168, "learning_rate": 1.1771785616734403e-05, "loss": 1.6803, "step": 980 }, { "epoch": 1.6952054794520548, "grad_norm": 18.309133529663086, "learning_rate": 1.1619301347087585e-05, "loss": 1.5949, "step": 990 }, { "epoch": 1.7123287671232876, "grad_norm": 20.53139877319336, "learning_rate": 1.1466817077440765e-05, "loss": 1.6097, "step": 1000 }, { "epoch": 1.7294520547945207, "grad_norm": 23.069534301757812, "learning_rate": 1.1314332807793946e-05, "loss": 1.6068, "step": 1010 }, { "epoch": 1.7465753424657535, "grad_norm": 22.394153594970703, "learning_rate": 1.1161848538147128e-05, "loss": 1.6641, "step": 1020 }, { "epoch": 1.7636986301369864, "grad_norm": 20.755950927734375, "learning_rate": 1.100936426850031e-05, "loss": 1.4966, "step": 1030 }, { "epoch": 1.7808219178082192, "grad_norm": 16.427976608276367, "learning_rate": 1.0856879998853491e-05, "loss": 1.7212, "step": 1040 }, { "epoch": 1.797945205479452, "grad_norm": 33.8179817199707, "learning_rate": 1.0704395729206671e-05, "loss": 1.7811, "step": 1050 }, { "epoch": 1.8150684931506849, "grad_norm": 20.979110717773438, "learning_rate": 1.0551911459559853e-05, "loss": 1.583, "step": 1060 }, { "epoch": 1.8321917808219177, "grad_norm": 17.085895538330078, "learning_rate": 1.0399427189913035e-05, "loss": 1.7014, "step": 1070 }, { "epoch": 1.8493150684931505, "grad_norm": 18.183605194091797, "learning_rate": 1.0246942920266215e-05, "loss": 1.5758, "step": 1080 }, { "epoch": 1.8664383561643836, "grad_norm": 16.887876510620117, "learning_rate": 1.0094458650619396e-05, "loss": 1.4666, "step": 1090 }, { "epoch": 1.8835616438356164, "grad_norm": 18.789949417114258, "learning_rate": 9.94197438097258e-06, "loss": 1.7645, "step": 1100 }, { "epoch": 1.9006849315068495, "grad_norm": 18.971622467041016, "learning_rate": 9.78949011132576e-06, "loss": 1.6875, "step": 1110 }, { "epoch": 1.9178082191780823, "grad_norm": 24.858366012573242, "learning_rate": 9.637005841678942e-06, "loss": 1.6846, "step": 1120 }, { "epoch": 1.9349315068493151, "grad_norm": 13.931944847106934, "learning_rate": 9.484521572032123e-06, "loss": 1.6025, "step": 1130 }, { "epoch": 1.952054794520548, "grad_norm": 17.908315658569336, "learning_rate": 9.332037302385303e-06, "loss": 1.4988, "step": 1140 }, { "epoch": 1.9691780821917808, "grad_norm": 16.93858528137207, "learning_rate": 9.179553032738485e-06, "loss": 1.7157, "step": 1150 }, { "epoch": 1.9863013698630136, "grad_norm": 19.648006439208984, "learning_rate": 9.027068763091667e-06, "loss": 1.5337, "step": 1160 }, { "epoch": 2.0, "eval_classification_report": { "accuracy": 0.3535, "ar": { "f1-score": 0.31309904153354634, "precision": 0.45794392523364486, "recall": 0.23786407766990292, "support": 206.0 }, "cl": { "f1-score": 0.23689320388349513, "precision": 0.27111111111111114, "recall": 0.2103448275862069, "support": 290.0 }, "co": { "f1-score": 0.36268068331143233, "precision": 0.2936170212765957, "recall": 0.4742268041237113, "support": 291.0 }, "es": { "f1-score": 0.38976377952755903, "precision": 0.43231441048034935, "recall": 0.3548387096774194, "support": 279.0 }, "macro avg": { "f1-score": 0.3353596555098448, "precision": 0.3519727641328481, "recall": 0.3389548790003148, "support": 2000.0 }, "mx": { "f1-score": 0.36254980079681276, "precision": 0.4312796208530806, "recall": 0.3127147766323024, "support": 291.0 }, "pe": { "f1-score": 0.31550068587105623, "precision": 0.2625570776255708, "recall": 0.3951890034364261, "support": 291.0 }, "pr": { "f1-score": 0.6543778801843319, "precision": 0.6120689655172413, "recall": 0.7029702970297029, "support": 101.0 }, "uy": { "f1-score": 0.3833718244803695, "precision": 0.4068627450980392, "recall": 0.3624454148471616, "support": 229.0 }, "ve": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 22.0 }, "weighted avg": { "f1-score": 0.34933930516945594, "precision": 0.3679569789689199, "recall": 0.3535, "support": 2000.0 } }, "eval_f1": 0.3353596555098448, "eval_loss": 1.793221116065979, "eval_runtime": 2.7925, "eval_samples_per_second": 716.208, "eval_steps_per_second": 89.526, "step": 1168 } ], "logging_steps": 10, "max_steps": 1752, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 153469167996672.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }