{ "best_metric": 0.3344495615547186, "best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_7/checkpoint-1764", "epoch": 2.0, "eval_steps": 500, "global_step": 1764, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011337868480725623, "grad_norm": 10.448426246643066, "learning_rate": 3.2522795772612694e-07, "loss": 2.2378, "step": 10 }, { "epoch": 0.022675736961451247, "grad_norm": 9.789140701293945, "learning_rate": 6.504559154522539e-07, "loss": 2.2313, "step": 20 }, { "epoch": 0.034013605442176874, "grad_norm": 10.550893783569336, "learning_rate": 9.756838731783808e-07, "loss": 2.2025, "step": 30 }, { "epoch": 0.045351473922902494, "grad_norm": 11.21814250946045, "learning_rate": 1.3009118309045078e-06, "loss": 2.2219, "step": 40 }, { "epoch": 0.05668934240362812, "grad_norm": 9.440560340881348, "learning_rate": 1.6261397886306348e-06, "loss": 2.1929, "step": 50 }, { "epoch": 0.06802721088435375, "grad_norm": 9.947296142578125, "learning_rate": 1.9513677463567616e-06, "loss": 2.1799, "step": 60 }, { "epoch": 0.07936507936507936, "grad_norm": 9.87543773651123, "learning_rate": 2.2765957040828886e-06, "loss": 2.1658, "step": 70 }, { "epoch": 0.09070294784580499, "grad_norm": 8.421579360961914, "learning_rate": 2.6018236618090155e-06, "loss": 2.1612, "step": 80 }, { "epoch": 0.10204081632653061, "grad_norm": 11.202038764953613, "learning_rate": 2.9270516195351425e-06, "loss": 2.1247, "step": 90 }, { "epoch": 0.11337868480725624, "grad_norm": 12.861761093139648, "learning_rate": 3.2522795772612695e-06, "loss": 2.1237, "step": 100 }, { "epoch": 0.12471655328798185, "grad_norm": 11.995753288269043, "learning_rate": 3.5775075349873965e-06, "loss": 2.0857, "step": 110 }, { "epoch": 0.1360544217687075, "grad_norm": 10.376130104064941, "learning_rate": 3.902735492713523e-06, "loss": 2.0663, "step": 120 }, { "epoch": 0.1473922902494331, "grad_norm": 11.153587341308594, "learning_rate": 4.227963450439651e-06, "loss": 2.0499, "step": 130 }, { "epoch": 0.15873015873015872, "grad_norm": 10.43906307220459, "learning_rate": 4.553191408165777e-06, "loss": 2.0444, "step": 140 }, { "epoch": 0.17006802721088435, "grad_norm": 10.224808692932129, "learning_rate": 4.878419365891904e-06, "loss": 2.0634, "step": 150 }, { "epoch": 0.18140589569160998, "grad_norm": 11.265706062316895, "learning_rate": 5.203647323618031e-06, "loss": 1.9827, "step": 160 }, { "epoch": 0.1927437641723356, "grad_norm": 9.84843921661377, "learning_rate": 5.528875281344158e-06, "loss": 2.0075, "step": 170 }, { "epoch": 0.20408163265306123, "grad_norm": 8.700265884399414, "learning_rate": 5.854103239070285e-06, "loss": 2.0327, "step": 180 }, { "epoch": 0.21541950113378686, "grad_norm": 9.785911560058594, "learning_rate": 6.179331196796412e-06, "loss": 2.0588, "step": 190 }, { "epoch": 0.22675736961451248, "grad_norm": 11.363122940063477, "learning_rate": 6.504559154522539e-06, "loss": 2.1046, "step": 200 }, { "epoch": 0.23809523809523808, "grad_norm": 8.90456485748291, "learning_rate": 6.829787112248665e-06, "loss": 2.0855, "step": 210 }, { "epoch": 0.2494331065759637, "grad_norm": 9.236421585083008, "learning_rate": 7.155015069974793e-06, "loss": 2.0381, "step": 220 }, { "epoch": 0.26077097505668934, "grad_norm": 7.954260349273682, "learning_rate": 7.48024302770092e-06, "loss": 2.049, "step": 230 }, { "epoch": 0.272108843537415, "grad_norm": 8.307881355285645, "learning_rate": 7.805470985427046e-06, "loss": 1.969, "step": 240 }, { "epoch": 
0.2834467120181406, "grad_norm": 10.518112182617188, "learning_rate": 8.130698943153173e-06, "loss": 1.9447, "step": 250 }, { "epoch": 0.2947845804988662, "grad_norm": 9.017768859863281, "learning_rate": 8.455926900879302e-06, "loss": 2.0961, "step": 260 }, { "epoch": 0.30612244897959184, "grad_norm": 8.073211669921875, "learning_rate": 8.781154858605427e-06, "loss": 1.9331, "step": 270 }, { "epoch": 0.31746031746031744, "grad_norm": 9.508843421936035, "learning_rate": 9.106382816331554e-06, "loss": 1.9698, "step": 280 }, { "epoch": 0.3287981859410431, "grad_norm": 9.191576957702637, "learning_rate": 9.431610774057681e-06, "loss": 1.9649, "step": 290 }, { "epoch": 0.3401360544217687, "grad_norm": 10.584879875183105, "learning_rate": 9.756838731783808e-06, "loss": 1.9858, "step": 300 }, { "epoch": 0.35147392290249435, "grad_norm": 10.487820625305176, "learning_rate": 1.0082066689509935e-05, "loss": 1.9865, "step": 310 }, { "epoch": 0.36281179138321995, "grad_norm": 11.283440589904785, "learning_rate": 1.0407294647236062e-05, "loss": 1.9821, "step": 320 }, { "epoch": 0.3741496598639456, "grad_norm": 8.875638008117676, "learning_rate": 1.073252260496219e-05, "loss": 1.929, "step": 330 }, { "epoch": 0.3854875283446712, "grad_norm": 10.396716117858887, "learning_rate": 1.1057750562688316e-05, "loss": 2.026, "step": 340 }, { "epoch": 0.3968253968253968, "grad_norm": 7.329042911529541, "learning_rate": 1.1382978520414443e-05, "loss": 1.9425, "step": 350 }, { "epoch": 0.40816326530612246, "grad_norm": 11.754528999328613, "learning_rate": 1.170820647814057e-05, "loss": 1.8786, "step": 360 }, { "epoch": 0.41950113378684806, "grad_norm": 8.387690544128418, "learning_rate": 1.1732014759459226e-05, "loss": 1.9279, "step": 370 }, { "epoch": 0.4308390022675737, "grad_norm": 10.3031005859375, "learning_rate": 1.1680468121676012e-05, "loss": 1.9839, "step": 380 }, { "epoch": 0.4421768707482993, "grad_norm": 14.319021224975586, "learning_rate": 1.16289214838928e-05, "loss": 2.0223, "step": 390 }, { "epoch": 0.45351473922902497, "grad_norm": 9.42290210723877, "learning_rate": 1.1577374846109587e-05, "loss": 1.9165, "step": 400 }, { "epoch": 0.46485260770975056, "grad_norm": 11.768155097961426, "learning_rate": 1.1525828208326375e-05, "loss": 2.0116, "step": 410 }, { "epoch": 0.47619047619047616, "grad_norm": 8.139991760253906, "learning_rate": 1.1474281570543161e-05, "loss": 1.8628, "step": 420 }, { "epoch": 0.4875283446712018, "grad_norm": 10.308381080627441, "learning_rate": 1.1422734932759949e-05, "loss": 2.017, "step": 430 }, { "epoch": 0.4988662131519274, "grad_norm": 9.104990005493164, "learning_rate": 1.1371188294976736e-05, "loss": 2.0109, "step": 440 }, { "epoch": 0.5102040816326531, "grad_norm": 9.941882133483887, "learning_rate": 1.1319641657193522e-05, "loss": 1.9498, "step": 450 }, { "epoch": 0.5215419501133787, "grad_norm": 10.337907791137695, "learning_rate": 1.126809501941031e-05, "loss": 1.9946, "step": 460 }, { "epoch": 0.5328798185941043, "grad_norm": 9.354191780090332, "learning_rate": 1.1216548381627098e-05, "loss": 1.8953, "step": 470 }, { "epoch": 0.54421768707483, "grad_norm": 10.24344539642334, "learning_rate": 1.1165001743843885e-05, "loss": 1.889, "step": 480 }, { "epoch": 0.5555555555555556, "grad_norm": 8.560635566711426, "learning_rate": 1.1113455106060671e-05, "loss": 1.9698, "step": 490 }, { "epoch": 0.5668934240362812, "grad_norm": 9.784881591796875, "learning_rate": 1.1061908468277459e-05, "loss": 1.9718, "step": 500 }, { "epoch": 0.5782312925170068, "grad_norm": 
11.199634552001953, "learning_rate": 1.1010361830494247e-05, "loss": 1.9762, "step": 510 }, { "epoch": 0.5895691609977324, "grad_norm": 10.915511131286621, "learning_rate": 1.0958815192711034e-05, "loss": 1.9347, "step": 520 }, { "epoch": 0.6009070294784581, "grad_norm": 13.744818687438965, "learning_rate": 1.090726855492782e-05, "loss": 2.0184, "step": 530 }, { "epoch": 0.6122448979591837, "grad_norm": 11.262948036193848, "learning_rate": 1.0855721917144608e-05, "loss": 1.9323, "step": 540 }, { "epoch": 0.6235827664399093, "grad_norm": 8.698514938354492, "learning_rate": 1.0804175279361396e-05, "loss": 1.917, "step": 550 }, { "epoch": 0.6349206349206349, "grad_norm": 15.39905834197998, "learning_rate": 1.0752628641578182e-05, "loss": 1.9203, "step": 560 }, { "epoch": 0.6462585034013606, "grad_norm": 9.898249626159668, "learning_rate": 1.070108200379497e-05, "loss": 1.9317, "step": 570 }, { "epoch": 0.6575963718820862, "grad_norm": 9.889189720153809, "learning_rate": 1.0649535366011757e-05, "loss": 1.9203, "step": 580 }, { "epoch": 0.6689342403628118, "grad_norm": 9.887190818786621, "learning_rate": 1.0597988728228545e-05, "loss": 1.8459, "step": 590 }, { "epoch": 0.6802721088435374, "grad_norm": 9.046891212463379, "learning_rate": 1.054644209044533e-05, "loss": 1.983, "step": 600 }, { "epoch": 0.691609977324263, "grad_norm": 11.797575950622559, "learning_rate": 1.0494895452662118e-05, "loss": 1.9428, "step": 610 }, { "epoch": 0.7029478458049887, "grad_norm": 13.745688438415527, "learning_rate": 1.0443348814878906e-05, "loss": 1.9652, "step": 620 }, { "epoch": 0.7142857142857143, "grad_norm": 11.441299438476562, "learning_rate": 1.0391802177095692e-05, "loss": 1.9636, "step": 630 }, { "epoch": 0.7256235827664399, "grad_norm": 11.024886131286621, "learning_rate": 1.034025553931248e-05, "loss": 1.9634, "step": 640 }, { "epoch": 0.7369614512471655, "grad_norm": 18.117177963256836, "learning_rate": 1.0288708901529269e-05, "loss": 1.9059, "step": 650 }, { "epoch": 0.7482993197278912, "grad_norm": 13.534732818603516, "learning_rate": 1.0237162263746055e-05, "loss": 1.8964, "step": 660 }, { "epoch": 0.7596371882086168, "grad_norm": 11.099019050598145, "learning_rate": 1.0185615625962842e-05, "loss": 1.7963, "step": 670 }, { "epoch": 0.7709750566893424, "grad_norm": 26.388456344604492, "learning_rate": 1.013406898817963e-05, "loss": 1.9809, "step": 680 }, { "epoch": 0.782312925170068, "grad_norm": 12.548821449279785, "learning_rate": 1.0082522350396418e-05, "loss": 1.914, "step": 690 }, { "epoch": 0.7936507936507936, "grad_norm": 11.160512924194336, "learning_rate": 1.0030975712613204e-05, "loss": 1.82, "step": 700 }, { "epoch": 0.8049886621315193, "grad_norm": 13.189496994018555, "learning_rate": 9.979429074829991e-06, "loss": 1.835, "step": 710 }, { "epoch": 0.8163265306122449, "grad_norm": 14.305792808532715, "learning_rate": 9.927882437046779e-06, "loss": 1.7968, "step": 720 }, { "epoch": 0.8276643990929705, "grad_norm": 15.657683372497559, "learning_rate": 9.876335799263565e-06, "loss": 1.8773, "step": 730 }, { "epoch": 0.8390022675736961, "grad_norm": 13.894340515136719, "learning_rate": 9.824789161480353e-06, "loss": 1.848, "step": 740 }, { "epoch": 0.8503401360544217, "grad_norm": 18.09587860107422, "learning_rate": 9.77324252369714e-06, "loss": 1.9382, "step": 750 }, { "epoch": 0.8616780045351474, "grad_norm": 14.405220985412598, "learning_rate": 9.721695885913928e-06, "loss": 1.973, "step": 760 }, { "epoch": 0.873015873015873, "grad_norm": 12.452035903930664, "learning_rate": 
9.670149248130714e-06, "loss": 1.83, "step": 770 }, { "epoch": 0.8843537414965986, "grad_norm": 17.113231658935547, "learning_rate": 9.618602610347502e-06, "loss": 1.9733, "step": 780 }, { "epoch": 0.8956916099773242, "grad_norm": 11.744027137756348, "learning_rate": 9.56705597256429e-06, "loss": 1.7322, "step": 790 }, { "epoch": 0.9070294784580499, "grad_norm": 12.800016403198242, "learning_rate": 9.515509334781077e-06, "loss": 1.7837, "step": 800 }, { "epoch": 0.9183673469387755, "grad_norm": 12.709762573242188, "learning_rate": 9.463962696997863e-06, "loss": 1.8111, "step": 810 }, { "epoch": 0.9297052154195011, "grad_norm": 12.49414348602295, "learning_rate": 9.41241605921465e-06, "loss": 1.8696, "step": 820 }, { "epoch": 0.9410430839002267, "grad_norm": 14.208767890930176, "learning_rate": 9.360869421431438e-06, "loss": 1.9039, "step": 830 }, { "epoch": 0.9523809523809523, "grad_norm": 12.151473999023438, "learning_rate": 9.309322783648224e-06, "loss": 1.9351, "step": 840 }, { "epoch": 0.963718820861678, "grad_norm": 15.71224594116211, "learning_rate": 9.257776145865012e-06, "loss": 1.7715, "step": 850 }, { "epoch": 0.9750566893424036, "grad_norm": 12.460648536682129, "learning_rate": 9.2062295080818e-06, "loss": 1.8283, "step": 860 }, { "epoch": 0.9863945578231292, "grad_norm": 14.45302963256836, "learning_rate": 9.154682870298587e-06, "loss": 1.8793, "step": 870 }, { "epoch": 0.9977324263038548, "grad_norm": 16.95499610900879, "learning_rate": 9.103136232515373e-06, "loss": 1.7859, "step": 880 }, { "epoch": 1.0, "eval_classification_report": { "accuracy": 0.31624214356599406, "ar": { "f1-score": 0.34932349323493234, "precision": 0.32494279176201374, "recall": 0.3776595744680851, "support": 376.0 }, "cl": { "f1-score": 0.2806361085126286, "precision": 0.30425963488843816, "recall": 0.2604166666666667, "support": 576.0 }, "co": { "f1-score": 0.29607250755287007, "precision": 0.3081761006289308, "recall": 0.28488372093023256, "support": 344.0 }, "es": { "f1-score": 0.3728115345005149, "precision": 0.42289719626168226, "recall": 0.3333333333333333, "support": 543.0 }, "macro avg": { "f1-score": 0.272621791856563, "precision": 0.30099582851554657, "recall": 0.2856216737927651, "support": 3023.0 }, "mx": { "f1-score": 0.35931307793923384, "precision": 0.26433430515063167, "recall": 0.5608247422680412, "support": 485.0 }, "pe": { "f1-score": 0.16205533596837945, "precision": 0.25949367088607594, "recall": 0.11781609195402298, "support": 348.0 }, "pr": { "f1-score": 0.5296803652968036, "precision": 0.4915254237288136, "recall": 0.5742574257425742, "support": 101.0 }, "uy": { "f1-score": 0.1037037037037037, "precision": 0.3333333333333333, "recall": 0.06140350877192982, "support": 228.0 }, "ve": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 22.0 }, "weighted avg": { "f1-score": 0.29939857719104257, "precision": 0.32326424043920743, "recall": 0.31624214356599406, "support": 3023.0 } }, "eval_f1": 0.272621791856563, "eval_loss": 1.826755166053772, "eval_runtime": 4.3231, "eval_samples_per_second": 699.273, "eval_steps_per_second": 87.438, "step": 882 }, { "epoch": 1.0090702947845804, "grad_norm": 26.123666763305664, "learning_rate": 9.051589594732161e-06, "loss": 1.7874, "step": 890 }, { "epoch": 1.0204081632653061, "grad_norm": 16.006940841674805, "learning_rate": 9.000042956948949e-06, "loss": 1.8211, "step": 900 }, { "epoch": 1.0317460317460316, "grad_norm": 11.488687515258789, "learning_rate": 8.948496319165735e-06, "loss": 1.5922, "step": 910 }, { "epoch": 
1.0430839002267573, "grad_norm": 17.735294342041016, "learning_rate": 8.896949681382522e-06, "loss": 1.7408, "step": 920 }, { "epoch": 1.054421768707483, "grad_norm": 17.176015853881836, "learning_rate": 8.84540304359931e-06, "loss": 1.6946, "step": 930 }, { "epoch": 1.0657596371882085, "grad_norm": 16.43549346923828, "learning_rate": 8.793856405816098e-06, "loss": 1.7945, "step": 940 }, { "epoch": 1.0770975056689343, "grad_norm": 15.267044067382812, "learning_rate": 8.742309768032884e-06, "loss": 1.8128, "step": 950 }, { "epoch": 1.08843537414966, "grad_norm": 14.859127044677734, "learning_rate": 8.690763130249671e-06, "loss": 1.6856, "step": 960 }, { "epoch": 1.0997732426303855, "grad_norm": 18.991714477539062, "learning_rate": 8.63921649246646e-06, "loss": 1.7442, "step": 970 }, { "epoch": 1.1111111111111112, "grad_norm": 14.102248191833496, "learning_rate": 8.587669854683247e-06, "loss": 1.78, "step": 980 }, { "epoch": 1.1224489795918366, "grad_norm": 14.221405982971191, "learning_rate": 8.536123216900034e-06, "loss": 1.8002, "step": 990 }, { "epoch": 1.1337868480725624, "grad_norm": 14.290886878967285, "learning_rate": 8.484576579116822e-06, "loss": 1.6335, "step": 1000 }, { "epoch": 1.145124716553288, "grad_norm": 14.495686531066895, "learning_rate": 8.433029941333608e-06, "loss": 1.7481, "step": 1010 }, { "epoch": 1.1564625850340136, "grad_norm": 11.985411643981934, "learning_rate": 8.381483303550395e-06, "loss": 1.5709, "step": 1020 }, { "epoch": 1.1678004535147393, "grad_norm": 17.67130470275879, "learning_rate": 8.329936665767183e-06, "loss": 1.6573, "step": 1030 }, { "epoch": 1.179138321995465, "grad_norm": 14.700641632080078, "learning_rate": 8.27839002798397e-06, "loss": 1.6877, "step": 1040 }, { "epoch": 1.1904761904761905, "grad_norm": 42.978267669677734, "learning_rate": 8.226843390200757e-06, "loss": 1.5638, "step": 1050 }, { "epoch": 1.2018140589569162, "grad_norm": 20.4002685546875, "learning_rate": 8.175296752417544e-06, "loss": 1.7197, "step": 1060 }, { "epoch": 1.2131519274376417, "grad_norm": 19.56304931640625, "learning_rate": 8.123750114634332e-06, "loss": 1.8438, "step": 1070 }, { "epoch": 1.2244897959183674, "grad_norm": 16.191972732543945, "learning_rate": 8.07220347685112e-06, "loss": 1.691, "step": 1080 }, { "epoch": 1.235827664399093, "grad_norm": 30.342937469482422, "learning_rate": 8.020656839067906e-06, "loss": 1.6144, "step": 1090 }, { "epoch": 1.2471655328798186, "grad_norm": 15.779362678527832, "learning_rate": 7.969110201284693e-06, "loss": 1.656, "step": 1100 }, { "epoch": 1.2585034013605443, "grad_norm": 13.458856582641602, "learning_rate": 7.917563563501481e-06, "loss": 1.6735, "step": 1110 }, { "epoch": 1.2698412698412698, "grad_norm": 19.857772827148438, "learning_rate": 7.866016925718267e-06, "loss": 1.8041, "step": 1120 }, { "epoch": 1.2811791383219955, "grad_norm": 13.60135555267334, "learning_rate": 7.814470287935055e-06, "loss": 1.6365, "step": 1130 }, { "epoch": 1.2925170068027212, "grad_norm": 22.09275245666504, "learning_rate": 7.762923650151842e-06, "loss": 1.7464, "step": 1140 }, { "epoch": 1.3038548752834467, "grad_norm": 20.55967903137207, "learning_rate": 7.71137701236863e-06, "loss": 1.5525, "step": 1150 }, { "epoch": 1.3151927437641724, "grad_norm": 16.4479923248291, "learning_rate": 7.659830374585416e-06, "loss": 1.6759, "step": 1160 }, { "epoch": 1.3265306122448979, "grad_norm": 17.639759063720703, "learning_rate": 7.608283736802204e-06, "loss": 1.6931, "step": 1170 }, { "epoch": 1.3378684807256236, "grad_norm": 
13.464910507202148, "learning_rate": 7.556737099018991e-06, "loss": 1.6979, "step": 1180 }, { "epoch": 1.3492063492063493, "grad_norm": 17.602092742919922, "learning_rate": 7.505190461235778e-06, "loss": 1.523, "step": 1190 }, { "epoch": 1.3605442176870748, "grad_norm": 14.499417304992676, "learning_rate": 7.453643823452565e-06, "loss": 1.6009, "step": 1200 }, { "epoch": 1.3718820861678005, "grad_norm": 13.913595199584961, "learning_rate": 7.402097185669353e-06, "loss": 1.6974, "step": 1210 }, { "epoch": 1.383219954648526, "grad_norm": 18.651060104370117, "learning_rate": 7.3505505478861395e-06, "loss": 1.5858, "step": 1220 }, { "epoch": 1.3945578231292517, "grad_norm": 14.985074996948242, "learning_rate": 7.299003910102927e-06, "loss": 1.5809, "step": 1230 }, { "epoch": 1.4058956916099774, "grad_norm": 15.544303894042969, "learning_rate": 7.247457272319714e-06, "loss": 1.7217, "step": 1240 }, { "epoch": 1.417233560090703, "grad_norm": 13.504234313964844, "learning_rate": 7.195910634536502e-06, "loss": 1.7267, "step": 1250 }, { "epoch": 1.4285714285714286, "grad_norm": 17.615610122680664, "learning_rate": 7.144363996753288e-06, "loss": 1.6181, "step": 1260 }, { "epoch": 1.439909297052154, "grad_norm": 17.280879974365234, "learning_rate": 7.092817358970076e-06, "loss": 1.7672, "step": 1270 }, { "epoch": 1.4512471655328798, "grad_norm": 16.363788604736328, "learning_rate": 7.041270721186863e-06, "loss": 1.7007, "step": 1280 }, { "epoch": 1.4625850340136055, "grad_norm": 17.784503936767578, "learning_rate": 6.989724083403651e-06, "loss": 1.8024, "step": 1290 }, { "epoch": 1.473922902494331, "grad_norm": 14.961355209350586, "learning_rate": 6.938177445620438e-06, "loss": 1.6246, "step": 1300 }, { "epoch": 1.4852607709750567, "grad_norm": 20.90947723388672, "learning_rate": 6.886630807837226e-06, "loss": 1.7225, "step": 1310 }, { "epoch": 1.4965986394557822, "grad_norm": 20.188579559326172, "learning_rate": 6.835084170054013e-06, "loss": 1.6348, "step": 1320 }, { "epoch": 1.507936507936508, "grad_norm": 18.756460189819336, "learning_rate": 6.7835375322708e-06, "loss": 1.7665, "step": 1330 }, { "epoch": 1.5192743764172336, "grad_norm": 23.779773712158203, "learning_rate": 6.731990894487587e-06, "loss": 1.6492, "step": 1340 }, { "epoch": 1.5306122448979593, "grad_norm": 19.124439239501953, "learning_rate": 6.680444256704375e-06, "loss": 1.5442, "step": 1350 }, { "epoch": 1.5419501133786848, "grad_norm": 14.493389129638672, "learning_rate": 6.628897618921162e-06, "loss": 1.6453, "step": 1360 }, { "epoch": 1.5532879818594103, "grad_norm": 21.88387680053711, "learning_rate": 6.577350981137949e-06, "loss": 1.6425, "step": 1370 }, { "epoch": 1.564625850340136, "grad_norm": 14.829331398010254, "learning_rate": 6.525804343354736e-06, "loss": 1.5625, "step": 1380 }, { "epoch": 1.5759637188208617, "grad_norm": 15.82640552520752, "learning_rate": 6.474257705571523e-06, "loss": 1.4464, "step": 1390 }, { "epoch": 1.5873015873015874, "grad_norm": 19.32452392578125, "learning_rate": 6.422711067788311e-06, "loss": 1.6272, "step": 1400 }, { "epoch": 1.598639455782313, "grad_norm": 20.762659072875977, "learning_rate": 6.3711644300050975e-06, "loss": 1.5704, "step": 1410 }, { "epoch": 1.6099773242630384, "grad_norm": 15.721837997436523, "learning_rate": 6.319617792221885e-06, "loss": 1.6598, "step": 1420 }, { "epoch": 1.6213151927437641, "grad_norm": 18.4571533203125, "learning_rate": 6.268071154438672e-06, "loss": 1.7319, "step": 1430 }, { "epoch": 1.6326530612244898, "grad_norm": 18.22947120666504, 
"learning_rate": 6.21652451665546e-06, "loss": 1.6941, "step": 1440 }, { "epoch": 1.6439909297052155, "grad_norm": 23.945701599121094, "learning_rate": 6.1649778788722464e-06, "loss": 1.7542, "step": 1450 }, { "epoch": 1.655328798185941, "grad_norm": 18.437143325805664, "learning_rate": 6.113431241089034e-06, "loss": 1.4949, "step": 1460 }, { "epoch": 1.6666666666666665, "grad_norm": 17.228715896606445, "learning_rate": 6.061884603305821e-06, "loss": 1.6577, "step": 1470 }, { "epoch": 1.6780045351473922, "grad_norm": 20.193279266357422, "learning_rate": 6.010337965522608e-06, "loss": 1.7019, "step": 1480 }, { "epoch": 1.689342403628118, "grad_norm": 15.752019882202148, "learning_rate": 5.958791327739395e-06, "loss": 1.6625, "step": 1490 }, { "epoch": 1.7006802721088436, "grad_norm": 18.021570205688477, "learning_rate": 5.907244689956182e-06, "loss": 1.6466, "step": 1500 }, { "epoch": 1.7120181405895691, "grad_norm": 27.58159828186035, "learning_rate": 5.85569805217297e-06, "loss": 1.5707, "step": 1510 }, { "epoch": 1.7233560090702946, "grad_norm": 29.481163024902344, "learning_rate": 5.8041514143897575e-06, "loss": 1.6184, "step": 1520 }, { "epoch": 1.7346938775510203, "grad_norm": 23.25908851623535, "learning_rate": 5.752604776606544e-06, "loss": 1.6903, "step": 1530 }, { "epoch": 1.746031746031746, "grad_norm": 23.716106414794922, "learning_rate": 5.701058138823332e-06, "loss": 1.7002, "step": 1540 }, { "epoch": 1.7573696145124718, "grad_norm": 23.144357681274414, "learning_rate": 5.649511501040119e-06, "loss": 1.6566, "step": 1550 }, { "epoch": 1.7687074829931972, "grad_norm": 16.0502872467041, "learning_rate": 5.5979648632569065e-06, "loss": 1.5765, "step": 1560 }, { "epoch": 1.780045351473923, "grad_norm": 24.38329315185547, "learning_rate": 5.546418225473693e-06, "loss": 1.5083, "step": 1570 }, { "epoch": 1.7913832199546484, "grad_norm": 19.64059829711914, "learning_rate": 5.494871587690481e-06, "loss": 1.6865, "step": 1580 }, { "epoch": 1.8027210884353742, "grad_norm": 20.771997451782227, "learning_rate": 5.443324949907268e-06, "loss": 1.5953, "step": 1590 }, { "epoch": 1.8140589569160999, "grad_norm": 18.78712272644043, "learning_rate": 5.3917783121240555e-06, "loss": 1.6201, "step": 1600 }, { "epoch": 1.8253968253968254, "grad_norm": 17.190635681152344, "learning_rate": 5.340231674340842e-06, "loss": 1.4179, "step": 1610 }, { "epoch": 1.836734693877551, "grad_norm": 22.07889747619629, "learning_rate": 5.288685036557629e-06, "loss": 1.7159, "step": 1620 }, { "epoch": 1.8480725623582765, "grad_norm": 20.719635009765625, "learning_rate": 5.237138398774417e-06, "loss": 1.8329, "step": 1630 }, { "epoch": 1.8594104308390023, "grad_norm": 17.722476959228516, "learning_rate": 5.185591760991204e-06, "loss": 1.5632, "step": 1640 }, { "epoch": 1.870748299319728, "grad_norm": 23.3852596282959, "learning_rate": 5.134045123207991e-06, "loss": 1.554, "step": 1650 }, { "epoch": 1.8820861678004537, "grad_norm": 18.780004501342773, "learning_rate": 5.082498485424778e-06, "loss": 1.5414, "step": 1660 }, { "epoch": 1.8934240362811792, "grad_norm": 18.146533966064453, "learning_rate": 5.030951847641566e-06, "loss": 1.5634, "step": 1670 }, { "epoch": 1.9047619047619047, "grad_norm": 16.80215835571289, "learning_rate": 4.979405209858353e-06, "loss": 1.4973, "step": 1680 }, { "epoch": 1.9160997732426304, "grad_norm": 19.140361785888672, "learning_rate": 4.92785857207514e-06, "loss": 1.5131, "step": 1690 }, { "epoch": 1.927437641723356, "grad_norm": 18.766475677490234, "learning_rate": 
4.876311934291928e-06, "loss": 1.6316, "step": 1700 }, { "epoch": 1.9387755102040818, "grad_norm": 18.763959884643555, "learning_rate": 4.824765296508715e-06, "loss": 1.7108, "step": 1710 }, { "epoch": 1.9501133786848073, "grad_norm": 20.144254684448242, "learning_rate": 4.773218658725502e-06, "loss": 1.6734, "step": 1720 }, { "epoch": 1.9614512471655328, "grad_norm": 21.231863021850586, "learning_rate": 4.721672020942289e-06, "loss": 1.6598, "step": 1730 }, { "epoch": 1.9727891156462585, "grad_norm": 21.432113647460938, "learning_rate": 4.670125383159077e-06, "loss": 1.5855, "step": 1740 }, { "epoch": 1.9841269841269842, "grad_norm": 22.680397033691406, "learning_rate": 4.618578745375864e-06, "loss": 1.5308, "step": 1750 }, { "epoch": 1.99546485260771, "grad_norm": 16.61890983581543, "learning_rate": 4.5670321075926505e-06, "loss": 1.6121, "step": 1760 }, { "epoch": 2.0, "eval_classification_report": { "accuracy": 0.3728084684088654, "ar": { "f1-score": 0.3960720130932897, "precision": 0.5148936170212766, "recall": 0.32180851063829785, "support": 376.0 }, "cl": { "f1-score": 0.3418803418803419, "precision": 0.2898550724637681, "recall": 0.4166666666666667, "support": 576.0 }, "co": { "f1-score": 0.31393298059964725, "precision": 0.3991031390134529, "recall": 0.25872093023255816, "support": 344.0 }, "es": { "f1-score": 0.44776119402985076, "precision": 0.3904109589041096, "recall": 0.5248618784530387, "support": 543.0 }, "macro avg": { "f1-score": 0.3344495615547186, "precision": 0.39328654354394, "recall": 0.3244903328432473, "support": 3023.0 }, "mx": { "f1-score": 0.3937621832358674, "precision": 0.3733826247689464, "recall": 0.41649484536082476, "support": 485.0 }, "pe": { "f1-score": 0.31085043988269795, "precision": 0.31736526946107785, "recall": 0.3045977011494253, "support": 348.0 }, "pr": { "f1-score": 0.6021505376344086, "precision": 0.6588235294117647, "recall": 0.5544554455445545, "support": 101.0 }, "uy": { "f1-score": 0.20363636363636364, "precision": 0.5957446808510638, "recall": 0.12280701754385964, "support": 228.0 }, "ve": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 22.0 }, "weighted avg": { "f1-score": 0.36499184078378505, "precision": 0.39819576670020335, "recall": 0.3728084684088654, "support": 3023.0 } }, "eval_f1": 0.3344495615547186, "eval_loss": 1.7376385927200317, "eval_runtime": 4.2251, "eval_samples_per_second": 715.479, "eval_steps_per_second": 89.464, "step": 1764 } ], "logging_steps": 10, "max_steps": 2646, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 231979863240576.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }
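
The object above appears to be the per-checkpoint training state that the HuggingFace Trainer writes as trainer_state.json: top-level fields hold the best metric and checkpoint path, and log_history interleaves step-wise training losses with per-epoch evaluation reports. The following is a minimal sketch, not part of the trainer output, showing one way to load such a file and summarize it; the filename is illustrative and should point at your own trainer_state.json.

import json

# Load the trainer state saved alongside a checkpoint (path is an assumption).
with open("trainer_state.json") as f:
    state = json.load(f)

# Split log_history into training-loss entries and per-epoch evaluation entries.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_f1" in e]

print(f"best macro-F1: {state['best_metric']:.4f} "
      f"(checkpoint: {state['best_model_checkpoint']})")

for e in eval_logs:
    print(f"epoch {e['epoch']:.0f}: eval_loss={e['eval_loss']:.4f}, "
          f"macro-F1={e['eval_f1']:.4f}")

# Average of the last few logged training losses, to eyeball convergence.
recent = [e["loss"] for e in train_logs[-10:]]
print("mean of last 10 train losses:", sum(recent) / len(recent))

Run against the state shown here, this would report the epoch-2 checkpoint as best (macro-F1 ≈ 0.334, eval_loss ≈ 1.738), consistent with the best_metric and best_model_checkpoint fields above.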