|
{ |
|
"best_metric": 0.272621791856563, |
|
"best_model_checkpoint": "/content/drive/MyDrive/model_outputs/trial_7/checkpoint-882", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 882, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.011337868480725623, |
|
"grad_norm": 10.448426246643066, |
|
"learning_rate": 3.2522795772612694e-07, |
|
"loss": 2.2378, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022675736961451247, |
|
"grad_norm": 9.789140701293945, |
|
"learning_rate": 6.504559154522539e-07, |
|
"loss": 2.2313, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.034013605442176874, |
|
"grad_norm": 10.550893783569336, |
|
"learning_rate": 9.756838731783808e-07, |
|
"loss": 2.2025, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.045351473922902494, |
|
"grad_norm": 11.21814250946045, |
|
"learning_rate": 1.3009118309045078e-06, |
|
"loss": 2.2219, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05668934240362812, |
|
"grad_norm": 9.440560340881348, |
|
"learning_rate": 1.6261397886306348e-06, |
|
"loss": 2.1929, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 9.947296142578125, |
|
"learning_rate": 1.9513677463567616e-06, |
|
"loss": 2.1799, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07936507936507936, |
|
"grad_norm": 9.87543773651123, |
|
"learning_rate": 2.2765957040828886e-06, |
|
"loss": 2.1658, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09070294784580499, |
|
"grad_norm": 8.421579360961914, |
|
"learning_rate": 2.6018236618090155e-06, |
|
"loss": 2.1612, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"grad_norm": 11.202038764953613, |
|
"learning_rate": 2.9270516195351425e-06, |
|
"loss": 2.1247, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11337868480725624, |
|
"grad_norm": 12.861761093139648, |
|
"learning_rate": 3.2522795772612695e-06, |
|
"loss": 2.1237, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12471655328798185, |
|
"grad_norm": 11.995753288269043, |
|
"learning_rate": 3.5775075349873965e-06, |
|
"loss": 2.0857, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 10.376130104064941, |
|
"learning_rate": 3.902735492713523e-06, |
|
"loss": 2.0663, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1473922902494331, |
|
"grad_norm": 11.153587341308594, |
|
"learning_rate": 4.227963450439651e-06, |
|
"loss": 2.0499, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15873015873015872, |
|
"grad_norm": 10.43906307220459, |
|
"learning_rate": 4.553191408165777e-06, |
|
"loss": 2.0444, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17006802721088435, |
|
"grad_norm": 10.224808692932129, |
|
"learning_rate": 4.878419365891904e-06, |
|
"loss": 2.0634, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18140589569160998, |
|
"grad_norm": 11.265706062316895, |
|
"learning_rate": 5.203647323618031e-06, |
|
"loss": 1.9827, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1927437641723356, |
|
"grad_norm": 9.84843921661377, |
|
"learning_rate": 5.528875281344158e-06, |
|
"loss": 2.0075, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 8.700265884399414, |
|
"learning_rate": 5.854103239070285e-06, |
|
"loss": 2.0327, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.21541950113378686, |
|
"grad_norm": 9.785911560058594, |
|
"learning_rate": 6.179331196796412e-06, |
|
"loss": 2.0588, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.22675736961451248, |
|
"grad_norm": 11.363122940063477, |
|
"learning_rate": 6.504559154522539e-06, |
|
"loss": 2.1046, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 8.90456485748291, |
|
"learning_rate": 6.829787112248665e-06, |
|
"loss": 2.0855, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2494331065759637, |
|
"grad_norm": 9.236421585083008, |
|
"learning_rate": 7.155015069974793e-06, |
|
"loss": 2.0381, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.26077097505668934, |
|
"grad_norm": 7.954260349273682, |
|
"learning_rate": 7.48024302770092e-06, |
|
"loss": 2.049, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 8.307881355285645, |
|
"learning_rate": 7.805470985427046e-06, |
|
"loss": 1.969, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2834467120181406, |
|
"grad_norm": 10.518112182617188, |
|
"learning_rate": 8.130698943153173e-06, |
|
"loss": 1.9447, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2947845804988662, |
|
"grad_norm": 9.017768859863281, |
|
"learning_rate": 8.455926900879302e-06, |
|
"loss": 2.0961, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"grad_norm": 8.073211669921875, |
|
"learning_rate": 8.781154858605427e-06, |
|
"loss": 1.9331, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.31746031746031744, |
|
"grad_norm": 9.508843421936035, |
|
"learning_rate": 9.106382816331554e-06, |
|
"loss": 1.9698, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3287981859410431, |
|
"grad_norm": 9.191576957702637, |
|
"learning_rate": 9.431610774057681e-06, |
|
"loss": 1.9649, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 10.584879875183105, |
|
"learning_rate": 9.756838731783808e-06, |
|
"loss": 1.9858, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35147392290249435, |
|
"grad_norm": 10.487820625305176, |
|
"learning_rate": 1.0082066689509935e-05, |
|
"loss": 1.9865, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.36281179138321995, |
|
"grad_norm": 11.283440589904785, |
|
"learning_rate": 1.0407294647236062e-05, |
|
"loss": 1.9821, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3741496598639456, |
|
"grad_norm": 8.875638008117676, |
|
"learning_rate": 1.073252260496219e-05, |
|
"loss": 1.929, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3854875283446712, |
|
"grad_norm": 10.396716117858887, |
|
"learning_rate": 1.1057750562688316e-05, |
|
"loss": 2.026, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3968253968253968, |
|
"grad_norm": 7.329042911529541, |
|
"learning_rate": 1.1382978520414443e-05, |
|
"loss": 1.9425, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 11.754528999328613, |
|
"learning_rate": 1.170820647814057e-05, |
|
"loss": 1.8786, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.41950113378684806, |
|
"grad_norm": 8.387690544128418, |
|
"learning_rate": 1.1732014759459226e-05, |
|
"loss": 1.9279, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4308390022675737, |
|
"grad_norm": 10.3031005859375, |
|
"learning_rate": 1.1680468121676012e-05, |
|
"loss": 1.9839, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4421768707482993, |
|
"grad_norm": 14.319021224975586, |
|
"learning_rate": 1.16289214838928e-05, |
|
"loss": 2.0223, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.45351473922902497, |
|
"grad_norm": 9.42290210723877, |
|
"learning_rate": 1.1577374846109587e-05, |
|
"loss": 1.9165, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.46485260770975056, |
|
"grad_norm": 11.768155097961426, |
|
"learning_rate": 1.1525828208326375e-05, |
|
"loss": 2.0116, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 8.139991760253906, |
|
"learning_rate": 1.1474281570543161e-05, |
|
"loss": 1.8628, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4875283446712018, |
|
"grad_norm": 10.308381080627441, |
|
"learning_rate": 1.1422734932759949e-05, |
|
"loss": 2.017, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4988662131519274, |
|
"grad_norm": 9.104990005493164, |
|
"learning_rate": 1.1371188294976736e-05, |
|
"loss": 2.0109, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 9.941882133483887, |
|
"learning_rate": 1.1319641657193522e-05, |
|
"loss": 1.9498, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5215419501133787, |
|
"grad_norm": 10.337907791137695, |
|
"learning_rate": 1.126809501941031e-05, |
|
"loss": 1.9946, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5328798185941043, |
|
"grad_norm": 9.354191780090332, |
|
"learning_rate": 1.1216548381627098e-05, |
|
"loss": 1.8953, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 10.24344539642334, |
|
"learning_rate": 1.1165001743843885e-05, |
|
"loss": 1.889, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 8.560635566711426, |
|
"learning_rate": 1.1113455106060671e-05, |
|
"loss": 1.9698, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5668934240362812, |
|
"grad_norm": 9.784881591796875, |
|
"learning_rate": 1.1061908468277459e-05, |
|
"loss": 1.9718, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5782312925170068, |
|
"grad_norm": 11.199634552001953, |
|
"learning_rate": 1.1010361830494247e-05, |
|
"loss": 1.9762, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5895691609977324, |
|
"grad_norm": 10.915511131286621, |
|
"learning_rate": 1.0958815192711034e-05, |
|
"loss": 1.9347, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6009070294784581, |
|
"grad_norm": 13.744818687438965, |
|
"learning_rate": 1.090726855492782e-05, |
|
"loss": 2.0184, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 11.262948036193848, |
|
"learning_rate": 1.0855721917144608e-05, |
|
"loss": 1.9323, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6235827664399093, |
|
"grad_norm": 8.698514938354492, |
|
"learning_rate": 1.0804175279361396e-05, |
|
"loss": 1.917, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 15.39905834197998, |
|
"learning_rate": 1.0752628641578182e-05, |
|
"loss": 1.9203, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6462585034013606, |
|
"grad_norm": 9.898249626159668, |
|
"learning_rate": 1.070108200379497e-05, |
|
"loss": 1.9317, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6575963718820862, |
|
"grad_norm": 9.889189720153809, |
|
"learning_rate": 1.0649535366011757e-05, |
|
"loss": 1.9203, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6689342403628118, |
|
"grad_norm": 9.887190818786621, |
|
"learning_rate": 1.0597988728228545e-05, |
|
"loss": 1.8459, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 9.046891212463379, |
|
"learning_rate": 1.054644209044533e-05, |
|
"loss": 1.983, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.691609977324263, |
|
"grad_norm": 11.797575950622559, |
|
"learning_rate": 1.0494895452662118e-05, |
|
"loss": 1.9428, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7029478458049887, |
|
"grad_norm": 13.745688438415527, |
|
"learning_rate": 1.0443348814878906e-05, |
|
"loss": 1.9652, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 11.441299438476562, |
|
"learning_rate": 1.0391802177095692e-05, |
|
"loss": 1.9636, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7256235827664399, |
|
"grad_norm": 11.024886131286621, |
|
"learning_rate": 1.034025553931248e-05, |
|
"loss": 1.9634, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7369614512471655, |
|
"grad_norm": 18.117177963256836, |
|
"learning_rate": 1.0288708901529269e-05, |
|
"loss": 1.9059, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 13.534732818603516, |
|
"learning_rate": 1.0237162263746055e-05, |
|
"loss": 1.8964, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7596371882086168, |
|
"grad_norm": 11.099019050598145, |
|
"learning_rate": 1.0185615625962842e-05, |
|
"loss": 1.7963, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7709750566893424, |
|
"grad_norm": 26.388456344604492, |
|
"learning_rate": 1.013406898817963e-05, |
|
"loss": 1.9809, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.782312925170068, |
|
"grad_norm": 12.548821449279785, |
|
"learning_rate": 1.0082522350396418e-05, |
|
"loss": 1.914, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7936507936507936, |
|
"grad_norm": 11.160512924194336, |
|
"learning_rate": 1.0030975712613204e-05, |
|
"loss": 1.82, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8049886621315193, |
|
"grad_norm": 13.189496994018555, |
|
"learning_rate": 9.979429074829991e-06, |
|
"loss": 1.835, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 14.305792808532715, |
|
"learning_rate": 9.927882437046779e-06, |
|
"loss": 1.7968, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.8276643990929705, |
|
"grad_norm": 15.657683372497559, |
|
"learning_rate": 9.876335799263565e-06, |
|
"loss": 1.8773, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.8390022675736961, |
|
"grad_norm": 13.894340515136719, |
|
"learning_rate": 9.824789161480353e-06, |
|
"loss": 1.848, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8503401360544217, |
|
"grad_norm": 18.09587860107422, |
|
"learning_rate": 9.77324252369714e-06, |
|
"loss": 1.9382, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8616780045351474, |
|
"grad_norm": 14.405220985412598, |
|
"learning_rate": 9.721695885913928e-06, |
|
"loss": 1.973, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.873015873015873, |
|
"grad_norm": 12.452035903930664, |
|
"learning_rate": 9.670149248130714e-06, |
|
"loss": 1.83, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 17.113231658935547, |
|
"learning_rate": 9.618602610347502e-06, |
|
"loss": 1.9733, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8956916099773242, |
|
"grad_norm": 11.744027137756348, |
|
"learning_rate": 9.56705597256429e-06, |
|
"loss": 1.7322, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.9070294784580499, |
|
"grad_norm": 12.800016403198242, |
|
"learning_rate": 9.515509334781077e-06, |
|
"loss": 1.7837, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9183673469387755, |
|
"grad_norm": 12.709762573242188, |
|
"learning_rate": 9.463962696997863e-06, |
|
"loss": 1.8111, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.9297052154195011, |
|
"grad_norm": 12.49414348602295, |
|
"learning_rate": 9.41241605921465e-06, |
|
"loss": 1.8696, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.9410430839002267, |
|
"grad_norm": 14.208767890930176, |
|
"learning_rate": 9.360869421431438e-06, |
|
"loss": 1.9039, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 12.151473999023438, |
|
"learning_rate": 9.309322783648224e-06, |
|
"loss": 1.9351, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.963718820861678, |
|
"grad_norm": 15.71224594116211, |
|
"learning_rate": 9.257776145865012e-06, |
|
"loss": 1.7715, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9750566893424036, |
|
"grad_norm": 12.460648536682129, |
|
"learning_rate": 9.2062295080818e-06, |
|
"loss": 1.8283, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9863945578231292, |
|
"grad_norm": 14.45302963256836, |
|
"learning_rate": 9.154682870298587e-06, |
|
"loss": 1.8793, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9977324263038548, |
|
"grad_norm": 16.95499610900879, |
|
"learning_rate": 9.103136232515373e-06, |
|
"loss": 1.7859, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_classification_report": { |
|
"accuracy": 0.31624214356599406, |
|
"ar": { |
|
"f1-score": 0.34932349323493234, |
|
"precision": 0.32494279176201374, |
|
"recall": 0.3776595744680851, |
|
"support": 376.0 |
|
}, |
|
"cl": { |
|
"f1-score": 0.2806361085126286, |
|
"precision": 0.30425963488843816, |
|
"recall": 0.2604166666666667, |
|
"support": 576.0 |
|
}, |
|
"co": { |
|
"f1-score": 0.29607250755287007, |
|
"precision": 0.3081761006289308, |
|
"recall": 0.28488372093023256, |
|
"support": 344.0 |
|
}, |
|
"es": { |
|
"f1-score": 0.3728115345005149, |
|
"precision": 0.42289719626168226, |
|
"recall": 0.3333333333333333, |
|
"support": 543.0 |
|
}, |
|
"macro avg": { |
|
"f1-score": 0.272621791856563, |
|
"precision": 0.30099582851554657, |
|
"recall": 0.2856216737927651, |
|
"support": 3023.0 |
|
}, |
|
"mx": { |
|
"f1-score": 0.35931307793923384, |
|
"precision": 0.26433430515063167, |
|
"recall": 0.5608247422680412, |
|
"support": 485.0 |
|
}, |
|
"pe": { |
|
"f1-score": 0.16205533596837945, |
|
"precision": 0.25949367088607594, |
|
"recall": 0.11781609195402298, |
|
"support": 348.0 |
|
}, |
|
"pr": { |
|
"f1-score": 0.5296803652968036, |
|
"precision": 0.4915254237288136, |
|
"recall": 0.5742574257425742, |
|
"support": 101.0 |
|
}, |
|
"uy": { |
|
"f1-score": 0.1037037037037037, |
|
"precision": 0.3333333333333333, |
|
"recall": 0.06140350877192982, |
|
"support": 228.0 |
|
}, |
|
"ve": { |
|
"f1-score": 0.0, |
|
"precision": 0.0, |
|
"recall": 0.0, |
|
"support": 22.0 |
|
}, |
|
"weighted avg": { |
|
"f1-score": 0.29939857719104257, |
|
"precision": 0.32326424043920743, |
|
"recall": 0.31624214356599406, |
|
"support": 3023.0 |
|
} |
|
}, |
|
"eval_f1": 0.272621791856563, |
|
"eval_loss": 1.826755166053772, |
|
"eval_runtime": 4.3231, |
|
"eval_samples_per_second": 699.273, |
|
"eval_steps_per_second": 87.438, |
|
"step": 882 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2646, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 115989931620288.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|