{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9994373329582222, "eval_steps": 500, "global_step": 14216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002813335208890139, "grad_norm": 8.1875, "learning_rate": 5.625879043600562e-09, "loss": 1.9967, "step": 1 }, { "epoch": 0.0014066676044450696, "grad_norm": 8.25, "learning_rate": 2.8129395218002812e-08, "loss": 1.834, "step": 5 }, { "epoch": 0.0028133352088901393, "grad_norm": 7.8125, "learning_rate": 5.6258790436005624e-08, "loss": 1.9701, "step": 10 }, { "epoch": 0.004220002813335209, "grad_norm": 9.75, "learning_rate": 8.438818565400843e-08, "loss": 2.0358, "step": 15 }, { "epoch": 0.0056266704177802785, "grad_norm": 9.25, "learning_rate": 1.1251758087201125e-07, "loss": 2.1257, "step": 20 }, { "epoch": 0.007033338022225348, "grad_norm": 8.9375, "learning_rate": 1.4064697609001405e-07, "loss": 2.1333, "step": 25 }, { "epoch": 0.008440005626670417, "grad_norm": 7.9375, "learning_rate": 1.6877637130801686e-07, "loss": 2.0451, "step": 30 }, { "epoch": 0.009846673231115488, "grad_norm": 8.0625, "learning_rate": 1.9690576652601966e-07, "loss": 2.114, "step": 35 }, { "epoch": 0.011253340835560557, "grad_norm": 10.6875, "learning_rate": 2.250351617440225e-07, "loss": 2.0872, "step": 40 }, { "epoch": 0.012660008440005626, "grad_norm": 8.9375, "learning_rate": 2.5316455696202533e-07, "loss": 2.1654, "step": 45 }, { "epoch": 0.014066676044450697, "grad_norm": 15.0625, "learning_rate": 2.812939521800281e-07, "loss": 1.8529, "step": 50 }, { "epoch": 0.015473343648895766, "grad_norm": 15.1875, "learning_rate": 3.0942334739803094e-07, "loss": 2.0161, "step": 55 }, { "epoch": 0.016880011253340835, "grad_norm": 8.875, "learning_rate": 3.375527426160337e-07, "loss": 1.9371, "step": 60 }, { "epoch": 0.018286678857785905, "grad_norm": 10.0625, "learning_rate": 3.6568213783403655e-07, "loss": 1.7475, "step": 65 }, { "epoch": 0.019693346462230976, "grad_norm": 8.625, "learning_rate": 3.9381153305203933e-07, "loss": 2.1096, "step": 70 }, { "epoch": 0.021100014066676043, "grad_norm": 7.09375, "learning_rate": 4.2194092827004216e-07, "loss": 2.1636, "step": 75 }, { "epoch": 0.022506681671121114, "grad_norm": 18.375, "learning_rate": 4.50070323488045e-07, "loss": 1.888, "step": 80 }, { "epoch": 0.023913349275566185, "grad_norm": 9.6875, "learning_rate": 4.781997187060478e-07, "loss": 2.1679, "step": 85 }, { "epoch": 0.025320016880011252, "grad_norm": 8.5, "learning_rate": 5.063291139240507e-07, "loss": 1.8771, "step": 90 }, { "epoch": 0.026726684484456323, "grad_norm": 8.0625, "learning_rate": 5.344585091420533e-07, "loss": 1.9294, "step": 95 }, { "epoch": 0.028133352088901394, "grad_norm": 8.1875, "learning_rate": 5.625879043600562e-07, "loss": 1.8677, "step": 100 }, { "epoch": 0.02954001969334646, "grad_norm": 7.8125, "learning_rate": 5.907172995780591e-07, "loss": 1.9361, "step": 105 }, { "epoch": 0.03094668729779153, "grad_norm": 10.3125, "learning_rate": 6.188466947960619e-07, "loss": 2.1789, "step": 110 }, { "epoch": 0.0323533549022366, "grad_norm": 7.84375, "learning_rate": 6.469760900140648e-07, "loss": 2.0211, "step": 115 }, { "epoch": 0.03376002250668167, "grad_norm": 7.9375, "learning_rate": 6.751054852320674e-07, "loss": 2.1231, "step": 120 }, { "epoch": 0.03516669011112674, "grad_norm": 12.375, "learning_rate": 7.032348804500703e-07, "loss": 2.1527, "step": 125 }, { "epoch": 0.03657335771557181, "grad_norm": 14.6875, "learning_rate": 7.313642756680731e-07, "loss": 1.7367, "step": 130 }, { "epoch": 0.03798002532001688, "grad_norm": 8.3125, "learning_rate": 7.59493670886076e-07, "loss": 2.2069, "step": 135 }, { "epoch": 0.03938669292446195, "grad_norm": 7.40625, "learning_rate": 7.876230661040787e-07, "loss": 2.1692, "step": 140 }, { "epoch": 0.040793360528907016, "grad_norm": 10.1875, "learning_rate": 8.157524613220815e-07, "loss": 2.0095, "step": 145 }, { "epoch": 0.04220002813335209, "grad_norm": 8.3125, "learning_rate": 8.438818565400843e-07, "loss": 1.6281, "step": 150 }, { "epoch": 0.04360669573779716, "grad_norm": 7.625, "learning_rate": 8.720112517580872e-07, "loss": 1.759, "step": 155 }, { "epoch": 0.04501336334224223, "grad_norm": 8.75, "learning_rate": 9.0014064697609e-07, "loss": 1.9759, "step": 160 }, { "epoch": 0.0464200309466873, "grad_norm": 7.78125, "learning_rate": 9.282700421940928e-07, "loss": 2.0089, "step": 165 }, { "epoch": 0.04782669855113237, "grad_norm": 15.375, "learning_rate": 9.563994374120955e-07, "loss": 1.8533, "step": 170 }, { "epoch": 0.04923336615557744, "grad_norm": 11.625, "learning_rate": 9.845288326300985e-07, "loss": 2.0724, "step": 175 }, { "epoch": 0.050640033760022504, "grad_norm": 7.53125, "learning_rate": 1.0126582278481013e-06, "loss": 2.2394, "step": 180 }, { "epoch": 0.052046701364467575, "grad_norm": 7.21875, "learning_rate": 1.040787623066104e-06, "loss": 1.9738, "step": 185 }, { "epoch": 0.053453368968912646, "grad_norm": 7.34375, "learning_rate": 1.0689170182841067e-06, "loss": 2.0486, "step": 190 }, { "epoch": 0.054860036573357716, "grad_norm": 8.375, "learning_rate": 1.0970464135021096e-06, "loss": 1.88, "step": 195 }, { "epoch": 0.05626670417780279, "grad_norm": 7.90625, "learning_rate": 1.1251758087201124e-06, "loss": 1.9277, "step": 200 }, { "epoch": 0.05767337178224786, "grad_norm": 7.53125, "learning_rate": 1.1533052039381152e-06, "loss": 1.8934, "step": 205 }, { "epoch": 0.05908003938669292, "grad_norm": 7.0, "learning_rate": 1.1814345991561182e-06, "loss": 2.2194, "step": 210 }, { "epoch": 0.06048670699113799, "grad_norm": 6.375, "learning_rate": 1.209563994374121e-06, "loss": 1.7994, "step": 215 }, { "epoch": 0.06189337459558306, "grad_norm": 4.65625, "learning_rate": 1.2376933895921238e-06, "loss": 1.7983, "step": 220 }, { "epoch": 0.06330004220002813, "grad_norm": 5.46875, "learning_rate": 1.2658227848101265e-06, "loss": 1.9125, "step": 225 }, { "epoch": 0.0647067098044732, "grad_norm": 6.1875, "learning_rate": 1.2939521800281295e-06, "loss": 1.9858, "step": 230 }, { "epoch": 0.06611337740891828, "grad_norm": 5.21875, "learning_rate": 1.322081575246132e-06, "loss": 2.0835, "step": 235 }, { "epoch": 0.06752004501336334, "grad_norm": 5.625, "learning_rate": 1.3502109704641349e-06, "loss": 2.0088, "step": 240 }, { "epoch": 0.06892671261780842, "grad_norm": 4.6875, "learning_rate": 1.3783403656821376e-06, "loss": 2.0304, "step": 245 }, { "epoch": 0.07033338022225348, "grad_norm": 4.21875, "learning_rate": 1.4064697609001406e-06, "loss": 2.1096, "step": 250 }, { "epoch": 0.07174004782669856, "grad_norm": 9.25, "learning_rate": 1.4345991561181434e-06, "loss": 1.982, "step": 255 }, { "epoch": 0.07314671543114362, "grad_norm": 4.5625, "learning_rate": 1.4627285513361462e-06, "loss": 1.9651, "step": 260 }, { "epoch": 0.07455338303558869, "grad_norm": 4.4375, "learning_rate": 1.4908579465541492e-06, "loss": 1.8912, "step": 265 }, { "epoch": 0.07596005064003376, "grad_norm": 5.25, "learning_rate": 1.518987341772152e-06, "loss": 1.8922, "step": 270 }, { "epoch": 0.07736671824447883, "grad_norm": 4.65625, "learning_rate": 1.5471167369901545e-06, "loss": 2.033, "step": 275 }, { "epoch": 0.0787733858489239, "grad_norm": 8.6875, "learning_rate": 1.5752461322081573e-06, "loss": 1.8743, "step": 280 }, { "epoch": 0.08018005345336897, "grad_norm": 3.671875, "learning_rate": 1.6033755274261603e-06, "loss": 1.9284, "step": 285 }, { "epoch": 0.08158672105781403, "grad_norm": 4.5, "learning_rate": 1.631504922644163e-06, "loss": 1.8721, "step": 290 }, { "epoch": 0.08299338866225911, "grad_norm": 4.1875, "learning_rate": 1.6596343178621659e-06, "loss": 1.8106, "step": 295 }, { "epoch": 0.08440005626670417, "grad_norm": 5.03125, "learning_rate": 1.6877637130801686e-06, "loss": 1.8406, "step": 300 }, { "epoch": 0.08580672387114925, "grad_norm": 3.265625, "learning_rate": 1.7158931082981716e-06, "loss": 2.0746, "step": 305 }, { "epoch": 0.08721339147559432, "grad_norm": 5.40625, "learning_rate": 1.7440225035161744e-06, "loss": 1.7973, "step": 310 }, { "epoch": 0.08862005908003939, "grad_norm": 5.25, "learning_rate": 1.772151898734177e-06, "loss": 1.9815, "step": 315 }, { "epoch": 0.09002672668448446, "grad_norm": 3.328125, "learning_rate": 1.80028129395218e-06, "loss": 1.9967, "step": 320 }, { "epoch": 0.09143339428892952, "grad_norm": 2.6875, "learning_rate": 1.8284106891701827e-06, "loss": 1.8916, "step": 325 }, { "epoch": 0.0928400618933746, "grad_norm": 5.25, "learning_rate": 1.8565400843881855e-06, "loss": 1.686, "step": 330 }, { "epoch": 0.09424672949781966, "grad_norm": 4.28125, "learning_rate": 1.8846694796061883e-06, "loss": 2.0456, "step": 335 }, { "epoch": 0.09565339710226474, "grad_norm": 3.421875, "learning_rate": 1.912798874824191e-06, "loss": 2.115, "step": 340 }, { "epoch": 0.0970600647067098, "grad_norm": 3.828125, "learning_rate": 1.940928270042194e-06, "loss": 1.8874, "step": 345 }, { "epoch": 0.09846673231115488, "grad_norm": 3.015625, "learning_rate": 1.969057665260197e-06, "loss": 2.0844, "step": 350 }, { "epoch": 0.09987339991559994, "grad_norm": 3.21875, "learning_rate": 1.9971870604782e-06, "loss": 1.8196, "step": 355 }, { "epoch": 0.10128006752004501, "grad_norm": 2.90625, "learning_rate": 2.0253164556962026e-06, "loss": 1.9226, "step": 360 }, { "epoch": 0.10268673512449009, "grad_norm": 4.75, "learning_rate": 2.0534458509142054e-06, "loss": 1.8721, "step": 365 }, { "epoch": 0.10409340272893515, "grad_norm": 3.984375, "learning_rate": 2.081575246132208e-06, "loss": 1.8233, "step": 370 }, { "epoch": 0.10550007033338023, "grad_norm": 4.25, "learning_rate": 2.109704641350211e-06, "loss": 1.967, "step": 375 }, { "epoch": 0.10690673793782529, "grad_norm": 3.625, "learning_rate": 2.1378340365682133e-06, "loss": 1.9782, "step": 380 }, { "epoch": 0.10831340554227036, "grad_norm": 3.5625, "learning_rate": 2.1659634317862165e-06, "loss": 2.1084, "step": 385 }, { "epoch": 0.10972007314671543, "grad_norm": 3.640625, "learning_rate": 2.1940928270042193e-06, "loss": 2.2131, "step": 390 }, { "epoch": 0.1111267407511605, "grad_norm": 4.0625, "learning_rate": 2.222222222222222e-06, "loss": 1.8195, "step": 395 }, { "epoch": 0.11253340835560557, "grad_norm": 4.15625, "learning_rate": 2.250351617440225e-06, "loss": 1.9049, "step": 400 }, { "epoch": 0.11394007596005064, "grad_norm": 4.28125, "learning_rate": 2.278481012658228e-06, "loss": 1.9102, "step": 405 }, { "epoch": 0.11534674356449572, "grad_norm": 5.875, "learning_rate": 2.3066104078762304e-06, "loss": 1.8624, "step": 410 }, { "epoch": 0.11675341116894078, "grad_norm": 2.828125, "learning_rate": 2.3347398030942336e-06, "loss": 1.7784, "step": 415 }, { "epoch": 0.11816007877338584, "grad_norm": 7.59375, "learning_rate": 2.3628691983122364e-06, "loss": 1.9591, "step": 420 }, { "epoch": 0.11956674637783092, "grad_norm": 4.3125, "learning_rate": 2.3909985935302387e-06, "loss": 1.7737, "step": 425 }, { "epoch": 0.12097341398227598, "grad_norm": 2.4375, "learning_rate": 2.419127988748242e-06, "loss": 1.7564, "step": 430 }, { "epoch": 0.12238008158672106, "grad_norm": 2.9375, "learning_rate": 2.4472573839662443e-06, "loss": 1.8436, "step": 435 }, { "epoch": 0.12378674919116613, "grad_norm": 2.21875, "learning_rate": 2.4753867791842475e-06, "loss": 1.9585, "step": 440 }, { "epoch": 0.1251934167956112, "grad_norm": 5.0625, "learning_rate": 2.5035161744022503e-06, "loss": 1.7232, "step": 445 }, { "epoch": 0.12660008440005627, "grad_norm": 3.03125, "learning_rate": 2.531645569620253e-06, "loss": 1.9946, "step": 450 }, { "epoch": 0.12800675200450135, "grad_norm": 2.765625, "learning_rate": 2.559774964838256e-06, "loss": 1.9282, "step": 455 }, { "epoch": 0.1294134196089464, "grad_norm": 4.1875, "learning_rate": 2.587904360056259e-06, "loss": 1.7587, "step": 460 }, { "epoch": 0.13082008721339147, "grad_norm": 4.75, "learning_rate": 2.6160337552742614e-06, "loss": 1.785, "step": 465 }, { "epoch": 0.13222675481783655, "grad_norm": 3.375, "learning_rate": 2.644163150492264e-06, "loss": 1.8986, "step": 470 }, { "epoch": 0.13363342242228163, "grad_norm": 4.8125, "learning_rate": 2.6722925457102674e-06, "loss": 1.7295, "step": 475 }, { "epoch": 0.13504009002672668, "grad_norm": 2.546875, "learning_rate": 2.7004219409282697e-06, "loss": 1.9288, "step": 480 }, { "epoch": 0.13644675763117176, "grad_norm": 3.4375, "learning_rate": 2.728551336146273e-06, "loss": 1.7269, "step": 485 }, { "epoch": 0.13785342523561683, "grad_norm": 3.296875, "learning_rate": 2.7566807313642753e-06, "loss": 1.4669, "step": 490 }, { "epoch": 0.13926009284006188, "grad_norm": 3.84375, "learning_rate": 2.7848101265822785e-06, "loss": 1.9001, "step": 495 }, { "epoch": 0.14066676044450696, "grad_norm": 3.34375, "learning_rate": 2.8129395218002813e-06, "loss": 1.7307, "step": 500 }, { "epoch": 0.14207342804895204, "grad_norm": 4.4375, "learning_rate": 2.8410689170182836e-06, "loss": 1.8674, "step": 505 }, { "epoch": 0.14348009565339712, "grad_norm": 12.375, "learning_rate": 2.869198312236287e-06, "loss": 1.9095, "step": 510 }, { "epoch": 0.14488676325784217, "grad_norm": 2.53125, "learning_rate": 2.8973277074542896e-06, "loss": 1.8201, "step": 515 }, { "epoch": 0.14629343086228724, "grad_norm": 3.625, "learning_rate": 2.9254571026722924e-06, "loss": 1.9156, "step": 520 }, { "epoch": 0.14770009846673232, "grad_norm": 3.96875, "learning_rate": 2.953586497890295e-06, "loss": 1.7277, "step": 525 }, { "epoch": 0.14910676607117737, "grad_norm": 4.25, "learning_rate": 2.9817158931082984e-06, "loss": 1.7154, "step": 530 }, { "epoch": 0.15051343367562245, "grad_norm": 3.734375, "learning_rate": 3.0098452883263007e-06, "loss": 1.7168, "step": 535 }, { "epoch": 0.15192010128006753, "grad_norm": 4.1875, "learning_rate": 3.037974683544304e-06, "loss": 2.0013, "step": 540 }, { "epoch": 0.15332676888451258, "grad_norm": 2.890625, "learning_rate": 3.0661040787623063e-06, "loss": 1.8941, "step": 545 }, { "epoch": 0.15473343648895765, "grad_norm": 3.046875, "learning_rate": 3.094233473980309e-06, "loss": 1.682, "step": 550 }, { "epoch": 0.15614010409340273, "grad_norm": 5.75, "learning_rate": 3.1223628691983123e-06, "loss": 1.7576, "step": 555 }, { "epoch": 0.1575467716978478, "grad_norm": 4.71875, "learning_rate": 3.1504922644163146e-06, "loss": 1.8123, "step": 560 }, { "epoch": 0.15895343930229286, "grad_norm": 3.3125, "learning_rate": 3.178621659634318e-06, "loss": 1.6807, "step": 565 }, { "epoch": 0.16036010690673794, "grad_norm": 2.859375, "learning_rate": 3.2067510548523206e-06, "loss": 1.6907, "step": 570 }, { "epoch": 0.16176677451118301, "grad_norm": 3.0625, "learning_rate": 3.2348804500703234e-06, "loss": 1.892, "step": 575 }, { "epoch": 0.16317344211562806, "grad_norm": 2.625, "learning_rate": 3.263009845288326e-06, "loss": 2.0725, "step": 580 }, { "epoch": 0.16458010972007314, "grad_norm": 4.5, "learning_rate": 3.2911392405063294e-06, "loss": 1.8148, "step": 585 }, { "epoch": 0.16598677732451822, "grad_norm": 3.71875, "learning_rate": 3.3192686357243317e-06, "loss": 1.7494, "step": 590 }, { "epoch": 0.1673934449289633, "grad_norm": 4.53125, "learning_rate": 3.3473980309423345e-06, "loss": 1.8697, "step": 595 }, { "epoch": 0.16880011253340835, "grad_norm": 4.65625, "learning_rate": 3.3755274261603373e-06, "loss": 1.9731, "step": 600 }, { "epoch": 0.17020678013785343, "grad_norm": 2.859375, "learning_rate": 3.40365682137834e-06, "loss": 1.7131, "step": 605 }, { "epoch": 0.1716134477422985, "grad_norm": 3.46875, "learning_rate": 3.4317862165963433e-06, "loss": 1.8335, "step": 610 }, { "epoch": 0.17302011534674355, "grad_norm": 2.625, "learning_rate": 3.4599156118143456e-06, "loss": 1.8878, "step": 615 }, { "epoch": 0.17442678295118863, "grad_norm": 4.5, "learning_rate": 3.488045007032349e-06, "loss": 1.6365, "step": 620 }, { "epoch": 0.1758334505556337, "grad_norm": 5.28125, "learning_rate": 3.5161744022503516e-06, "loss": 1.8077, "step": 625 }, { "epoch": 0.17724011816007879, "grad_norm": 3.78125, "learning_rate": 3.544303797468354e-06, "loss": 1.9373, "step": 630 }, { "epoch": 0.17864678576452384, "grad_norm": 3.03125, "learning_rate": 3.572433192686357e-06, "loss": 1.8536, "step": 635 }, { "epoch": 0.1800534533689689, "grad_norm": 4.03125, "learning_rate": 3.60056258790436e-06, "loss": 1.7992, "step": 640 }, { "epoch": 0.181460120973414, "grad_norm": 2.65625, "learning_rate": 3.6286919831223627e-06, "loss": 1.7746, "step": 645 }, { "epoch": 0.18286678857785904, "grad_norm": 3.125, "learning_rate": 3.6568213783403655e-06, "loss": 2.0007, "step": 650 }, { "epoch": 0.18427345618230412, "grad_norm": 3.15625, "learning_rate": 3.6849507735583683e-06, "loss": 1.8947, "step": 655 }, { "epoch": 0.1856801237867492, "grad_norm": 3.484375, "learning_rate": 3.713080168776371e-06, "loss": 1.8504, "step": 660 }, { "epoch": 0.18708679139119427, "grad_norm": 3.15625, "learning_rate": 3.7412095639943743e-06, "loss": 1.7796, "step": 665 }, { "epoch": 0.18849345899563932, "grad_norm": 3.296875, "learning_rate": 3.7693389592123766e-06, "loss": 1.7598, "step": 670 }, { "epoch": 0.1899001266000844, "grad_norm": 2.765625, "learning_rate": 3.7974683544303794e-06, "loss": 1.6422, "step": 675 }, { "epoch": 0.19130679420452948, "grad_norm": 3.546875, "learning_rate": 3.825597749648382e-06, "loss": 1.8552, "step": 680 }, { "epoch": 0.19271346180897453, "grad_norm": 3.921875, "learning_rate": 3.853727144866385e-06, "loss": 1.5753, "step": 685 }, { "epoch": 0.1941201294134196, "grad_norm": 3.421875, "learning_rate": 3.881856540084388e-06, "loss": 1.7207, "step": 690 }, { "epoch": 0.19552679701786468, "grad_norm": 5.21875, "learning_rate": 3.909985935302391e-06, "loss": 1.5458, "step": 695 }, { "epoch": 0.19693346462230976, "grad_norm": 4.03125, "learning_rate": 3.938115330520394e-06, "loss": 1.6871, "step": 700 }, { "epoch": 0.1983401322267548, "grad_norm": 3.328125, "learning_rate": 3.9662447257383965e-06, "loss": 1.9603, "step": 705 }, { "epoch": 0.1997467998311999, "grad_norm": 4.0625, "learning_rate": 3.9943741209564e-06, "loss": 1.6939, "step": 710 }, { "epoch": 0.20115346743564497, "grad_norm": 4.03125, "learning_rate": 4.022503516174402e-06, "loss": 1.7818, "step": 715 }, { "epoch": 0.20256013504009002, "grad_norm": 3.515625, "learning_rate": 4.050632911392405e-06, "loss": 1.7131, "step": 720 }, { "epoch": 0.2039668026445351, "grad_norm": 2.984375, "learning_rate": 4.078762306610408e-06, "loss": 1.8219, "step": 725 }, { "epoch": 0.20537347024898017, "grad_norm": 3.65625, "learning_rate": 4.106891701828411e-06, "loss": 1.6883, "step": 730 }, { "epoch": 0.20678013785342522, "grad_norm": 3.453125, "learning_rate": 4.135021097046413e-06, "loss": 1.8892, "step": 735 }, { "epoch": 0.2081868054578703, "grad_norm": 4.0625, "learning_rate": 4.163150492264416e-06, "loss": 1.6866, "step": 740 }, { "epoch": 0.20959347306231538, "grad_norm": 3.296875, "learning_rate": 4.191279887482419e-06, "loss": 1.4289, "step": 745 }, { "epoch": 0.21100014066676046, "grad_norm": 3.734375, "learning_rate": 4.219409282700422e-06, "loss": 1.5471, "step": 750 }, { "epoch": 0.2124068082712055, "grad_norm": 3.59375, "learning_rate": 4.247538677918425e-06, "loss": 1.9359, "step": 755 }, { "epoch": 0.21381347587565058, "grad_norm": 3.390625, "learning_rate": 4.275668073136427e-06, "loss": 1.9177, "step": 760 }, { "epoch": 0.21522014348009566, "grad_norm": 3.53125, "learning_rate": 4.30379746835443e-06, "loss": 1.9106, "step": 765 }, { "epoch": 0.2166268110845407, "grad_norm": 3.375, "learning_rate": 4.331926863572433e-06, "loss": 1.9653, "step": 770 }, { "epoch": 0.2180334786889858, "grad_norm": 3.09375, "learning_rate": 4.360056258790436e-06, "loss": 1.7532, "step": 775 }, { "epoch": 0.21944014629343087, "grad_norm": 3.09375, "learning_rate": 4.388185654008439e-06, "loss": 1.8319, "step": 780 }, { "epoch": 0.22084681389787594, "grad_norm": 3.140625, "learning_rate": 4.416315049226442e-06, "loss": 2.0233, "step": 785 }, { "epoch": 0.222253481502321, "grad_norm": 2.71875, "learning_rate": 4.444444444444444e-06, "loss": 1.9903, "step": 790 }, { "epoch": 0.22366014910676607, "grad_norm": 2.4375, "learning_rate": 4.4725738396624465e-06, "loss": 2.0019, "step": 795 }, { "epoch": 0.22506681671121115, "grad_norm": 3.171875, "learning_rate": 4.50070323488045e-06, "loss": 1.7976, "step": 800 }, { "epoch": 0.2264734843156562, "grad_norm": 3.453125, "learning_rate": 4.528832630098453e-06, "loss": 1.8314, "step": 805 }, { "epoch": 0.22788015192010128, "grad_norm": 3.046875, "learning_rate": 4.556962025316456e-06, "loss": 1.7646, "step": 810 }, { "epoch": 0.22928681952454635, "grad_norm": 9.9375, "learning_rate": 4.585091420534458e-06, "loss": 1.5575, "step": 815 }, { "epoch": 0.23069348712899143, "grad_norm": 4.5625, "learning_rate": 4.613220815752461e-06, "loss": 1.4394, "step": 820 }, { "epoch": 0.23210015473343648, "grad_norm": 3.5625, "learning_rate": 4.641350210970464e-06, "loss": 1.6944, "step": 825 }, { "epoch": 0.23350682233788156, "grad_norm": 3.625, "learning_rate": 4.669479606188467e-06, "loss": 1.9698, "step": 830 }, { "epoch": 0.23491348994232664, "grad_norm": 2.46875, "learning_rate": 4.69760900140647e-06, "loss": 1.8369, "step": 835 }, { "epoch": 0.2363201575467717, "grad_norm": 2.84375, "learning_rate": 4.725738396624473e-06, "loss": 2.0049, "step": 840 }, { "epoch": 0.23772682515121676, "grad_norm": 4.90625, "learning_rate": 4.753867791842475e-06, "loss": 1.6301, "step": 845 }, { "epoch": 0.23913349275566184, "grad_norm": 3.28125, "learning_rate": 4.7819971870604775e-06, "loss": 2.0206, "step": 850 }, { "epoch": 0.24054016036010692, "grad_norm": 3.671875, "learning_rate": 4.810126582278481e-06, "loss": 1.9102, "step": 855 }, { "epoch": 0.24194682796455197, "grad_norm": 2.515625, "learning_rate": 4.838255977496484e-06, "loss": 1.5072, "step": 860 }, { "epoch": 0.24335349556899705, "grad_norm": 3.265625, "learning_rate": 4.866385372714487e-06, "loss": 1.5767, "step": 865 }, { "epoch": 0.24476016317344212, "grad_norm": 3.140625, "learning_rate": 4.894514767932489e-06, "loss": 1.979, "step": 870 }, { "epoch": 0.24616683077788717, "grad_norm": 3.71875, "learning_rate": 4.922644163150492e-06, "loss": 1.7614, "step": 875 }, { "epoch": 0.24757349838233225, "grad_norm": 2.703125, "learning_rate": 4.950773558368495e-06, "loss": 1.7441, "step": 880 }, { "epoch": 0.24898016598677733, "grad_norm": 3.546875, "learning_rate": 4.978902953586497e-06, "loss": 1.7426, "step": 885 }, { "epoch": 0.2503868335912224, "grad_norm": 4.6875, "learning_rate": 5.0070323488045006e-06, "loss": 1.7325, "step": 890 }, { "epoch": 0.25179350119566746, "grad_norm": 3.609375, "learning_rate": 5.035161744022504e-06, "loss": 1.8025, "step": 895 }, { "epoch": 0.25320016880011254, "grad_norm": 4.5, "learning_rate": 5.063291139240506e-06, "loss": 1.7743, "step": 900 }, { "epoch": 0.2546068364045576, "grad_norm": 4.125, "learning_rate": 5.0914205344585085e-06, "loss": 1.5968, "step": 905 }, { "epoch": 0.2560135040090027, "grad_norm": 11.3125, "learning_rate": 5.119549929676512e-06, "loss": 1.7051, "step": 910 }, { "epoch": 0.25742017161344777, "grad_norm": 3.09375, "learning_rate": 5.147679324894515e-06, "loss": 1.7582, "step": 915 }, { "epoch": 0.2588268392178928, "grad_norm": 3.171875, "learning_rate": 5.175808720112518e-06, "loss": 1.7175, "step": 920 }, { "epoch": 0.26023350682233787, "grad_norm": 4.0625, "learning_rate": 5.20393811533052e-06, "loss": 1.7261, "step": 925 }, { "epoch": 0.26164017442678295, "grad_norm": 2.640625, "learning_rate": 5.232067510548523e-06, "loss": 1.7149, "step": 930 }, { "epoch": 0.263046842031228, "grad_norm": 3.28125, "learning_rate": 5.260196905766526e-06, "loss": 1.7658, "step": 935 }, { "epoch": 0.2644535096356731, "grad_norm": 3.296875, "learning_rate": 5.288326300984528e-06, "loss": 1.691, "step": 940 }, { "epoch": 0.2658601772401182, "grad_norm": 2.96875, "learning_rate": 5.3164556962025316e-06, "loss": 2.1113, "step": 945 }, { "epoch": 0.26726684484456326, "grad_norm": 2.671875, "learning_rate": 5.344585091420535e-06, "loss": 1.9024, "step": 950 }, { "epoch": 0.2686735124490083, "grad_norm": 4.875, "learning_rate": 5.372714486638537e-06, "loss": 1.8391, "step": 955 }, { "epoch": 0.27008018005345336, "grad_norm": 3.53125, "learning_rate": 5.4008438818565395e-06, "loss": 1.607, "step": 960 }, { "epoch": 0.27148684765789843, "grad_norm": 4.5625, "learning_rate": 5.428973277074543e-06, "loss": 1.8196, "step": 965 }, { "epoch": 0.2728935152623435, "grad_norm": 3.203125, "learning_rate": 5.457102672292546e-06, "loss": 1.6765, "step": 970 }, { "epoch": 0.2743001828667886, "grad_norm": 3.015625, "learning_rate": 5.485232067510548e-06, "loss": 1.8108, "step": 975 }, { "epoch": 0.27570685047123367, "grad_norm": 2.84375, "learning_rate": 5.513361462728551e-06, "loss": 1.8147, "step": 980 }, { "epoch": 0.27711351807567874, "grad_norm": 3.296875, "learning_rate": 5.541490857946554e-06, "loss": 1.936, "step": 985 }, { "epoch": 0.27852018568012377, "grad_norm": 3.8125, "learning_rate": 5.569620253164557e-06, "loss": 1.781, "step": 990 }, { "epoch": 0.27992685328456884, "grad_norm": 3.84375, "learning_rate": 5.597749648382559e-06, "loss": 1.4884, "step": 995 }, { "epoch": 0.2813335208890139, "grad_norm": 2.359375, "learning_rate": 5.6258790436005626e-06, "loss": 1.472, "step": 1000 }, { "epoch": 0.282740188493459, "grad_norm": 2.703125, "learning_rate": 5.654008438818566e-06, "loss": 1.4928, "step": 1005 }, { "epoch": 0.2841468560979041, "grad_norm": 3.203125, "learning_rate": 5.682137834036567e-06, "loss": 1.6619, "step": 1010 }, { "epoch": 0.28555352370234915, "grad_norm": 3.5625, "learning_rate": 5.7102672292545705e-06, "loss": 2.0167, "step": 1015 }, { "epoch": 0.28696019130679423, "grad_norm": 3.328125, "learning_rate": 5.738396624472574e-06, "loss": 1.7161, "step": 1020 }, { "epoch": 0.28836685891123925, "grad_norm": 3.765625, "learning_rate": 5.766526019690577e-06, "loss": 1.9748, "step": 1025 }, { "epoch": 0.28977352651568433, "grad_norm": 4.59375, "learning_rate": 5.794655414908579e-06, "loss": 1.7424, "step": 1030 }, { "epoch": 0.2911801941201294, "grad_norm": 2.625, "learning_rate": 5.822784810126582e-06, "loss": 1.6562, "step": 1035 }, { "epoch": 0.2925868617245745, "grad_norm": 2.90625, "learning_rate": 5.850914205344585e-06, "loss": 1.7536, "step": 1040 }, { "epoch": 0.29399352932901957, "grad_norm": 2.4375, "learning_rate": 5.879043600562588e-06, "loss": 1.8031, "step": 1045 }, { "epoch": 0.29540019693346464, "grad_norm": 2.953125, "learning_rate": 5.90717299578059e-06, "loss": 1.6396, "step": 1050 }, { "epoch": 0.29680686453790966, "grad_norm": 2.78125, "learning_rate": 5.9353023909985935e-06, "loss": 1.7845, "step": 1055 }, { "epoch": 0.29821353214235474, "grad_norm": 2.96875, "learning_rate": 5.963431786216597e-06, "loss": 1.9526, "step": 1060 }, { "epoch": 0.2996201997467998, "grad_norm": 2.734375, "learning_rate": 5.991561181434598e-06, "loss": 1.5986, "step": 1065 }, { "epoch": 0.3010268673512449, "grad_norm": 3.765625, "learning_rate": 6.0196905766526015e-06, "loss": 1.7045, "step": 1070 }, { "epoch": 0.30243353495569, "grad_norm": 4.28125, "learning_rate": 6.047819971870605e-06, "loss": 1.6006, "step": 1075 }, { "epoch": 0.30384020256013505, "grad_norm": 2.328125, "learning_rate": 6.075949367088608e-06, "loss": 1.8305, "step": 1080 }, { "epoch": 0.30524687016458013, "grad_norm": 4.9375, "learning_rate": 6.10407876230661e-06, "loss": 1.6308, "step": 1085 }, { "epoch": 0.30665353776902515, "grad_norm": 2.375, "learning_rate": 6.132208157524613e-06, "loss": 1.7865, "step": 1090 }, { "epoch": 0.30806020537347023, "grad_norm": 2.734375, "learning_rate": 6.160337552742616e-06, "loss": 1.8331, "step": 1095 }, { "epoch": 0.3094668729779153, "grad_norm": 3.015625, "learning_rate": 6.188466947960618e-06, "loss": 1.7786, "step": 1100 }, { "epoch": 0.3108735405823604, "grad_norm": 3.0, "learning_rate": 6.216596343178621e-06, "loss": 2.0003, "step": 1105 }, { "epoch": 0.31228020818680546, "grad_norm": 2.84375, "learning_rate": 6.2447257383966245e-06, "loss": 1.824, "step": 1110 }, { "epoch": 0.31368687579125054, "grad_norm": 3.296875, "learning_rate": 6.272855133614628e-06, "loss": 1.8362, "step": 1115 }, { "epoch": 0.3150935433956956, "grad_norm": 3.921875, "learning_rate": 6.300984528832629e-06, "loss": 1.6952, "step": 1120 }, { "epoch": 0.31650021100014064, "grad_norm": 5.84375, "learning_rate": 6.3291139240506325e-06, "loss": 1.5749, "step": 1125 }, { "epoch": 0.3179068786045857, "grad_norm": 3.21875, "learning_rate": 6.357243319268636e-06, "loss": 1.5969, "step": 1130 }, { "epoch": 0.3193135462090308, "grad_norm": 4.375, "learning_rate": 6.385372714486638e-06, "loss": 1.5812, "step": 1135 }, { "epoch": 0.3207202138134759, "grad_norm": 2.328125, "learning_rate": 6.413502109704641e-06, "loss": 1.9075, "step": 1140 }, { "epoch": 0.32212688141792095, "grad_norm": 2.59375, "learning_rate": 6.4416315049226436e-06, "loss": 1.9003, "step": 1145 }, { "epoch": 0.32353354902236603, "grad_norm": 3.046875, "learning_rate": 6.469760900140647e-06, "loss": 1.7614, "step": 1150 }, { "epoch": 0.3249402166268111, "grad_norm": 4.9375, "learning_rate": 6.497890295358649e-06, "loss": 1.812, "step": 1155 }, { "epoch": 0.32634688423125613, "grad_norm": 3.234375, "learning_rate": 6.526019690576652e-06, "loss": 1.6565, "step": 1160 }, { "epoch": 0.3277535518357012, "grad_norm": 3.140625, "learning_rate": 6.5541490857946555e-06, "loss": 1.9087, "step": 1165 }, { "epoch": 0.3291602194401463, "grad_norm": 3.375, "learning_rate": 6.582278481012659e-06, "loss": 1.6842, "step": 1170 }, { "epoch": 0.33056688704459136, "grad_norm": 4.09375, "learning_rate": 6.61040787623066e-06, "loss": 1.4252, "step": 1175 }, { "epoch": 0.33197355464903644, "grad_norm": 3.203125, "learning_rate": 6.6385372714486634e-06, "loss": 1.6601, "step": 1180 }, { "epoch": 0.3333802222534815, "grad_norm": 2.59375, "learning_rate": 6.666666666666667e-06, "loss": 1.7866, "step": 1185 }, { "epoch": 0.3347868898579266, "grad_norm": 3.109375, "learning_rate": 6.694796061884669e-06, "loss": 1.6193, "step": 1190 }, { "epoch": 0.3361935574623716, "grad_norm": 3.234375, "learning_rate": 6.722925457102672e-06, "loss": 1.6185, "step": 1195 }, { "epoch": 0.3376002250668167, "grad_norm": 3.296875, "learning_rate": 6.7510548523206746e-06, "loss": 1.7356, "step": 1200 }, { "epoch": 0.3390068926712618, "grad_norm": 2.78125, "learning_rate": 6.779184247538678e-06, "loss": 1.8204, "step": 1205 }, { "epoch": 0.34041356027570685, "grad_norm": 3.203125, "learning_rate": 6.80731364275668e-06, "loss": 1.6249, "step": 1210 }, { "epoch": 0.34182022788015193, "grad_norm": 2.890625, "learning_rate": 6.835443037974683e-06, "loss": 1.8842, "step": 1215 }, { "epoch": 0.343226895484597, "grad_norm": 2.53125, "learning_rate": 6.8635724331926865e-06, "loss": 1.7801, "step": 1220 }, { "epoch": 0.3446335630890421, "grad_norm": 3.921875, "learning_rate": 6.891701828410689e-06, "loss": 1.4851, "step": 1225 }, { "epoch": 0.3460402306934871, "grad_norm": 3.125, "learning_rate": 6.919831223628691e-06, "loss": 1.7178, "step": 1230 }, { "epoch": 0.3474468982979322, "grad_norm": 2.109375, "learning_rate": 6.9479606188466944e-06, "loss": 1.6033, "step": 1235 }, { "epoch": 0.34885356590237726, "grad_norm": 3.203125, "learning_rate": 6.976090014064698e-06, "loss": 1.7969, "step": 1240 }, { "epoch": 0.35026023350682234, "grad_norm": 2.515625, "learning_rate": 7.0042194092827e-06, "loss": 1.8187, "step": 1245 }, { "epoch": 0.3516669011112674, "grad_norm": 2.859375, "learning_rate": 7.032348804500703e-06, "loss": 1.819, "step": 1250 }, { "epoch": 0.3530735687157125, "grad_norm": 2.578125, "learning_rate": 7.0604781997187056e-06, "loss": 1.7712, "step": 1255 }, { "epoch": 0.35448023632015757, "grad_norm": 5.375, "learning_rate": 7.088607594936708e-06, "loss": 1.7155, "step": 1260 }, { "epoch": 0.3558869039246026, "grad_norm": 3.296875, "learning_rate": 7.116736990154711e-06, "loss": 1.6361, "step": 1265 }, { "epoch": 0.35729357152904767, "grad_norm": 3.796875, "learning_rate": 7.144866385372714e-06, "loss": 1.7478, "step": 1270 }, { "epoch": 0.35870023913349275, "grad_norm": 3.0, "learning_rate": 7.1729957805907175e-06, "loss": 1.844, "step": 1275 }, { "epoch": 0.3601069067379378, "grad_norm": 3.015625, "learning_rate": 7.20112517580872e-06, "loss": 1.6414, "step": 1280 }, { "epoch": 0.3615135743423829, "grad_norm": 2.578125, "learning_rate": 7.229254571026722e-06, "loss": 1.7427, "step": 1285 }, { "epoch": 0.362920241946828, "grad_norm": 2.359375, "learning_rate": 7.2573839662447254e-06, "loss": 1.7395, "step": 1290 }, { "epoch": 0.36432690955127306, "grad_norm": 2.59375, "learning_rate": 7.285513361462729e-06, "loss": 1.8215, "step": 1295 }, { "epoch": 0.3657335771557181, "grad_norm": 3.515625, "learning_rate": 7.313642756680731e-06, "loss": 1.7573, "step": 1300 }, { "epoch": 0.36714024476016316, "grad_norm": 3.203125, "learning_rate": 7.341772151898734e-06, "loss": 1.841, "step": 1305 }, { "epoch": 0.36854691236460824, "grad_norm": 3.015625, "learning_rate": 7.3699015471167365e-06, "loss": 1.842, "step": 1310 }, { "epoch": 0.3699535799690533, "grad_norm": 2.875, "learning_rate": 7.398030942334739e-06, "loss": 2.0199, "step": 1315 }, { "epoch": 0.3713602475734984, "grad_norm": 3.28125, "learning_rate": 7.426160337552742e-06, "loss": 1.8881, "step": 1320 }, { "epoch": 0.37276691517794347, "grad_norm": 2.640625, "learning_rate": 7.454289732770745e-06, "loss": 1.7692, "step": 1325 }, { "epoch": 0.37417358278238855, "grad_norm": 4.84375, "learning_rate": 7.4824191279887485e-06, "loss": 1.8311, "step": 1330 }, { "epoch": 0.37558025038683357, "grad_norm": 5.34375, "learning_rate": 7.510548523206751e-06, "loss": 1.7703, "step": 1335 }, { "epoch": 0.37698691799127865, "grad_norm": 2.59375, "learning_rate": 7.538677918424753e-06, "loss": 1.7416, "step": 1340 }, { "epoch": 0.3783935855957237, "grad_norm": 3.109375, "learning_rate": 7.566807313642756e-06, "loss": 1.7398, "step": 1345 }, { "epoch": 0.3798002532001688, "grad_norm": 2.875, "learning_rate": 7.594936708860759e-06, "loss": 1.571, "step": 1350 }, { "epoch": 0.3812069208046139, "grad_norm": 3.34375, "learning_rate": 7.623066104078762e-06, "loss": 1.4582, "step": 1355 }, { "epoch": 0.38261358840905896, "grad_norm": 3.609375, "learning_rate": 7.651195499296764e-06, "loss": 1.981, "step": 1360 }, { "epoch": 0.38402025601350404, "grad_norm": 4.6875, "learning_rate": 7.679324894514768e-06, "loss": 1.611, "step": 1365 }, { "epoch": 0.38542692361794906, "grad_norm": 2.828125, "learning_rate": 7.70745428973277e-06, "loss": 1.7369, "step": 1370 }, { "epoch": 0.38683359122239414, "grad_norm": 3.234375, "learning_rate": 7.735583684950773e-06, "loss": 2.081, "step": 1375 }, { "epoch": 0.3882402588268392, "grad_norm": 3.34375, "learning_rate": 7.763713080168775e-06, "loss": 1.5745, "step": 1380 }, { "epoch": 0.3896469264312843, "grad_norm": 2.1875, "learning_rate": 7.791842475386778e-06, "loss": 1.7473, "step": 1385 }, { "epoch": 0.39105359403572937, "grad_norm": 2.71875, "learning_rate": 7.819971870604782e-06, "loss": 1.7226, "step": 1390 }, { "epoch": 0.39246026164017445, "grad_norm": 3.40625, "learning_rate": 7.848101265822784e-06, "loss": 1.5063, "step": 1395 }, { "epoch": 0.3938669292446195, "grad_norm": 2.78125, "learning_rate": 7.876230661040788e-06, "loss": 1.5973, "step": 1400 }, { "epoch": 0.39527359684906455, "grad_norm": 3.625, "learning_rate": 7.904360056258789e-06, "loss": 1.6063, "step": 1405 }, { "epoch": 0.3966802644535096, "grad_norm": 2.78125, "learning_rate": 7.932489451476793e-06, "loss": 1.889, "step": 1410 }, { "epoch": 0.3980869320579547, "grad_norm": 4.15625, "learning_rate": 7.960618846694795e-06, "loss": 1.65, "step": 1415 }, { "epoch": 0.3994935996623998, "grad_norm": 4.34375, "learning_rate": 7.9887482419128e-06, "loss": 1.6546, "step": 1420 }, { "epoch": 0.40090026726684486, "grad_norm": 4.6875, "learning_rate": 7.999998914675671e-06, "loss": 1.6465, "step": 1425 }, { "epoch": 0.40230693487128993, "grad_norm": 2.6875, "learning_rate": 7.999992282140243e-06, "loss": 1.7975, "step": 1430 }, { "epoch": 0.40371360247573496, "grad_norm": 3.625, "learning_rate": 7.999979620037334e-06, "loss": 1.7769, "step": 1435 }, { "epoch": 0.40512027008018003, "grad_norm": 3.25, "learning_rate": 7.999960928386025e-06, "loss": 1.6168, "step": 1440 }, { "epoch": 0.4065269376846251, "grad_norm": 3.28125, "learning_rate": 7.9999362072145e-06, "loss": 1.8668, "step": 1445 }, { "epoch": 0.4079336052890702, "grad_norm": 2.875, "learning_rate": 7.999905456560018e-06, "loss": 1.8308, "step": 1450 }, { "epoch": 0.40934027289351527, "grad_norm": 3.984375, "learning_rate": 7.999868676468933e-06, "loss": 1.7166, "step": 1455 }, { "epoch": 0.41074694049796034, "grad_norm": 3.125, "learning_rate": 7.99982586699669e-06, "loss": 1.9376, "step": 1460 }, { "epoch": 0.4121536081024054, "grad_norm": 2.578125, "learning_rate": 7.999777028207818e-06, "loss": 1.9246, "step": 1465 }, { "epoch": 0.41356027570685044, "grad_norm": 3.703125, "learning_rate": 7.999722160175935e-06, "loss": 1.8283, "step": 1470 }, { "epoch": 0.4149669433112955, "grad_norm": 2.6875, "learning_rate": 7.99966126298375e-06, "loss": 1.5573, "step": 1475 }, { "epoch": 0.4163736109157406, "grad_norm": 2.9375, "learning_rate": 7.99959433672306e-06, "loss": 1.7425, "step": 1480 }, { "epoch": 0.4177802785201857, "grad_norm": 3.03125, "learning_rate": 7.999521381494747e-06, "loss": 1.468, "step": 1485 }, { "epoch": 0.41918694612463075, "grad_norm": 3.578125, "learning_rate": 7.999442397408785e-06, "loss": 2.0143, "step": 1490 }, { "epoch": 0.42059361372907583, "grad_norm": 3.796875, "learning_rate": 7.999357384584235e-06, "loss": 1.5066, "step": 1495 }, { "epoch": 0.4220002813335209, "grad_norm": 3.046875, "learning_rate": 7.999266343149242e-06, "loss": 1.5112, "step": 1500 }, { "epoch": 0.42340694893796593, "grad_norm": 3.484375, "learning_rate": 7.999169273241046e-06, "loss": 1.6816, "step": 1505 }, { "epoch": 0.424813616542411, "grad_norm": 4.34375, "learning_rate": 7.999066175005965e-06, "loss": 1.7814, "step": 1510 }, { "epoch": 0.4262202841468561, "grad_norm": 3.5625, "learning_rate": 7.99895704859941e-06, "loss": 1.5777, "step": 1515 }, { "epoch": 0.42762695175130117, "grad_norm": 2.765625, "learning_rate": 7.99884189418588e-06, "loss": 1.614, "step": 1520 }, { "epoch": 0.42903361935574624, "grad_norm": 2.46875, "learning_rate": 7.998720711938954e-06, "loss": 1.3149, "step": 1525 }, { "epoch": 0.4304402869601913, "grad_norm": 3.125, "learning_rate": 7.998593502041306e-06, "loss": 1.7651, "step": 1530 }, { "epoch": 0.4318469545646364, "grad_norm": 3.0625, "learning_rate": 7.998460264684688e-06, "loss": 1.5757, "step": 1535 }, { "epoch": 0.4332536221690814, "grad_norm": 2.625, "learning_rate": 7.998321000069943e-06, "loss": 1.9167, "step": 1540 }, { "epoch": 0.4346602897735265, "grad_norm": 3.84375, "learning_rate": 7.998175708406999e-06, "loss": 1.388, "step": 1545 }, { "epoch": 0.4360669573779716, "grad_norm": 2.71875, "learning_rate": 7.998024389914864e-06, "loss": 1.7055, "step": 1550 }, { "epoch": 0.43747362498241665, "grad_norm": 3.71875, "learning_rate": 7.997867044821638e-06, "loss": 1.5819, "step": 1555 }, { "epoch": 0.43888029258686173, "grad_norm": 4.40625, "learning_rate": 7.997703673364501e-06, "loss": 1.7198, "step": 1560 }, { "epoch": 0.4402869601913068, "grad_norm": 2.96875, "learning_rate": 7.997534275789718e-06, "loss": 1.894, "step": 1565 }, { "epoch": 0.4416936277957519, "grad_norm": 3.0, "learning_rate": 7.99735885235264e-06, "loss": 1.5067, "step": 1570 }, { "epoch": 0.4431002954001969, "grad_norm": 3.8125, "learning_rate": 7.997177403317696e-06, "loss": 1.6449, "step": 1575 }, { "epoch": 0.444506963004642, "grad_norm": 3.109375, "learning_rate": 7.996989928958404e-06, "loss": 1.8517, "step": 1580 }, { "epoch": 0.44591363060908706, "grad_norm": 3.96875, "learning_rate": 7.996796429557362e-06, "loss": 1.7391, "step": 1585 }, { "epoch": 0.44732029821353214, "grad_norm": 2.421875, "learning_rate": 7.996596905406248e-06, "loss": 1.8785, "step": 1590 }, { "epoch": 0.4487269658179772, "grad_norm": 2.625, "learning_rate": 7.996391356805825e-06, "loss": 1.6024, "step": 1595 }, { "epoch": 0.4501336334224223, "grad_norm": 6.21875, "learning_rate": 7.996179784065935e-06, "loss": 1.6681, "step": 1600 }, { "epoch": 0.4515403010268674, "grad_norm": 3.390625, "learning_rate": 7.995962187505502e-06, "loss": 1.742, "step": 1605 }, { "epoch": 0.4529469686313124, "grad_norm": 2.9375, "learning_rate": 7.995738567452531e-06, "loss": 1.477, "step": 1610 }, { "epoch": 0.4543536362357575, "grad_norm": 2.953125, "learning_rate": 7.995508924244104e-06, "loss": 1.7455, "step": 1615 }, { "epoch": 0.45576030384020255, "grad_norm": 2.625, "learning_rate": 7.995273258226387e-06, "loss": 1.7959, "step": 1620 }, { "epoch": 0.45716697144464763, "grad_norm": 4.5625, "learning_rate": 7.995031569754617e-06, "loss": 1.7619, "step": 1625 }, { "epoch": 0.4585736390490927, "grad_norm": 3.25, "learning_rate": 7.994783859193119e-06, "loss": 1.6018, "step": 1630 }, { "epoch": 0.4599803066535378, "grad_norm": 2.921875, "learning_rate": 7.994530126915285e-06, "loss": 1.7328, "step": 1635 }, { "epoch": 0.46138697425798286, "grad_norm": 4.65625, "learning_rate": 7.994270373303593e-06, "loss": 1.6123, "step": 1640 }, { "epoch": 0.4627936418624279, "grad_norm": 2.96875, "learning_rate": 7.994004598749597e-06, "loss": 1.6376, "step": 1645 }, { "epoch": 0.46420030946687296, "grad_norm": 2.59375, "learning_rate": 7.99373280365392e-06, "loss": 1.5974, "step": 1650 }, { "epoch": 0.46560697707131804, "grad_norm": 2.859375, "learning_rate": 7.993454988426265e-06, "loss": 1.3981, "step": 1655 }, { "epoch": 0.4670136446757631, "grad_norm": 2.953125, "learning_rate": 7.993171153485412e-06, "loss": 1.7091, "step": 1660 }, { "epoch": 0.4684203122802082, "grad_norm": 2.515625, "learning_rate": 7.992881299259208e-06, "loss": 1.4902, "step": 1665 }, { "epoch": 0.4698269798846533, "grad_norm": 2.59375, "learning_rate": 7.99258542618458e-06, "loss": 1.8131, "step": 1670 }, { "epoch": 0.47123364748909835, "grad_norm": 3.640625, "learning_rate": 7.992283534707527e-06, "loss": 1.5423, "step": 1675 }, { "epoch": 0.4726403150935434, "grad_norm": 3.359375, "learning_rate": 7.991975625283116e-06, "loss": 1.7866, "step": 1680 }, { "epoch": 0.47404698269798845, "grad_norm": 3.71875, "learning_rate": 7.991661698375489e-06, "loss": 1.4981, "step": 1685 }, { "epoch": 0.47545365030243353, "grad_norm": 2.21875, "learning_rate": 7.991341754457858e-06, "loss": 1.6244, "step": 1690 }, { "epoch": 0.4768603179068786, "grad_norm": 3.4375, "learning_rate": 7.991015794012506e-06, "loss": 1.6531, "step": 1695 }, { "epoch": 0.4782669855113237, "grad_norm": 3.0625, "learning_rate": 7.990683817530783e-06, "loss": 1.6086, "step": 1700 }, { "epoch": 0.47967365311576876, "grad_norm": 2.859375, "learning_rate": 7.990345825513106e-06, "loss": 1.3878, "step": 1705 }, { "epoch": 0.48108032072021384, "grad_norm": 3.40625, "learning_rate": 7.990001818468968e-06, "loss": 1.658, "step": 1710 }, { "epoch": 0.48248698832465886, "grad_norm": 2.84375, "learning_rate": 7.989651796916918e-06, "loss": 1.9873, "step": 1715 }, { "epoch": 0.48389365592910394, "grad_norm": 2.484375, "learning_rate": 7.98929576138458e-06, "loss": 1.75, "step": 1720 }, { "epoch": 0.485300323533549, "grad_norm": 5.1875, "learning_rate": 7.98893371240864e-06, "loss": 1.5848, "step": 1725 }, { "epoch": 0.4867069911379941, "grad_norm": 2.59375, "learning_rate": 7.988565650534847e-06, "loss": 1.6315, "step": 1730 }, { "epoch": 0.48811365874243917, "grad_norm": 2.28125, "learning_rate": 7.988191576318015e-06, "loss": 1.613, "step": 1735 }, { "epoch": 0.48952032634688425, "grad_norm": 3.640625, "learning_rate": 7.987811490322025e-06, "loss": 1.4464, "step": 1740 }, { "epoch": 0.4909269939513293, "grad_norm": 3.84375, "learning_rate": 7.987425393119813e-06, "loss": 1.7572, "step": 1745 }, { "epoch": 0.49233366155577435, "grad_norm": 3.625, "learning_rate": 7.987033285293382e-06, "loss": 1.5372, "step": 1750 }, { "epoch": 0.4937403291602194, "grad_norm": 9.3125, "learning_rate": 7.986635167433794e-06, "loss": 1.8296, "step": 1755 }, { "epoch": 0.4951469967646645, "grad_norm": 3.515625, "learning_rate": 7.986231040141167e-06, "loss": 1.8108, "step": 1760 }, { "epoch": 0.4965536643691096, "grad_norm": 2.40625, "learning_rate": 7.985820904024682e-06, "loss": 1.6946, "step": 1765 }, { "epoch": 0.49796033197355466, "grad_norm": 3.25, "learning_rate": 7.985404759702576e-06, "loss": 1.5829, "step": 1770 }, { "epoch": 0.49936699957799974, "grad_norm": 3.40625, "learning_rate": 7.984982607802143e-06, "loss": 1.7967, "step": 1775 }, { "epoch": 0.5007736671824448, "grad_norm": 4.46875, "learning_rate": 7.984554448959733e-06, "loss": 1.7127, "step": 1780 }, { "epoch": 0.5021803347868898, "grad_norm": 3.3125, "learning_rate": 7.984120283820747e-06, "loss": 1.7665, "step": 1785 }, { "epoch": 0.5035870023913349, "grad_norm": 4.1875, "learning_rate": 7.983680113039648e-06, "loss": 1.6801, "step": 1790 }, { "epoch": 0.50499366999578, "grad_norm": 5.15625, "learning_rate": 7.983233937279946e-06, "loss": 1.7679, "step": 1795 }, { "epoch": 0.5064003376002251, "grad_norm": 3.1875, "learning_rate": 7.982781757214201e-06, "loss": 1.5918, "step": 1800 }, { "epoch": 0.5078070052046701, "grad_norm": 2.890625, "learning_rate": 7.982323573524031e-06, "loss": 1.5204, "step": 1805 }, { "epoch": 0.5092136728091152, "grad_norm": 4.09375, "learning_rate": 7.981859386900095e-06, "loss": 1.791, "step": 1810 }, { "epoch": 0.5106203404135603, "grad_norm": 5.90625, "learning_rate": 7.98138919804211e-06, "loss": 1.6106, "step": 1815 }, { "epoch": 0.5120270080180054, "grad_norm": 3.8125, "learning_rate": 7.980913007658834e-06, "loss": 1.6606, "step": 1820 }, { "epoch": 0.5134336756224505, "grad_norm": 3.828125, "learning_rate": 7.980430816468074e-06, "loss": 1.5026, "step": 1825 }, { "epoch": 0.5148403432268955, "grad_norm": 9.0, "learning_rate": 7.979942625196683e-06, "loss": 1.4711, "step": 1830 }, { "epoch": 0.5162470108313405, "grad_norm": 3.703125, "learning_rate": 7.979448434580558e-06, "loss": 1.8645, "step": 1835 }, { "epoch": 0.5176536784357856, "grad_norm": 3.5625, "learning_rate": 7.978948245364639e-06, "loss": 1.7167, "step": 1840 }, { "epoch": 0.5190603460402307, "grad_norm": 4.125, "learning_rate": 7.97844205830291e-06, "loss": 1.6777, "step": 1845 }, { "epoch": 0.5204670136446757, "grad_norm": 3.0625, "learning_rate": 7.977929874158391e-06, "loss": 1.7533, "step": 1850 }, { "epoch": 0.5218736812491208, "grad_norm": 2.734375, "learning_rate": 7.97741169370315e-06, "loss": 1.7477, "step": 1855 }, { "epoch": 0.5232803488535659, "grad_norm": 2.765625, "learning_rate": 7.976887517718287e-06, "loss": 1.8136, "step": 1860 }, { "epoch": 0.524687016458011, "grad_norm": 3.25, "learning_rate": 7.976357346993943e-06, "loss": 1.5982, "step": 1865 }, { "epoch": 0.526093684062456, "grad_norm": 3.734375, "learning_rate": 7.975821182329293e-06, "loss": 1.6659, "step": 1870 }, { "epoch": 0.5275003516669011, "grad_norm": 7.0625, "learning_rate": 7.975279024532551e-06, "loss": 1.7181, "step": 1875 }, { "epoch": 0.5289070192713462, "grad_norm": 3.109375, "learning_rate": 7.974730874420964e-06, "loss": 1.7149, "step": 1880 }, { "epoch": 0.5303136868757913, "grad_norm": 4.71875, "learning_rate": 7.974176732820807e-06, "loss": 1.5102, "step": 1885 }, { "epoch": 0.5317203544802364, "grad_norm": 2.890625, "learning_rate": 7.973616600567391e-06, "loss": 1.7282, "step": 1890 }, { "epoch": 0.5331270220846814, "grad_norm": 2.5625, "learning_rate": 7.973050478505058e-06, "loss": 1.5252, "step": 1895 }, { "epoch": 0.5345336896891265, "grad_norm": 3.296875, "learning_rate": 7.972478367487176e-06, "loss": 1.6819, "step": 1900 }, { "epoch": 0.5359403572935715, "grad_norm": 3.3125, "learning_rate": 7.971900268376144e-06, "loss": 1.5836, "step": 1905 }, { "epoch": 0.5373470248980166, "grad_norm": 4.1875, "learning_rate": 7.971316182043384e-06, "loss": 1.6865, "step": 1910 }, { "epoch": 0.5387536925024616, "grad_norm": 2.421875, "learning_rate": 7.970726109369344e-06, "loss": 1.6588, "step": 1915 }, { "epoch": 0.5401603601069067, "grad_norm": 4.6875, "learning_rate": 7.970130051243498e-06, "loss": 1.7915, "step": 1920 }, { "epoch": 0.5415670277113518, "grad_norm": 2.515625, "learning_rate": 7.969528008564342e-06, "loss": 1.7502, "step": 1925 }, { "epoch": 0.5429736953157969, "grad_norm": 4.15625, "learning_rate": 7.96891998223939e-06, "loss": 1.5522, "step": 1930 }, { "epoch": 0.544380362920242, "grad_norm": 3.625, "learning_rate": 7.968305973185177e-06, "loss": 1.8124, "step": 1935 }, { "epoch": 0.545787030524687, "grad_norm": 3.953125, "learning_rate": 7.96768598232726e-06, "loss": 1.7465, "step": 1940 }, { "epoch": 0.5471936981291321, "grad_norm": 3.03125, "learning_rate": 7.967060010600207e-06, "loss": 1.7834, "step": 1945 }, { "epoch": 0.5486003657335772, "grad_norm": 3.265625, "learning_rate": 7.966428058947607e-06, "loss": 1.5552, "step": 1950 }, { "epoch": 0.5500070333380223, "grad_norm": 5.0625, "learning_rate": 7.965790128322056e-06, "loss": 1.6003, "step": 1955 }, { "epoch": 0.5514137009424673, "grad_norm": 2.171875, "learning_rate": 7.965146219685173e-06, "loss": 1.6883, "step": 1960 }, { "epoch": 0.5528203685469124, "grad_norm": 2.953125, "learning_rate": 7.96449633400758e-06, "loss": 1.5172, "step": 1965 }, { "epoch": 0.5542270361513575, "grad_norm": 2.75, "learning_rate": 7.963840472268913e-06, "loss": 1.5894, "step": 1970 }, { "epoch": 0.5556337037558025, "grad_norm": 2.765625, "learning_rate": 7.963178635457812e-06, "loss": 1.5496, "step": 1975 }, { "epoch": 0.5570403713602475, "grad_norm": 2.984375, "learning_rate": 7.962510824571927e-06, "loss": 1.8202, "step": 1980 }, { "epoch": 0.5584470389646926, "grad_norm": 3.0, "learning_rate": 7.961837040617912e-06, "loss": 1.6368, "step": 1985 }, { "epoch": 0.5598537065691377, "grad_norm": 2.890625, "learning_rate": 7.961157284611427e-06, "loss": 1.7324, "step": 1990 }, { "epoch": 0.5612603741735828, "grad_norm": 2.734375, "learning_rate": 7.960471557577132e-06, "loss": 1.5617, "step": 1995 }, { "epoch": 0.5626670417780278, "grad_norm": 2.78125, "learning_rate": 7.959779860548688e-06, "loss": 1.7674, "step": 2000 }, { "epoch": 0.5640737093824729, "grad_norm": 3.5625, "learning_rate": 7.959082194568757e-06, "loss": 1.8521, "step": 2005 }, { "epoch": 0.565480376986918, "grad_norm": 2.203125, "learning_rate": 7.958378560688997e-06, "loss": 1.605, "step": 2010 }, { "epoch": 0.5668870445913631, "grad_norm": 3.09375, "learning_rate": 7.957668959970058e-06, "loss": 1.6868, "step": 2015 }, { "epoch": 0.5682937121958082, "grad_norm": 2.203125, "learning_rate": 7.956953393481593e-06, "loss": 1.9079, "step": 2020 }, { "epoch": 0.5697003798002532, "grad_norm": 4.4375, "learning_rate": 7.956231862302242e-06, "loss": 1.6886, "step": 2025 }, { "epoch": 0.5711070474046983, "grad_norm": 2.578125, "learning_rate": 7.955504367519637e-06, "loss": 1.6482, "step": 2030 }, { "epoch": 0.5725137150091434, "grad_norm": 2.359375, "learning_rate": 7.954770910230399e-06, "loss": 1.9038, "step": 2035 }, { "epoch": 0.5739203826135885, "grad_norm": 2.34375, "learning_rate": 7.954031491540138e-06, "loss": 1.7288, "step": 2040 }, { "epoch": 0.5753270502180334, "grad_norm": 2.5625, "learning_rate": 7.953286112563452e-06, "loss": 1.6836, "step": 2045 }, { "epoch": 0.5767337178224785, "grad_norm": 2.828125, "learning_rate": 7.952534774423918e-06, "loss": 1.6717, "step": 2050 }, { "epoch": 0.5781403854269236, "grad_norm": 4.6875, "learning_rate": 7.951777478254102e-06, "loss": 1.6014, "step": 2055 }, { "epoch": 0.5795470530313687, "grad_norm": 2.703125, "learning_rate": 7.951014225195548e-06, "loss": 1.4636, "step": 2060 }, { "epoch": 0.5809537206358137, "grad_norm": 2.484375, "learning_rate": 7.950245016398778e-06, "loss": 1.4488, "step": 2065 }, { "epoch": 0.5823603882402588, "grad_norm": 3.6875, "learning_rate": 7.949469853023294e-06, "loss": 1.5397, "step": 2070 }, { "epoch": 0.5837670558447039, "grad_norm": 3.46875, "learning_rate": 7.948688736237573e-06, "loss": 1.751, "step": 2075 }, { "epoch": 0.585173723449149, "grad_norm": 3.703125, "learning_rate": 7.947901667219067e-06, "loss": 1.7123, "step": 2080 }, { "epoch": 0.586580391053594, "grad_norm": 4.53125, "learning_rate": 7.9471086471542e-06, "loss": 1.7476, "step": 2085 }, { "epoch": 0.5879870586580391, "grad_norm": 2.640625, "learning_rate": 7.946309677238364e-06, "loss": 1.8185, "step": 2090 }, { "epoch": 0.5893937262624842, "grad_norm": 3.625, "learning_rate": 7.945504758675926e-06, "loss": 1.5302, "step": 2095 }, { "epoch": 0.5908003938669293, "grad_norm": 3.078125, "learning_rate": 7.944693892680213e-06, "loss": 1.4795, "step": 2100 }, { "epoch": 0.5922070614713744, "grad_norm": 3.03125, "learning_rate": 7.943877080473521e-06, "loss": 1.4504, "step": 2105 }, { "epoch": 0.5936137290758193, "grad_norm": 3.5625, "learning_rate": 7.94305432328711e-06, "loss": 1.636, "step": 2110 }, { "epoch": 0.5950203966802644, "grad_norm": 3.546875, "learning_rate": 7.942225622361197e-06, "loss": 1.6305, "step": 2115 }, { "epoch": 0.5964270642847095, "grad_norm": 3.328125, "learning_rate": 7.941390978944963e-06, "loss": 1.7123, "step": 2120 }, { "epoch": 0.5978337318891546, "grad_norm": 2.96875, "learning_rate": 7.940550394296545e-06, "loss": 1.7594, "step": 2125 }, { "epoch": 0.5992403994935996, "grad_norm": 3.078125, "learning_rate": 7.939703869683038e-06, "loss": 1.5839, "step": 2130 }, { "epoch": 0.6006470670980447, "grad_norm": 2.875, "learning_rate": 7.938851406380484e-06, "loss": 1.5178, "step": 2135 }, { "epoch": 0.6020537347024898, "grad_norm": 2.515625, "learning_rate": 7.937993005673886e-06, "loss": 1.7266, "step": 2140 }, { "epoch": 0.6034604023069349, "grad_norm": 3.59375, "learning_rate": 7.93712866885719e-06, "loss": 1.5003, "step": 2145 }, { "epoch": 0.60486706991138, "grad_norm": 3.078125, "learning_rate": 7.936258397233296e-06, "loss": 1.6785, "step": 2150 }, { "epoch": 0.606273737515825, "grad_norm": 2.484375, "learning_rate": 7.935382192114043e-06, "loss": 1.7834, "step": 2155 }, { "epoch": 0.6076804051202701, "grad_norm": 2.953125, "learning_rate": 7.93450005482022e-06, "loss": 1.7403, "step": 2160 }, { "epoch": 0.6090870727247152, "grad_norm": 3.046875, "learning_rate": 7.933611986681556e-06, "loss": 1.7666, "step": 2165 }, { "epoch": 0.6104937403291603, "grad_norm": 2.546875, "learning_rate": 7.93271798903672e-06, "loss": 1.5542, "step": 2170 }, { "epoch": 0.6119004079336053, "grad_norm": 2.96875, "learning_rate": 7.931818063233322e-06, "loss": 1.8542, "step": 2175 }, { "epoch": 0.6133070755380503, "grad_norm": 4.75, "learning_rate": 7.930912210627902e-06, "loss": 1.5718, "step": 2180 }, { "epoch": 0.6147137431424954, "grad_norm": 2.8125, "learning_rate": 7.930000432585939e-06, "loss": 1.5713, "step": 2185 }, { "epoch": 0.6161204107469405, "grad_norm": 2.625, "learning_rate": 7.929082730481841e-06, "loss": 1.8829, "step": 2190 }, { "epoch": 0.6175270783513855, "grad_norm": 2.96875, "learning_rate": 7.928159105698949e-06, "loss": 1.9, "step": 2195 }, { "epoch": 0.6189337459558306, "grad_norm": 3.703125, "learning_rate": 7.927229559629529e-06, "loss": 1.6255, "step": 2200 }, { "epoch": 0.6203404135602757, "grad_norm": 2.984375, "learning_rate": 7.926294093674777e-06, "loss": 1.4732, "step": 2205 }, { "epoch": 0.6217470811647208, "grad_norm": 2.40625, "learning_rate": 7.925352709244804e-06, "loss": 1.6324, "step": 2210 }, { "epoch": 0.6231537487691658, "grad_norm": 3.703125, "learning_rate": 7.924405407758654e-06, "loss": 1.5333, "step": 2215 }, { "epoch": 0.6245604163736109, "grad_norm": 3.1875, "learning_rate": 7.923452190644279e-06, "loss": 1.7322, "step": 2220 }, { "epoch": 0.625967083978056, "grad_norm": 2.5625, "learning_rate": 7.922493059338556e-06, "loss": 1.7649, "step": 2225 }, { "epoch": 0.6273737515825011, "grad_norm": 2.75, "learning_rate": 7.921528015287276e-06, "loss": 1.6691, "step": 2230 }, { "epoch": 0.6287804191869462, "grad_norm": 2.859375, "learning_rate": 7.920557059945137e-06, "loss": 1.7656, "step": 2235 }, { "epoch": 0.6301870867913912, "grad_norm": 2.6875, "learning_rate": 7.919580194775758e-06, "loss": 1.7602, "step": 2240 }, { "epoch": 0.6315937543958363, "grad_norm": 2.515625, "learning_rate": 7.918597421251656e-06, "loss": 1.7364, "step": 2245 }, { "epoch": 0.6330004220002813, "grad_norm": 3.09375, "learning_rate": 7.917608740854259e-06, "loss": 1.6754, "step": 2250 }, { "epoch": 0.6344070896047264, "grad_norm": 2.921875, "learning_rate": 7.9166141550739e-06, "loss": 1.5354, "step": 2255 }, { "epoch": 0.6358137572091714, "grad_norm": 3.890625, "learning_rate": 7.915613665409813e-06, "loss": 1.532, "step": 2260 }, { "epoch": 0.6372204248136165, "grad_norm": 2.25, "learning_rate": 7.914607273370129e-06, "loss": 1.6626, "step": 2265 }, { "epoch": 0.6386270924180616, "grad_norm": 3.671875, "learning_rate": 7.913594980471877e-06, "loss": 1.7334, "step": 2270 }, { "epoch": 0.6400337600225067, "grad_norm": 3.265625, "learning_rate": 7.912576788240987e-06, "loss": 1.537, "step": 2275 }, { "epoch": 0.6414404276269517, "grad_norm": 3.40625, "learning_rate": 7.911552698212271e-06, "loss": 1.7401, "step": 2280 }, { "epoch": 0.6428470952313968, "grad_norm": 3.03125, "learning_rate": 7.910522711929444e-06, "loss": 1.7289, "step": 2285 }, { "epoch": 0.6442537628358419, "grad_norm": 3.125, "learning_rate": 7.909486830945092e-06, "loss": 1.5732, "step": 2290 }, { "epoch": 0.645660430440287, "grad_norm": 2.875, "learning_rate": 7.908445056820707e-06, "loss": 1.7419, "step": 2295 }, { "epoch": 0.6470670980447321, "grad_norm": 3.59375, "learning_rate": 7.907397391126647e-06, "loss": 1.6438, "step": 2300 }, { "epoch": 0.6484737656491771, "grad_norm": 3.03125, "learning_rate": 7.906343835442159e-06, "loss": 1.3731, "step": 2305 }, { "epoch": 0.6498804332536222, "grad_norm": 2.640625, "learning_rate": 7.90528439135537e-06, "loss": 1.6436, "step": 2310 }, { "epoch": 0.6512871008580673, "grad_norm": 4.125, "learning_rate": 7.904219060463277e-06, "loss": 1.8662, "step": 2315 }, { "epoch": 0.6526937684625123, "grad_norm": 4.59375, "learning_rate": 7.903147844371757e-06, "loss": 1.7982, "step": 2320 }, { "epoch": 0.6541004360669573, "grad_norm": 2.484375, "learning_rate": 7.902070744695553e-06, "loss": 1.6941, "step": 2325 }, { "epoch": 0.6555071036714024, "grad_norm": 2.9375, "learning_rate": 7.900987763058281e-06, "loss": 1.8189, "step": 2330 }, { "epoch": 0.6569137712758475, "grad_norm": 2.65625, "learning_rate": 7.899898901092425e-06, "loss": 1.6437, "step": 2335 }, { "epoch": 0.6583204388802926, "grad_norm": 2.953125, "learning_rate": 7.898804160439322e-06, "loss": 1.5489, "step": 2340 }, { "epoch": 0.6597271064847376, "grad_norm": 3.03125, "learning_rate": 7.897703542749186e-06, "loss": 1.5735, "step": 2345 }, { "epoch": 0.6611337740891827, "grad_norm": 2.703125, "learning_rate": 7.896597049681078e-06, "loss": 1.561, "step": 2350 }, { "epoch": 0.6625404416936278, "grad_norm": 2.359375, "learning_rate": 7.895484682902921e-06, "loss": 1.8226, "step": 2355 }, { "epoch": 0.6639471092980729, "grad_norm": 3.71875, "learning_rate": 7.89436644409149e-06, "loss": 1.6574, "step": 2360 }, { "epoch": 0.665353776902518, "grad_norm": 4.71875, "learning_rate": 7.893242334932415e-06, "loss": 1.4988, "step": 2365 }, { "epoch": 0.666760444506963, "grad_norm": 2.96875, "learning_rate": 7.892112357120171e-06, "loss": 1.7978, "step": 2370 }, { "epoch": 0.6681671121114081, "grad_norm": 2.859375, "learning_rate": 7.890976512358079e-06, "loss": 1.6548, "step": 2375 }, { "epoch": 0.6695737797158532, "grad_norm": 2.953125, "learning_rate": 7.889834802358309e-06, "loss": 1.6971, "step": 2380 }, { "epoch": 0.6709804473202983, "grad_norm": 2.578125, "learning_rate": 7.888687228841864e-06, "loss": 1.5706, "step": 2385 }, { "epoch": 0.6723871149247432, "grad_norm": 3.640625, "learning_rate": 7.887533793538594e-06, "loss": 1.4289, "step": 2390 }, { "epoch": 0.6737937825291883, "grad_norm": 2.28125, "learning_rate": 7.886374498187178e-06, "loss": 1.7071, "step": 2395 }, { "epoch": 0.6752004501336334, "grad_norm": 3.40625, "learning_rate": 7.885209344535135e-06, "loss": 1.8025, "step": 2400 }, { "epoch": 0.6766071177380785, "grad_norm": 3.953125, "learning_rate": 7.884038334338812e-06, "loss": 1.6936, "step": 2405 }, { "epoch": 0.6780137853425235, "grad_norm": 2.171875, "learning_rate": 7.88286146936338e-06, "loss": 1.8952, "step": 2410 }, { "epoch": 0.6794204529469686, "grad_norm": 2.515625, "learning_rate": 7.881678751382842e-06, "loss": 1.5186, "step": 2415 }, { "epoch": 0.6808271205514137, "grad_norm": 3.09375, "learning_rate": 7.880490182180022e-06, "loss": 1.9398, "step": 2420 }, { "epoch": 0.6822337881558588, "grad_norm": 3.265625, "learning_rate": 7.879295763546558e-06, "loss": 1.7953, "step": 2425 }, { "epoch": 0.6836404557603039, "grad_norm": 2.84375, "learning_rate": 7.878095497282916e-06, "loss": 1.8955, "step": 2430 }, { "epoch": 0.6850471233647489, "grad_norm": 3.25, "learning_rate": 7.876889385198367e-06, "loss": 1.5763, "step": 2435 }, { "epoch": 0.686453790969194, "grad_norm": 2.734375, "learning_rate": 7.875677429111e-06, "loss": 1.5909, "step": 2440 }, { "epoch": 0.6878604585736391, "grad_norm": 5.9375, "learning_rate": 7.874459630847711e-06, "loss": 1.6029, "step": 2445 }, { "epoch": 0.6892671261780842, "grad_norm": 2.5625, "learning_rate": 7.873235992244203e-06, "loss": 1.4505, "step": 2450 }, { "epoch": 0.6906737937825291, "grad_norm": 3.46875, "learning_rate": 7.872006515144983e-06, "loss": 1.8632, "step": 2455 }, { "epoch": 0.6920804613869742, "grad_norm": 3.6875, "learning_rate": 7.870771201403356e-06, "loss": 1.6993, "step": 2460 }, { "epoch": 0.6934871289914193, "grad_norm": 2.6875, "learning_rate": 7.86953005288143e-06, "loss": 1.803, "step": 2465 }, { "epoch": 0.6948937965958644, "grad_norm": 3.09375, "learning_rate": 7.868283071450105e-06, "loss": 1.7066, "step": 2470 }, { "epoch": 0.6963004642003094, "grad_norm": 3.875, "learning_rate": 7.867030258989072e-06, "loss": 1.6787, "step": 2475 }, { "epoch": 0.6977071318047545, "grad_norm": 3.375, "learning_rate": 7.865771617386817e-06, "loss": 1.5385, "step": 2480 }, { "epoch": 0.6991137994091996, "grad_norm": 3.171875, "learning_rate": 7.86450714854061e-06, "loss": 1.6667, "step": 2485 }, { "epoch": 0.7005204670136447, "grad_norm": 1.9609375, "learning_rate": 7.863236854356502e-06, "loss": 1.6079, "step": 2490 }, { "epoch": 0.7019271346180898, "grad_norm": 3.328125, "learning_rate": 7.861960736749331e-06, "loss": 1.7048, "step": 2495 }, { "epoch": 0.7033338022225348, "grad_norm": 3.640625, "learning_rate": 7.860678797642707e-06, "loss": 1.855, "step": 2500 }, { "epoch": 0.7047404698269799, "grad_norm": 2.359375, "learning_rate": 7.859391038969021e-06, "loss": 1.8016, "step": 2505 }, { "epoch": 0.706147137431425, "grad_norm": 2.515625, "learning_rate": 7.858097462669432e-06, "loss": 1.8948, "step": 2510 }, { "epoch": 0.7075538050358701, "grad_norm": 2.71875, "learning_rate": 7.85679807069387e-06, "loss": 1.5018, "step": 2515 }, { "epoch": 0.7089604726403151, "grad_norm": 4.03125, "learning_rate": 7.855492865001033e-06, "loss": 1.7206, "step": 2520 }, { "epoch": 0.7103671402447601, "grad_norm": 3.515625, "learning_rate": 7.85418184755838e-06, "loss": 1.5677, "step": 2525 }, { "epoch": 0.7117738078492052, "grad_norm": 2.9375, "learning_rate": 7.852865020342133e-06, "loss": 1.7892, "step": 2530 }, { "epoch": 0.7131804754536503, "grad_norm": 3.8125, "learning_rate": 7.851542385337269e-06, "loss": 1.3885, "step": 2535 }, { "epoch": 0.7145871430580953, "grad_norm": 3.8125, "learning_rate": 7.850213944537522e-06, "loss": 1.6664, "step": 2540 }, { "epoch": 0.7159938106625404, "grad_norm": 2.765625, "learning_rate": 7.848879699945377e-06, "loss": 1.5967, "step": 2545 }, { "epoch": 0.7174004782669855, "grad_norm": 3.828125, "learning_rate": 7.847539653572066e-06, "loss": 1.5588, "step": 2550 }, { "epoch": 0.7188071458714306, "grad_norm": 4.9375, "learning_rate": 7.846193807437571e-06, "loss": 1.7426, "step": 2555 }, { "epoch": 0.7202138134758757, "grad_norm": 2.859375, "learning_rate": 7.84484216357061e-06, "loss": 1.7578, "step": 2560 }, { "epoch": 0.7216204810803207, "grad_norm": 3.671875, "learning_rate": 7.843484724008645e-06, "loss": 1.5375, "step": 2565 }, { "epoch": 0.7230271486847658, "grad_norm": 4.15625, "learning_rate": 7.842121490797876e-06, "loss": 1.4915, "step": 2570 }, { "epoch": 0.7244338162892109, "grad_norm": 2.84375, "learning_rate": 7.840752465993228e-06, "loss": 1.385, "step": 2575 }, { "epoch": 0.725840483893656, "grad_norm": 3.765625, "learning_rate": 7.839377651658368e-06, "loss": 1.3509, "step": 2580 }, { "epoch": 0.727247151498101, "grad_norm": 2.40625, "learning_rate": 7.837997049865677e-06, "loss": 1.6331, "step": 2585 }, { "epoch": 0.7286538191025461, "grad_norm": 3.265625, "learning_rate": 7.836610662696273e-06, "loss": 1.785, "step": 2590 }, { "epoch": 0.7300604867069911, "grad_norm": 4.0, "learning_rate": 7.835218492239987e-06, "loss": 1.7578, "step": 2595 }, { "epoch": 0.7314671543114362, "grad_norm": 3.875, "learning_rate": 7.833820540595369e-06, "loss": 1.7416, "step": 2600 }, { "epoch": 0.7328738219158812, "grad_norm": 3.96875, "learning_rate": 7.832416809869684e-06, "loss": 1.7128, "step": 2605 }, { "epoch": 0.7342804895203263, "grad_norm": 2.78125, "learning_rate": 7.831007302178908e-06, "loss": 1.7317, "step": 2610 }, { "epoch": 0.7356871571247714, "grad_norm": 3.296875, "learning_rate": 7.829592019647729e-06, "loss": 1.365, "step": 2615 }, { "epoch": 0.7370938247292165, "grad_norm": 3.390625, "learning_rate": 7.82817096440953e-06, "loss": 1.7279, "step": 2620 }, { "epoch": 0.7385004923336616, "grad_norm": 4.53125, "learning_rate": 7.826744138606408e-06, "loss": 1.2845, "step": 2625 }, { "epoch": 0.7399071599381066, "grad_norm": 3.265625, "learning_rate": 7.825311544389149e-06, "loss": 1.5838, "step": 2630 }, { "epoch": 0.7413138275425517, "grad_norm": 2.75, "learning_rate": 7.82387318391724e-06, "loss": 1.6606, "step": 2635 }, { "epoch": 0.7427204951469968, "grad_norm": 3.328125, "learning_rate": 7.822429059358859e-06, "loss": 1.767, "step": 2640 }, { "epoch": 0.7441271627514419, "grad_norm": 3.203125, "learning_rate": 7.820979172890869e-06, "loss": 1.6674, "step": 2645 }, { "epoch": 0.7455338303558869, "grad_norm": 2.78125, "learning_rate": 7.819523526698824e-06, "loss": 1.7634, "step": 2650 }, { "epoch": 0.746940497960332, "grad_norm": 3.25, "learning_rate": 7.818062122976954e-06, "loss": 1.6022, "step": 2655 }, { "epoch": 0.7483471655647771, "grad_norm": 2.84375, "learning_rate": 7.816594963928176e-06, "loss": 1.6332, "step": 2660 }, { "epoch": 0.7497538331692221, "grad_norm": 5.9375, "learning_rate": 7.815122051764075e-06, "loss": 1.711, "step": 2665 }, { "epoch": 0.7511605007736671, "grad_norm": 3.265625, "learning_rate": 7.813643388704912e-06, "loss": 1.4206, "step": 2670 }, { "epoch": 0.7525671683781122, "grad_norm": 2.46875, "learning_rate": 7.812158976979614e-06, "loss": 1.5857, "step": 2675 }, { "epoch": 0.7539738359825573, "grad_norm": 2.921875, "learning_rate": 7.810668818825778e-06, "loss": 1.6887, "step": 2680 }, { "epoch": 0.7553805035870024, "grad_norm": 3.34375, "learning_rate": 7.80917291648966e-06, "loss": 1.4489, "step": 2685 }, { "epoch": 0.7567871711914474, "grad_norm": 2.6875, "learning_rate": 7.807671272226175e-06, "loss": 1.7821, "step": 2690 }, { "epoch": 0.7581938387958925, "grad_norm": 2.046875, "learning_rate": 7.806163888298894e-06, "loss": 1.449, "step": 2695 }, { "epoch": 0.7596005064003376, "grad_norm": 2.6875, "learning_rate": 7.80465076698004e-06, "loss": 1.7329, "step": 2700 }, { "epoch": 0.7610071740047827, "grad_norm": 3.203125, "learning_rate": 7.80313191055048e-06, "loss": 1.5321, "step": 2705 }, { "epoch": 0.7624138416092278, "grad_norm": 3.203125, "learning_rate": 7.801607321299738e-06, "loss": 1.3949, "step": 2710 }, { "epoch": 0.7638205092136728, "grad_norm": 3.25, "learning_rate": 7.800077001525966e-06, "loss": 1.6693, "step": 2715 }, { "epoch": 0.7652271768181179, "grad_norm": 3.09375, "learning_rate": 7.798540953535962e-06, "loss": 1.3889, "step": 2720 }, { "epoch": 0.766633844422563, "grad_norm": 3.015625, "learning_rate": 7.796999179645157e-06, "loss": 1.8232, "step": 2725 }, { "epoch": 0.7680405120270081, "grad_norm": 3.0, "learning_rate": 7.795451682177613e-06, "loss": 1.5121, "step": 2730 }, { "epoch": 0.769447179631453, "grad_norm": 4.28125, "learning_rate": 7.793898463466018e-06, "loss": 1.7762, "step": 2735 }, { "epoch": 0.7708538472358981, "grad_norm": 3.375, "learning_rate": 7.792339525851686e-06, "loss": 1.6207, "step": 2740 }, { "epoch": 0.7722605148403432, "grad_norm": 2.734375, "learning_rate": 7.790774871684554e-06, "loss": 1.6523, "step": 2745 }, { "epoch": 0.7736671824447883, "grad_norm": 3.15625, "learning_rate": 7.789204503323172e-06, "loss": 1.8693, "step": 2750 }, { "epoch": 0.7750738500492333, "grad_norm": 3.921875, "learning_rate": 7.787628423134702e-06, "loss": 1.5125, "step": 2755 }, { "epoch": 0.7764805176536784, "grad_norm": 2.5625, "learning_rate": 7.786046633494924e-06, "loss": 1.4513, "step": 2760 }, { "epoch": 0.7778871852581235, "grad_norm": 3.265625, "learning_rate": 7.784459136788217e-06, "loss": 1.7672, "step": 2765 }, { "epoch": 0.7792938528625686, "grad_norm": 3.203125, "learning_rate": 7.782865935407566e-06, "loss": 1.6418, "step": 2770 }, { "epoch": 0.7807005204670137, "grad_norm": 3.515625, "learning_rate": 7.781267031754553e-06, "loss": 1.6256, "step": 2775 }, { "epoch": 0.7821071880714587, "grad_norm": 3.25, "learning_rate": 7.779662428239359e-06, "loss": 1.4991, "step": 2780 }, { "epoch": 0.7835138556759038, "grad_norm": 2.984375, "learning_rate": 7.778052127280754e-06, "loss": 1.7964, "step": 2785 }, { "epoch": 0.7849205232803489, "grad_norm": 3.21875, "learning_rate": 7.776436131306096e-06, "loss": 1.5225, "step": 2790 }, { "epoch": 0.786327190884794, "grad_norm": 3.359375, "learning_rate": 7.774814442751332e-06, "loss": 1.5578, "step": 2795 }, { "epoch": 0.787733858489239, "grad_norm": 3.984375, "learning_rate": 7.773187064060981e-06, "loss": 1.6137, "step": 2800 }, { "epoch": 0.789140526093684, "grad_norm": 3.921875, "learning_rate": 7.771553997688153e-06, "loss": 1.604, "step": 2805 }, { "epoch": 0.7905471936981291, "grad_norm": 3.0, "learning_rate": 7.769915246094519e-06, "loss": 1.583, "step": 2810 }, { "epoch": 0.7919538613025742, "grad_norm": 5.0, "learning_rate": 7.768270811750326e-06, "loss": 1.613, "step": 2815 }, { "epoch": 0.7933605289070192, "grad_norm": 3.6875, "learning_rate": 7.766620697134385e-06, "loss": 1.6581, "step": 2820 }, { "epoch": 0.7947671965114643, "grad_norm": 3.03125, "learning_rate": 7.76496490473407e-06, "loss": 1.5417, "step": 2825 }, { "epoch": 0.7961738641159094, "grad_norm": 2.953125, "learning_rate": 7.763303437045313e-06, "loss": 1.9133, "step": 2830 }, { "epoch": 0.7975805317203545, "grad_norm": 3.53125, "learning_rate": 7.761636296572605e-06, "loss": 1.8703, "step": 2835 }, { "epoch": 0.7989871993247996, "grad_norm": 4.9375, "learning_rate": 7.759963485828982e-06, "loss": 1.5487, "step": 2840 }, { "epoch": 0.8003938669292446, "grad_norm": 3.0625, "learning_rate": 7.75828500733603e-06, "loss": 1.7283, "step": 2845 }, { "epoch": 0.8018005345336897, "grad_norm": 3.875, "learning_rate": 7.75660086362388e-06, "loss": 1.4787, "step": 2850 }, { "epoch": 0.8032072021381348, "grad_norm": 3.65625, "learning_rate": 7.754911057231202e-06, "loss": 1.6337, "step": 2855 }, { "epoch": 0.8046138697425799, "grad_norm": 2.703125, "learning_rate": 7.7532155907052e-06, "loss": 1.6622, "step": 2860 }, { "epoch": 0.806020537347025, "grad_norm": 2.671875, "learning_rate": 7.751514466601611e-06, "loss": 1.8183, "step": 2865 }, { "epoch": 0.8074272049514699, "grad_norm": 3.015625, "learning_rate": 7.749807687484702e-06, "loss": 1.5107, "step": 2870 }, { "epoch": 0.808833872555915, "grad_norm": 3.5, "learning_rate": 7.748095255927262e-06, "loss": 1.3176, "step": 2875 }, { "epoch": 0.8102405401603601, "grad_norm": 3.265625, "learning_rate": 7.746377174510603e-06, "loss": 1.7498, "step": 2880 }, { "epoch": 0.8116472077648051, "grad_norm": 2.75, "learning_rate": 7.74465344582455e-06, "loss": 1.7046, "step": 2885 }, { "epoch": 0.8130538753692502, "grad_norm": 3.265625, "learning_rate": 7.742924072467442e-06, "loss": 1.6646, "step": 2890 }, { "epoch": 0.8144605429736953, "grad_norm": 3.25, "learning_rate": 7.74118905704613e-06, "loss": 1.5339, "step": 2895 }, { "epoch": 0.8158672105781404, "grad_norm": 3.375, "learning_rate": 7.739448402175967e-06, "loss": 1.7851, "step": 2900 }, { "epoch": 0.8172738781825855, "grad_norm": 2.390625, "learning_rate": 7.737702110480804e-06, "loss": 1.7214, "step": 2905 }, { "epoch": 0.8186805457870305, "grad_norm": 8.6875, "learning_rate": 7.735950184592994e-06, "loss": 1.7116, "step": 2910 }, { "epoch": 0.8200872133914756, "grad_norm": 3.21875, "learning_rate": 7.734192627153382e-06, "loss": 1.761, "step": 2915 }, { "epoch": 0.8214938809959207, "grad_norm": 2.84375, "learning_rate": 7.732429440811297e-06, "loss": 1.6123, "step": 2920 }, { "epoch": 0.8229005486003658, "grad_norm": 3.40625, "learning_rate": 7.730660628224563e-06, "loss": 1.6235, "step": 2925 }, { "epoch": 0.8243072162048108, "grad_norm": 3.265625, "learning_rate": 7.728886192059474e-06, "loss": 1.8165, "step": 2930 }, { "epoch": 0.8257138838092559, "grad_norm": 3.171875, "learning_rate": 7.727106134990808e-06, "loss": 1.5903, "step": 2935 }, { "epoch": 0.8271205514137009, "grad_norm": 3.21875, "learning_rate": 7.725320459701813e-06, "loss": 1.3788, "step": 2940 }, { "epoch": 0.828527219018146, "grad_norm": 3.453125, "learning_rate": 7.723529168884205e-06, "loss": 1.6623, "step": 2945 }, { "epoch": 0.829933886622591, "grad_norm": 3.796875, "learning_rate": 7.72173226523817e-06, "loss": 1.6399, "step": 2950 }, { "epoch": 0.8313405542270361, "grad_norm": 3.375, "learning_rate": 7.719929751472348e-06, "loss": 1.677, "step": 2955 }, { "epoch": 0.8327472218314812, "grad_norm": 2.984375, "learning_rate": 7.71812163030384e-06, "loss": 1.3748, "step": 2960 }, { "epoch": 0.8341538894359263, "grad_norm": 5.59375, "learning_rate": 7.7163079044582e-06, "loss": 1.7135, "step": 2965 }, { "epoch": 0.8355605570403714, "grad_norm": 2.71875, "learning_rate": 7.714488576669427e-06, "loss": 1.7007, "step": 2970 }, { "epoch": 0.8369672246448164, "grad_norm": 3.734375, "learning_rate": 7.712663649679966e-06, "loss": 1.5469, "step": 2975 }, { "epoch": 0.8383738922492615, "grad_norm": 4.71875, "learning_rate": 7.710833126240702e-06, "loss": 1.5403, "step": 2980 }, { "epoch": 0.8397805598537066, "grad_norm": 3.0625, "learning_rate": 7.70899700911096e-06, "loss": 1.5021, "step": 2985 }, { "epoch": 0.8411872274581517, "grad_norm": 2.71875, "learning_rate": 7.707155301058488e-06, "loss": 1.7806, "step": 2990 }, { "epoch": 0.8425938950625967, "grad_norm": 3.328125, "learning_rate": 7.705308004859471e-06, "loss": 1.5206, "step": 2995 }, { "epoch": 0.8440005626670418, "grad_norm": 3.40625, "learning_rate": 7.703455123298512e-06, "loss": 1.5729, "step": 3000 }, { "epoch": 0.8454072302714869, "grad_norm": 3.75, "learning_rate": 7.701596659168637e-06, "loss": 1.6561, "step": 3005 }, { "epoch": 0.8468138978759319, "grad_norm": 4.125, "learning_rate": 7.699732615271283e-06, "loss": 1.4492, "step": 3010 }, { "epoch": 0.8482205654803769, "grad_norm": 2.671875, "learning_rate": 7.697862994416301e-06, "loss": 1.7029, "step": 3015 }, { "epoch": 0.849627233084822, "grad_norm": 3.765625, "learning_rate": 7.695987799421947e-06, "loss": 1.4796, "step": 3020 }, { "epoch": 0.8510339006892671, "grad_norm": 3.015625, "learning_rate": 7.694107033114882e-06, "loss": 1.6716, "step": 3025 }, { "epoch": 0.8524405682937122, "grad_norm": 2.953125, "learning_rate": 7.692220698330161e-06, "loss": 1.6725, "step": 3030 }, { "epoch": 0.8538472358981573, "grad_norm": 4.21875, "learning_rate": 7.690328797911235e-06, "loss": 1.7419, "step": 3035 }, { "epoch": 0.8552539035026023, "grad_norm": 3.359375, "learning_rate": 7.688431334709947e-06, "loss": 1.4105, "step": 3040 }, { "epoch": 0.8566605711070474, "grad_norm": 3.078125, "learning_rate": 7.686528311586523e-06, "loss": 1.2964, "step": 3045 }, { "epoch": 0.8580672387114925, "grad_norm": 2.9375, "learning_rate": 7.684619731409566e-06, "loss": 1.4006, "step": 3050 }, { "epoch": 0.8594739063159376, "grad_norm": 4.03125, "learning_rate": 7.682705597056066e-06, "loss": 1.6018, "step": 3055 }, { "epoch": 0.8608805739203826, "grad_norm": 9.1875, "learning_rate": 7.680785911411375e-06, "loss": 1.8649, "step": 3060 }, { "epoch": 0.8622872415248277, "grad_norm": 3.125, "learning_rate": 7.678860677369218e-06, "loss": 1.5029, "step": 3065 }, { "epoch": 0.8636939091292728, "grad_norm": 2.765625, "learning_rate": 7.676929897831684e-06, "loss": 1.4425, "step": 3070 }, { "epoch": 0.8651005767337179, "grad_norm": 3.265625, "learning_rate": 7.674993575709218e-06, "loss": 1.7859, "step": 3075 }, { "epoch": 0.8665072443381628, "grad_norm": 2.4375, "learning_rate": 7.673051713920624e-06, "loss": 1.7595, "step": 3080 }, { "epoch": 0.8679139119426079, "grad_norm": 4.3125, "learning_rate": 7.671104315393053e-06, "loss": 1.3791, "step": 3085 }, { "epoch": 0.869320579547053, "grad_norm": 2.546875, "learning_rate": 7.669151383062003e-06, "loss": 1.8302, "step": 3090 }, { "epoch": 0.8707272471514981, "grad_norm": 2.6875, "learning_rate": 7.667192919871313e-06, "loss": 1.6029, "step": 3095 }, { "epoch": 0.8721339147559432, "grad_norm": 3.09375, "learning_rate": 7.665228928773164e-06, "loss": 1.7232, "step": 3100 }, { "epoch": 0.8735405823603882, "grad_norm": 2.734375, "learning_rate": 7.663259412728062e-06, "loss": 1.6841, "step": 3105 }, { "epoch": 0.8749472499648333, "grad_norm": 3.171875, "learning_rate": 7.661284374704848e-06, "loss": 1.5463, "step": 3110 }, { "epoch": 0.8763539175692784, "grad_norm": 3.0, "learning_rate": 7.659303817680682e-06, "loss": 1.6332, "step": 3115 }, { "epoch": 0.8777605851737235, "grad_norm": 2.53125, "learning_rate": 7.657317744641047e-06, "loss": 1.9633, "step": 3120 }, { "epoch": 0.8791672527781685, "grad_norm": 2.21875, "learning_rate": 7.655326158579739e-06, "loss": 1.6777, "step": 3125 }, { "epoch": 0.8805739203826136, "grad_norm": 2.4375, "learning_rate": 7.65332906249886e-06, "loss": 1.5168, "step": 3130 }, { "epoch": 0.8819805879870587, "grad_norm": 3.0, "learning_rate": 7.65132645940883e-06, "loss": 1.8697, "step": 3135 }, { "epoch": 0.8833872555915038, "grad_norm": 3.265625, "learning_rate": 7.649318352328356e-06, "loss": 1.3569, "step": 3140 }, { "epoch": 0.8847939231959489, "grad_norm": 2.671875, "learning_rate": 7.647304744284452e-06, "loss": 1.6584, "step": 3145 }, { "epoch": 0.8862005908003938, "grad_norm": 3.109375, "learning_rate": 7.645285638312418e-06, "loss": 1.6259, "step": 3150 }, { "epoch": 0.8876072584048389, "grad_norm": 2.5625, "learning_rate": 7.643261037455844e-06, "loss": 1.7632, "step": 3155 }, { "epoch": 0.889013926009284, "grad_norm": 3.0625, "learning_rate": 7.641230944766605e-06, "loss": 1.5457, "step": 3160 }, { "epoch": 0.890420593613729, "grad_norm": 2.734375, "learning_rate": 7.63919536330485e-06, "loss": 1.599, "step": 3165 }, { "epoch": 0.8918272612181741, "grad_norm": 3.421875, "learning_rate": 7.637154296139003e-06, "loss": 1.733, "step": 3170 }, { "epoch": 0.8932339288226192, "grad_norm": 3.484375, "learning_rate": 7.63510774634576e-06, "loss": 1.6175, "step": 3175 }, { "epoch": 0.8946405964270643, "grad_norm": 3.09375, "learning_rate": 7.633055717010078e-06, "loss": 1.7642, "step": 3180 }, { "epoch": 0.8960472640315094, "grad_norm": 3.109375, "learning_rate": 7.630998211225177e-06, "loss": 1.5858, "step": 3185 }, { "epoch": 0.8974539316359544, "grad_norm": 2.421875, "learning_rate": 7.62893523209253e-06, "loss": 1.4382, "step": 3190 }, { "epoch": 0.8988605992403995, "grad_norm": 2.75, "learning_rate": 7.62686678272186e-06, "loss": 1.7159, "step": 3195 }, { "epoch": 0.9002672668448446, "grad_norm": 4.5, "learning_rate": 7.624792866231137e-06, "loss": 1.4671, "step": 3200 }, { "epoch": 0.9016739344492897, "grad_norm": 2.21875, "learning_rate": 7.622713485746573e-06, "loss": 1.6495, "step": 3205 }, { "epoch": 0.9030806020537347, "grad_norm": 2.984375, "learning_rate": 7.620628644402613e-06, "loss": 1.4105, "step": 3210 }, { "epoch": 0.9044872696581798, "grad_norm": 3.5, "learning_rate": 7.618538345341938e-06, "loss": 1.55, "step": 3215 }, { "epoch": 0.9058939372626248, "grad_norm": 3.28125, "learning_rate": 7.6164425917154545e-06, "loss": 1.4803, "step": 3220 }, { "epoch": 0.9073006048670699, "grad_norm": 3.359375, "learning_rate": 7.614341386682289e-06, "loss": 1.4704, "step": 3225 }, { "epoch": 0.908707272471515, "grad_norm": 6.6875, "learning_rate": 7.612234733409786e-06, "loss": 1.7306, "step": 3230 }, { "epoch": 0.91011394007596, "grad_norm": 4.25, "learning_rate": 7.610122635073507e-06, "loss": 1.7463, "step": 3235 }, { "epoch": 0.9115206076804051, "grad_norm": 3.5, "learning_rate": 7.608005094857213e-06, "loss": 1.7264, "step": 3240 }, { "epoch": 0.9129272752848502, "grad_norm": 3.4375, "learning_rate": 7.60588211595288e-06, "loss": 1.5772, "step": 3245 }, { "epoch": 0.9143339428892953, "grad_norm": 3.359375, "learning_rate": 7.603753701560669e-06, "loss": 1.7315, "step": 3250 }, { "epoch": 0.9157406104937403, "grad_norm": 2.328125, "learning_rate": 7.6016198548889446e-06, "loss": 1.5967, "step": 3255 }, { "epoch": 0.9171472780981854, "grad_norm": 3.328125, "learning_rate": 7.599480579154253e-06, "loss": 1.5748, "step": 3260 }, { "epoch": 0.9185539457026305, "grad_norm": 3.46875, "learning_rate": 7.59733587758133e-06, "loss": 1.6969, "step": 3265 }, { "epoch": 0.9199606133070756, "grad_norm": 2.6875, "learning_rate": 7.595185753403086e-06, "loss": 1.3966, "step": 3270 }, { "epoch": 0.9213672809115206, "grad_norm": 3.65625, "learning_rate": 7.593030209860608e-06, "loss": 1.8503, "step": 3275 }, { "epoch": 0.9227739485159657, "grad_norm": 3.46875, "learning_rate": 7.590869250203151e-06, "loss": 1.4599, "step": 3280 }, { "epoch": 0.9241806161204107, "grad_norm": 3.828125, "learning_rate": 7.588702877688133e-06, "loss": 1.7539, "step": 3285 }, { "epoch": 0.9255872837248558, "grad_norm": 4.75, "learning_rate": 7.586531095581135e-06, "loss": 1.6305, "step": 3290 }, { "epoch": 0.9269939513293008, "grad_norm": 3.4375, "learning_rate": 7.584353907155886e-06, "loss": 1.9388, "step": 3295 }, { "epoch": 0.9284006189337459, "grad_norm": 2.671875, "learning_rate": 7.5821713156942725e-06, "loss": 1.6272, "step": 3300 }, { "epoch": 0.929807286538191, "grad_norm": 3.75, "learning_rate": 7.57998332448632e-06, "loss": 1.6722, "step": 3305 }, { "epoch": 0.9312139541426361, "grad_norm": 2.96875, "learning_rate": 7.577789936830194e-06, "loss": 1.533, "step": 3310 }, { "epoch": 0.9326206217470812, "grad_norm": 3.6875, "learning_rate": 7.575591156032198e-06, "loss": 1.6827, "step": 3315 }, { "epoch": 0.9340272893515262, "grad_norm": 2.53125, "learning_rate": 7.573386985406761e-06, "loss": 1.5372, "step": 3320 }, { "epoch": 0.9354339569559713, "grad_norm": 2.75, "learning_rate": 7.571177428276439e-06, "loss": 1.749, "step": 3325 }, { "epoch": 0.9368406245604164, "grad_norm": 3.71875, "learning_rate": 7.568962487971905e-06, "loss": 1.5339, "step": 3330 }, { "epoch": 0.9382472921648615, "grad_norm": 3.390625, "learning_rate": 7.56674216783195e-06, "loss": 1.416, "step": 3335 }, { "epoch": 0.9396539597693065, "grad_norm": 3.03125, "learning_rate": 7.564516471203474e-06, "loss": 1.6917, "step": 3340 }, { "epoch": 0.9410606273737516, "grad_norm": 3.6875, "learning_rate": 7.562285401441478e-06, "loss": 1.6447, "step": 3345 }, { "epoch": 0.9424672949781967, "grad_norm": 3.6875, "learning_rate": 7.560048961909068e-06, "loss": 1.7682, "step": 3350 }, { "epoch": 0.9438739625826417, "grad_norm": 2.953125, "learning_rate": 7.5578071559774384e-06, "loss": 1.5933, "step": 3355 }, { "epoch": 0.9452806301870867, "grad_norm": 3.5, "learning_rate": 7.555559987025878e-06, "loss": 1.7298, "step": 3360 }, { "epoch": 0.9466872977915318, "grad_norm": 3.3125, "learning_rate": 7.553307458441755e-06, "loss": 1.7833, "step": 3365 }, { "epoch": 0.9480939653959769, "grad_norm": 3.078125, "learning_rate": 7.551049573620521e-06, "loss": 1.7445, "step": 3370 }, { "epoch": 0.949500633000422, "grad_norm": 3.734375, "learning_rate": 7.5487863359656994e-06, "loss": 1.3276, "step": 3375 }, { "epoch": 0.9509073006048671, "grad_norm": 3.140625, "learning_rate": 7.546517748888882e-06, "loss": 1.7944, "step": 3380 }, { "epoch": 0.9523139682093121, "grad_norm": 2.578125, "learning_rate": 7.544243815809729e-06, "loss": 1.6007, "step": 3385 }, { "epoch": 0.9537206358137572, "grad_norm": 2.953125, "learning_rate": 7.54196454015595e-06, "loss": 1.6207, "step": 3390 }, { "epoch": 0.9551273034182023, "grad_norm": 2.78125, "learning_rate": 7.539679925363316e-06, "loss": 1.534, "step": 3395 }, { "epoch": 0.9565339710226474, "grad_norm": 4.40625, "learning_rate": 7.5373899748756435e-06, "loss": 1.5586, "step": 3400 }, { "epoch": 0.9579406386270924, "grad_norm": 4.59375, "learning_rate": 7.53509469214479e-06, "loss": 1.3578, "step": 3405 }, { "epoch": 0.9593473062315375, "grad_norm": 4.09375, "learning_rate": 7.532794080630655e-06, "loss": 1.7003, "step": 3410 }, { "epoch": 0.9607539738359826, "grad_norm": 3.15625, "learning_rate": 7.530488143801166e-06, "loss": 1.6732, "step": 3415 }, { "epoch": 0.9621606414404277, "grad_norm": 3.609375, "learning_rate": 7.528176885132283e-06, "loss": 1.675, "step": 3420 }, { "epoch": 0.9635673090448726, "grad_norm": 2.90625, "learning_rate": 7.525860308107983e-06, "loss": 1.6125, "step": 3425 }, { "epoch": 0.9649739766493177, "grad_norm": 2.875, "learning_rate": 7.523538416220264e-06, "loss": 1.621, "step": 3430 }, { "epoch": 0.9663806442537628, "grad_norm": 3.640625, "learning_rate": 7.52121121296913e-06, "loss": 1.7444, "step": 3435 }, { "epoch": 0.9677873118582079, "grad_norm": 3.140625, "learning_rate": 7.518878701862599e-06, "loss": 1.8158, "step": 3440 }, { "epoch": 0.969193979462653, "grad_norm": 3.0, "learning_rate": 7.5165408864166845e-06, "loss": 1.6573, "step": 3445 }, { "epoch": 0.970600647067098, "grad_norm": 2.984375, "learning_rate": 7.514197770155398e-06, "loss": 1.5889, "step": 3450 }, { "epoch": 0.9720073146715431, "grad_norm": 3.0, "learning_rate": 7.511849356610738e-06, "loss": 1.3515, "step": 3455 }, { "epoch": 0.9734139822759882, "grad_norm": 2.96875, "learning_rate": 7.5094956493226955e-06, "loss": 1.6675, "step": 3460 }, { "epoch": 0.9748206498804333, "grad_norm": 3.09375, "learning_rate": 7.507136651839233e-06, "loss": 1.5557, "step": 3465 }, { "epoch": 0.9762273174848783, "grad_norm": 3.0, "learning_rate": 7.504772367716292e-06, "loss": 1.4733, "step": 3470 }, { "epoch": 0.9776339850893234, "grad_norm": 3.09375, "learning_rate": 7.5024028005177814e-06, "loss": 1.6102, "step": 3475 }, { "epoch": 0.9790406526937685, "grad_norm": 2.890625, "learning_rate": 7.500027953815577e-06, "loss": 1.5866, "step": 3480 }, { "epoch": 0.9804473202982136, "grad_norm": 3.3125, "learning_rate": 7.497647831189506e-06, "loss": 1.5003, "step": 3485 }, { "epoch": 0.9818539879026587, "grad_norm": 2.78125, "learning_rate": 7.495262436227356e-06, "loss": 1.6196, "step": 3490 }, { "epoch": 0.9832606555071036, "grad_norm": 3.828125, "learning_rate": 7.492871772524859e-06, "loss": 1.6242, "step": 3495 }, { "epoch": 0.9846673231115487, "grad_norm": 4.6875, "learning_rate": 7.490475843685686e-06, "loss": 1.635, "step": 3500 }, { "epoch": 0.9860739907159938, "grad_norm": 3.25, "learning_rate": 7.488074653321452e-06, "loss": 1.5958, "step": 3505 }, { "epoch": 0.9874806583204389, "grad_norm": 2.8125, "learning_rate": 7.485668205051696e-06, "loss": 1.5429, "step": 3510 }, { "epoch": 0.9888873259248839, "grad_norm": 2.59375, "learning_rate": 7.4832565025038855e-06, "loss": 1.8323, "step": 3515 }, { "epoch": 0.990293993529329, "grad_norm": 2.921875, "learning_rate": 7.480839549313409e-06, "loss": 1.6328, "step": 3520 }, { "epoch": 0.9917006611337741, "grad_norm": 2.453125, "learning_rate": 7.478417349123569e-06, "loss": 1.6957, "step": 3525 }, { "epoch": 0.9931073287382192, "grad_norm": 4.28125, "learning_rate": 7.475989905585578e-06, "loss": 1.4479, "step": 3530 }, { "epoch": 0.9945139963426642, "grad_norm": 2.640625, "learning_rate": 7.473557222358551e-06, "loss": 1.5085, "step": 3535 }, { "epoch": 0.9959206639471093, "grad_norm": 4.375, "learning_rate": 7.471119303109502e-06, "loss": 1.5398, "step": 3540 }, { "epoch": 0.9973273315515544, "grad_norm": 2.234375, "learning_rate": 7.468676151513339e-06, "loss": 1.503, "step": 3545 }, { "epoch": 0.9987339991559995, "grad_norm": 3.796875, "learning_rate": 7.4662277712528536e-06, "loss": 1.6194, "step": 3550 }, { "epoch": 0.9998593332395554, "eval_loss": 1.611290454864502, "eval_runtime": 329.7515, "eval_samples_per_second": 9.577, "eval_steps_per_second": 4.788, "step": 3554 }, { "epoch": 1.0001406667604444, "grad_norm": 4.03125, "learning_rate": 7.463774166018723e-06, "loss": 1.6976, "step": 3555 }, { "epoch": 1.0015473343648895, "grad_norm": 3.25, "learning_rate": 7.461315339509499e-06, "loss": 1.6923, "step": 3560 }, { "epoch": 1.0029540019693346, "grad_norm": 2.90625, "learning_rate": 7.458851295431601e-06, "loss": 1.5505, "step": 3565 }, { "epoch": 1.0043606695737797, "grad_norm": 3.140625, "learning_rate": 7.456382037499322e-06, "loss": 1.5004, "step": 3570 }, { "epoch": 1.0057673371782248, "grad_norm": 3.234375, "learning_rate": 7.453907569434804e-06, "loss": 1.5441, "step": 3575 }, { "epoch": 1.0071740047826698, "grad_norm": 3.1875, "learning_rate": 7.451427894968049e-06, "loss": 1.4524, "step": 3580 }, { "epoch": 1.008580672387115, "grad_norm": 3.15625, "learning_rate": 7.448943017836903e-06, "loss": 1.5271, "step": 3585 }, { "epoch": 1.00998733999156, "grad_norm": 3.03125, "learning_rate": 7.44645294178706e-06, "loss": 1.5947, "step": 3590 }, { "epoch": 1.011394007596005, "grad_norm": 2.171875, "learning_rate": 7.443957670572046e-06, "loss": 1.7221, "step": 3595 }, { "epoch": 1.0128006752004501, "grad_norm": 4.1875, "learning_rate": 7.4414572079532205e-06, "loss": 1.538, "step": 3600 }, { "epoch": 1.0142073428048952, "grad_norm": 3.296875, "learning_rate": 7.438951557699767e-06, "loss": 1.4715, "step": 3605 }, { "epoch": 1.0156140104093403, "grad_norm": 3.171875, "learning_rate": 7.436440723588688e-06, "loss": 1.5015, "step": 3610 }, { "epoch": 1.0170206780137854, "grad_norm": 3.640625, "learning_rate": 7.433924709404806e-06, "loss": 1.5526, "step": 3615 }, { "epoch": 1.0184273456182305, "grad_norm": 3.9375, "learning_rate": 7.4314035189407436e-06, "loss": 1.2446, "step": 3620 }, { "epoch": 1.0198340132226755, "grad_norm": 3.734375, "learning_rate": 7.428877155996934e-06, "loss": 1.6687, "step": 3625 }, { "epoch": 1.0212406808271206, "grad_norm": 2.09375, "learning_rate": 7.4263456243816e-06, "loss": 1.3963, "step": 3630 }, { "epoch": 1.0226473484315657, "grad_norm": 2.6875, "learning_rate": 7.42380892791076e-06, "loss": 1.606, "step": 3635 }, { "epoch": 1.0240540160360108, "grad_norm": 2.734375, "learning_rate": 7.421267070408218e-06, "loss": 1.6143, "step": 3640 }, { "epoch": 1.0254606836404558, "grad_norm": 2.96875, "learning_rate": 7.418720055705556e-06, "loss": 1.6649, "step": 3645 }, { "epoch": 1.026867351244901, "grad_norm": 2.296875, "learning_rate": 7.416167887642132e-06, "loss": 1.6411, "step": 3650 }, { "epoch": 1.028274018849346, "grad_norm": 4.28125, "learning_rate": 7.413610570065069e-06, "loss": 1.7296, "step": 3655 }, { "epoch": 1.029680686453791, "grad_norm": 2.96875, "learning_rate": 7.411048106829253e-06, "loss": 1.6742, "step": 3660 }, { "epoch": 1.031087354058236, "grad_norm": 3.40625, "learning_rate": 7.408480501797333e-06, "loss": 1.4126, "step": 3665 }, { "epoch": 1.032494021662681, "grad_norm": 5.0625, "learning_rate": 7.405907758839698e-06, "loss": 1.6467, "step": 3670 }, { "epoch": 1.033900689267126, "grad_norm": 4.46875, "learning_rate": 7.403329881834489e-06, "loss": 1.4774, "step": 3675 }, { "epoch": 1.0353073568715712, "grad_norm": 3.140625, "learning_rate": 7.400746874667586e-06, "loss": 1.6924, "step": 3680 }, { "epoch": 1.0367140244760162, "grad_norm": 3.375, "learning_rate": 7.398158741232598e-06, "loss": 1.635, "step": 3685 }, { "epoch": 1.0381206920804613, "grad_norm": 5.84375, "learning_rate": 7.395565485430866e-06, "loss": 1.4753, "step": 3690 }, { "epoch": 1.0395273596849064, "grad_norm": 3.484375, "learning_rate": 7.392967111171448e-06, "loss": 1.4399, "step": 3695 }, { "epoch": 1.0409340272893515, "grad_norm": 3.234375, "learning_rate": 7.390363622371122e-06, "loss": 1.6359, "step": 3700 }, { "epoch": 1.0423406948937965, "grad_norm": 3.03125, "learning_rate": 7.387755022954373e-06, "loss": 1.6028, "step": 3705 }, { "epoch": 1.0437473624982416, "grad_norm": 2.359375, "learning_rate": 7.385141316853388e-06, "loss": 1.4053, "step": 3710 }, { "epoch": 1.0451540301026867, "grad_norm": 2.875, "learning_rate": 7.382522508008056e-06, "loss": 1.5238, "step": 3715 }, { "epoch": 1.0465606977071318, "grad_norm": 2.15625, "learning_rate": 7.379898600365956e-06, "loss": 1.7097, "step": 3720 }, { "epoch": 1.0479673653115769, "grad_norm": 2.8125, "learning_rate": 7.377269597882351e-06, "loss": 1.5788, "step": 3725 }, { "epoch": 1.049374032916022, "grad_norm": 2.3125, "learning_rate": 7.374635504520186e-06, "loss": 1.6983, "step": 3730 }, { "epoch": 1.050780700520467, "grad_norm": 4.15625, "learning_rate": 7.371996324250083e-06, "loss": 1.5622, "step": 3735 }, { "epoch": 1.052187368124912, "grad_norm": 3.5625, "learning_rate": 7.369352061050324e-06, "loss": 1.4207, "step": 3740 }, { "epoch": 1.0535940357293572, "grad_norm": 2.3125, "learning_rate": 7.366702718906859e-06, "loss": 1.5624, "step": 3745 }, { "epoch": 1.0550007033338022, "grad_norm": 3.75, "learning_rate": 7.364048301813293e-06, "loss": 1.3799, "step": 3750 }, { "epoch": 1.0564073709382473, "grad_norm": 3.65625, "learning_rate": 7.361388813770881e-06, "loss": 1.781, "step": 3755 }, { "epoch": 1.0578140385426924, "grad_norm": 3.96875, "learning_rate": 7.35872425878852e-06, "loss": 1.5601, "step": 3760 }, { "epoch": 1.0592207061471375, "grad_norm": 2.71875, "learning_rate": 7.356054640882747e-06, "loss": 1.5782, "step": 3765 }, { "epoch": 1.0606273737515826, "grad_norm": 3.40625, "learning_rate": 7.35337996407773e-06, "loss": 1.7065, "step": 3770 }, { "epoch": 1.0620340413560276, "grad_norm": 3.5, "learning_rate": 7.350700232405263e-06, "loss": 1.7547, "step": 3775 }, { "epoch": 1.0634407089604727, "grad_norm": 3.03125, "learning_rate": 7.3480154499047585e-06, "loss": 1.797, "step": 3780 }, { "epoch": 1.0648473765649178, "grad_norm": 4.0625, "learning_rate": 7.345325620623246e-06, "loss": 1.6371, "step": 3785 }, { "epoch": 1.0662540441693629, "grad_norm": 3.328125, "learning_rate": 7.3426307486153575e-06, "loss": 1.6329, "step": 3790 }, { "epoch": 1.067660711773808, "grad_norm": 2.625, "learning_rate": 7.339930837943331e-06, "loss": 1.5983, "step": 3795 }, { "epoch": 1.0690673793782528, "grad_norm": 3.03125, "learning_rate": 7.337225892676997e-06, "loss": 1.6337, "step": 3800 }, { "epoch": 1.070474046982698, "grad_norm": 2.640625, "learning_rate": 7.334515916893774e-06, "loss": 1.6965, "step": 3805 }, { "epoch": 1.071880714587143, "grad_norm": 3.421875, "learning_rate": 7.3318009146786695e-06, "loss": 1.5862, "step": 3810 }, { "epoch": 1.073287382191588, "grad_norm": 3.25, "learning_rate": 7.3290808901242595e-06, "loss": 1.5898, "step": 3815 }, { "epoch": 1.0746940497960331, "grad_norm": 3.359375, "learning_rate": 7.326355847330698e-06, "loss": 1.5154, "step": 3820 }, { "epoch": 1.0761007174004782, "grad_norm": 2.9375, "learning_rate": 7.323625790405698e-06, "loss": 1.6193, "step": 3825 }, { "epoch": 1.0775073850049233, "grad_norm": 2.65625, "learning_rate": 7.320890723464535e-06, "loss": 1.7274, "step": 3830 }, { "epoch": 1.0789140526093683, "grad_norm": 3.765625, "learning_rate": 7.3181506506300324e-06, "loss": 1.418, "step": 3835 }, { "epoch": 1.0803207202138134, "grad_norm": 3.140625, "learning_rate": 7.315405576032563e-06, "loss": 1.6552, "step": 3840 }, { "epoch": 1.0817273878182585, "grad_norm": 4.65625, "learning_rate": 7.3126555038100374e-06, "loss": 1.5523, "step": 3845 }, { "epoch": 1.0831340554227036, "grad_norm": 3.359375, "learning_rate": 7.3099004381079e-06, "loss": 1.4922, "step": 3850 }, { "epoch": 1.0845407230271487, "grad_norm": 2.9375, "learning_rate": 7.307140383079125e-06, "loss": 1.5572, "step": 3855 }, { "epoch": 1.0859473906315937, "grad_norm": 2.953125, "learning_rate": 7.304375342884201e-06, "loss": 1.4289, "step": 3860 }, { "epoch": 1.0873540582360388, "grad_norm": 2.8125, "learning_rate": 7.301605321691138e-06, "loss": 1.7568, "step": 3865 }, { "epoch": 1.088760725840484, "grad_norm": 3.125, "learning_rate": 7.2988303236754515e-06, "loss": 1.4492, "step": 3870 }, { "epoch": 1.090167393444929, "grad_norm": 3.125, "learning_rate": 7.296050353020156e-06, "loss": 1.8061, "step": 3875 }, { "epoch": 1.091574061049374, "grad_norm": 3.03125, "learning_rate": 7.293265413915767e-06, "loss": 1.775, "step": 3880 }, { "epoch": 1.0929807286538191, "grad_norm": 2.828125, "learning_rate": 7.290475510560288e-06, "loss": 1.6663, "step": 3885 }, { "epoch": 1.0943873962582642, "grad_norm": 2.75, "learning_rate": 7.287680647159202e-06, "loss": 1.6116, "step": 3890 }, { "epoch": 1.0957940638627093, "grad_norm": 3.015625, "learning_rate": 7.2848808279254745e-06, "loss": 1.4655, "step": 3895 }, { "epoch": 1.0972007314671544, "grad_norm": 2.71875, "learning_rate": 7.282076057079537e-06, "loss": 1.7702, "step": 3900 }, { "epoch": 1.0986073990715994, "grad_norm": 3.171875, "learning_rate": 7.2792663388492865e-06, "loss": 1.4311, "step": 3905 }, { "epoch": 1.1000140666760445, "grad_norm": 3.28125, "learning_rate": 7.2764516774700775e-06, "loss": 1.6944, "step": 3910 }, { "epoch": 1.1014207342804896, "grad_norm": 3.015625, "learning_rate": 7.273632077184716e-06, "loss": 1.7071, "step": 3915 }, { "epoch": 1.1028274018849347, "grad_norm": 2.609375, "learning_rate": 7.270807542243453e-06, "loss": 1.5593, "step": 3920 }, { "epoch": 1.1042340694893797, "grad_norm": 2.765625, "learning_rate": 7.2679780769039775e-06, "loss": 1.5341, "step": 3925 }, { "epoch": 1.1056407370938248, "grad_norm": 2.34375, "learning_rate": 7.26514368543141e-06, "loss": 1.8932, "step": 3930 }, { "epoch": 1.10704740469827, "grad_norm": 3.203125, "learning_rate": 7.262304372098299e-06, "loss": 1.7494, "step": 3935 }, { "epoch": 1.108454072302715, "grad_norm": 4.0625, "learning_rate": 7.259460141184609e-06, "loss": 1.6125, "step": 3940 }, { "epoch": 1.1098607399071598, "grad_norm": 2.515625, "learning_rate": 7.25661099697772e-06, "loss": 1.7168, "step": 3945 }, { "epoch": 1.111267407511605, "grad_norm": 2.734375, "learning_rate": 7.253756943772416e-06, "loss": 1.7773, "step": 3950 }, { "epoch": 1.11267407511605, "grad_norm": 2.859375, "learning_rate": 7.250897985870884e-06, "loss": 1.6824, "step": 3955 }, { "epoch": 1.114080742720495, "grad_norm": 2.625, "learning_rate": 7.248034127582698e-06, "loss": 1.6164, "step": 3960 }, { "epoch": 1.1154874103249401, "grad_norm": 2.984375, "learning_rate": 7.245165373224829e-06, "loss": 1.6013, "step": 3965 }, { "epoch": 1.1168940779293852, "grad_norm": 3.953125, "learning_rate": 7.242291727121617e-06, "loss": 1.6286, "step": 3970 }, { "epoch": 1.1183007455338303, "grad_norm": 2.859375, "learning_rate": 7.2394131936047845e-06, "loss": 1.5001, "step": 3975 }, { "epoch": 1.1197074131382754, "grad_norm": 3.109375, "learning_rate": 7.236529777013416e-06, "loss": 1.5944, "step": 3980 }, { "epoch": 1.1211140807427205, "grad_norm": 3.140625, "learning_rate": 7.233641481693959e-06, "loss": 1.5331, "step": 3985 }, { "epoch": 1.1225207483471655, "grad_norm": 3.140625, "learning_rate": 7.230748312000216e-06, "loss": 1.772, "step": 3990 }, { "epoch": 1.1239274159516106, "grad_norm": 4.5625, "learning_rate": 7.227850272293334e-06, "loss": 1.5381, "step": 3995 }, { "epoch": 1.1253340835560557, "grad_norm": 3.140625, "learning_rate": 7.224947366941805e-06, "loss": 1.7734, "step": 4000 }, { "epoch": 1.1267407511605008, "grad_norm": 2.390625, "learning_rate": 7.2220396003214525e-06, "loss": 1.703, "step": 4005 }, { "epoch": 1.1281474187649458, "grad_norm": 2.734375, "learning_rate": 7.219126976815427e-06, "loss": 1.3594, "step": 4010 }, { "epoch": 1.129554086369391, "grad_norm": 2.90625, "learning_rate": 7.216209500814205e-06, "loss": 1.3872, "step": 4015 }, { "epoch": 1.130960753973836, "grad_norm": 3.296875, "learning_rate": 7.213287176715571e-06, "loss": 1.5567, "step": 4020 }, { "epoch": 1.132367421578281, "grad_norm": 2.703125, "learning_rate": 7.210360008924625e-06, "loss": 1.5977, "step": 4025 }, { "epoch": 1.1337740891827262, "grad_norm": 3.375, "learning_rate": 7.207428001853762e-06, "loss": 1.7796, "step": 4030 }, { "epoch": 1.1351807567871712, "grad_norm": 2.703125, "learning_rate": 7.204491159922675e-06, "loss": 1.7479, "step": 4035 }, { "epoch": 1.1365874243916163, "grad_norm": 3.59375, "learning_rate": 7.201549487558344e-06, "loss": 1.5466, "step": 4040 }, { "epoch": 1.1379940919960614, "grad_norm": 2.375, "learning_rate": 7.198602989195029e-06, "loss": 1.5066, "step": 4045 }, { "epoch": 1.1394007596005065, "grad_norm": 4.78125, "learning_rate": 7.19565166927427e-06, "loss": 1.3287, "step": 4050 }, { "epoch": 1.1408074272049515, "grad_norm": 3.34375, "learning_rate": 7.192695532244867e-06, "loss": 1.4833, "step": 4055 }, { "epoch": 1.1422140948093966, "grad_norm": 2.59375, "learning_rate": 7.1897345825628875e-06, "loss": 1.5049, "step": 4060 }, { "epoch": 1.1436207624138417, "grad_norm": 3.359375, "learning_rate": 7.186768824691652e-06, "loss": 1.7101, "step": 4065 }, { "epoch": 1.1450274300182868, "grad_norm": 2.9375, "learning_rate": 7.183798263101729e-06, "loss": 1.6311, "step": 4070 }, { "epoch": 1.1464340976227319, "grad_norm": 4.3125, "learning_rate": 7.180822902270926e-06, "loss": 1.2967, "step": 4075 }, { "epoch": 1.1478407652271767, "grad_norm": 2.921875, "learning_rate": 7.177842746684287e-06, "loss": 1.3902, "step": 4080 }, { "epoch": 1.149247432831622, "grad_norm": 2.796875, "learning_rate": 7.174857800834083e-06, "loss": 1.5712, "step": 4085 }, { "epoch": 1.1506541004360669, "grad_norm": 3.546875, "learning_rate": 7.171868069219804e-06, "loss": 1.5564, "step": 4090 }, { "epoch": 1.152060768040512, "grad_norm": 2.953125, "learning_rate": 7.16887355634816e-06, "loss": 1.435, "step": 4095 }, { "epoch": 1.153467435644957, "grad_norm": 3.875, "learning_rate": 7.16587426673306e-06, "loss": 1.6016, "step": 4100 }, { "epoch": 1.154874103249402, "grad_norm": 3.5625, "learning_rate": 7.16287020489562e-06, "loss": 1.6038, "step": 4105 }, { "epoch": 1.1562807708538472, "grad_norm": 3.6875, "learning_rate": 7.159861375364146e-06, "loss": 1.5753, "step": 4110 }, { "epoch": 1.1576874384582923, "grad_norm": 4.5, "learning_rate": 7.156847782674132e-06, "loss": 1.6043, "step": 4115 }, { "epoch": 1.1590941060627373, "grad_norm": 2.9375, "learning_rate": 7.153829431368252e-06, "loss": 1.5679, "step": 4120 }, { "epoch": 1.1605007736671824, "grad_norm": 3.3125, "learning_rate": 7.150806325996354e-06, "loss": 1.5646, "step": 4125 }, { "epoch": 1.1619074412716275, "grad_norm": 3.078125, "learning_rate": 7.147778471115449e-06, "loss": 1.693, "step": 4130 }, { "epoch": 1.1633141088760726, "grad_norm": 3.609375, "learning_rate": 7.144745871289711e-06, "loss": 1.6215, "step": 4135 }, { "epoch": 1.1647207764805176, "grad_norm": 4.15625, "learning_rate": 7.141708531090467e-06, "loss": 1.5527, "step": 4140 }, { "epoch": 1.1661274440849627, "grad_norm": 2.921875, "learning_rate": 7.138666455096183e-06, "loss": 1.8448, "step": 4145 }, { "epoch": 1.1675341116894078, "grad_norm": 4.34375, "learning_rate": 7.1356196478924734e-06, "loss": 1.7495, "step": 4150 }, { "epoch": 1.1689407792938529, "grad_norm": 3.625, "learning_rate": 7.132568114072077e-06, "loss": 1.4359, "step": 4155 }, { "epoch": 1.170347446898298, "grad_norm": 4.0625, "learning_rate": 7.12951185823486e-06, "loss": 1.6285, "step": 4160 }, { "epoch": 1.171754114502743, "grad_norm": 2.671875, "learning_rate": 7.126450884987807e-06, "loss": 1.7377, "step": 4165 }, { "epoch": 1.173160782107188, "grad_norm": 2.609375, "learning_rate": 7.123385198945012e-06, "loss": 1.7262, "step": 4170 }, { "epoch": 1.1745674497116332, "grad_norm": 3.375, "learning_rate": 7.120314804727676e-06, "loss": 1.5923, "step": 4175 }, { "epoch": 1.1759741173160783, "grad_norm": 3.09375, "learning_rate": 7.117239706964094e-06, "loss": 1.7389, "step": 4180 }, { "epoch": 1.1773807849205233, "grad_norm": 3.125, "learning_rate": 7.114159910289652e-06, "loss": 1.7112, "step": 4185 }, { "epoch": 1.1787874525249684, "grad_norm": 3.21875, "learning_rate": 7.111075419346821e-06, "loss": 1.7166, "step": 4190 }, { "epoch": 1.1801941201294135, "grad_norm": 2.859375, "learning_rate": 7.107986238785145e-06, "loss": 1.4257, "step": 4195 }, { "epoch": 1.1816007877338586, "grad_norm": 3.421875, "learning_rate": 7.10489237326124e-06, "loss": 1.6122, "step": 4200 }, { "epoch": 1.1830074553383036, "grad_norm": 9.8125, "learning_rate": 7.101793827438781e-06, "loss": 1.415, "step": 4205 }, { "epoch": 1.1844141229427487, "grad_norm": 3.046875, "learning_rate": 7.098690605988501e-06, "loss": 1.5566, "step": 4210 }, { "epoch": 1.1858207905471936, "grad_norm": 2.3125, "learning_rate": 7.095582713588179e-06, "loss": 1.3807, "step": 4215 }, { "epoch": 1.1872274581516389, "grad_norm": 3.265625, "learning_rate": 7.092470154922638e-06, "loss": 1.7086, "step": 4220 }, { "epoch": 1.1886341257560837, "grad_norm": 2.296875, "learning_rate": 7.089352934683729e-06, "loss": 1.5553, "step": 4225 }, { "epoch": 1.1900407933605288, "grad_norm": 3.40625, "learning_rate": 7.086231057570337e-06, "loss": 1.3568, "step": 4230 }, { "epoch": 1.191447460964974, "grad_norm": 3.765625, "learning_rate": 7.083104528288361e-06, "loss": 1.5711, "step": 4235 }, { "epoch": 1.192854128569419, "grad_norm": 2.609375, "learning_rate": 7.079973351550716e-06, "loss": 1.5155, "step": 4240 }, { "epoch": 1.194260796173864, "grad_norm": 3.578125, "learning_rate": 7.076837532077321e-06, "loss": 1.5077, "step": 4245 }, { "epoch": 1.1956674637783091, "grad_norm": 3.15625, "learning_rate": 7.073697074595095e-06, "loss": 1.5776, "step": 4250 }, { "epoch": 1.1970741313827542, "grad_norm": 4.25, "learning_rate": 7.070551983837945e-06, "loss": 1.5348, "step": 4255 }, { "epoch": 1.1984807989871993, "grad_norm": 2.90625, "learning_rate": 7.067402264546766e-06, "loss": 1.7168, "step": 4260 }, { "epoch": 1.1998874665916444, "grad_norm": 3.875, "learning_rate": 7.064247921469429e-06, "loss": 1.6378, "step": 4265 }, { "epoch": 1.2012941341960894, "grad_norm": 2.765625, "learning_rate": 7.061088959360772e-06, "loss": 1.4526, "step": 4270 }, { "epoch": 1.2027008018005345, "grad_norm": 3.609375, "learning_rate": 7.0579253829826e-06, "loss": 1.6888, "step": 4275 }, { "epoch": 1.2041074694049796, "grad_norm": 4.0625, "learning_rate": 7.05475719710367e-06, "loss": 1.6771, "step": 4280 }, { "epoch": 1.2055141370094247, "grad_norm": 3.125, "learning_rate": 7.051584406499691e-06, "loss": 1.7519, "step": 4285 }, { "epoch": 1.2069208046138697, "grad_norm": 2.703125, "learning_rate": 7.048407015953309e-06, "loss": 1.6397, "step": 4290 }, { "epoch": 1.2083274722183148, "grad_norm": 4.75, "learning_rate": 7.045225030254107e-06, "loss": 1.6319, "step": 4295 }, { "epoch": 1.20973413982276, "grad_norm": 3.296875, "learning_rate": 7.042038454198593e-06, "loss": 1.5327, "step": 4300 }, { "epoch": 1.211140807427205, "grad_norm": 4.8125, "learning_rate": 7.038847292590196e-06, "loss": 1.4904, "step": 4305 }, { "epoch": 1.21254747503165, "grad_norm": 3.78125, "learning_rate": 7.0356515502392555e-06, "loss": 1.4568, "step": 4310 }, { "epoch": 1.2139541426360951, "grad_norm": 3.625, "learning_rate": 7.032451231963016e-06, "loss": 1.5869, "step": 4315 }, { "epoch": 1.2153608102405402, "grad_norm": 3.78125, "learning_rate": 7.0292463425856235e-06, "loss": 1.6826, "step": 4320 }, { "epoch": 1.2167674778449853, "grad_norm": 3.328125, "learning_rate": 7.026036886938108e-06, "loss": 1.5787, "step": 4325 }, { "epoch": 1.2181741454494304, "grad_norm": 4.34375, "learning_rate": 7.022822869858389e-06, "loss": 1.575, "step": 4330 }, { "epoch": 1.2195808130538754, "grad_norm": 2.703125, "learning_rate": 7.0196042961912575e-06, "loss": 1.5395, "step": 4335 }, { "epoch": 1.2209874806583205, "grad_norm": 2.421875, "learning_rate": 7.016381170788375e-06, "loss": 1.6192, "step": 4340 }, { "epoch": 1.2223941482627656, "grad_norm": 2.84375, "learning_rate": 7.013153498508263e-06, "loss": 1.6263, "step": 4345 }, { "epoch": 1.2238008158672105, "grad_norm": 4.34375, "learning_rate": 7.009921284216299e-06, "loss": 1.4436, "step": 4350 }, { "epoch": 1.2252074834716558, "grad_norm": 3.4375, "learning_rate": 7.006684532784707e-06, "loss": 1.425, "step": 4355 }, { "epoch": 1.2266141510761006, "grad_norm": 4.375, "learning_rate": 7.003443249092547e-06, "loss": 1.3797, "step": 4360 }, { "epoch": 1.228020818680546, "grad_norm": 6.65625, "learning_rate": 7.000197438025715e-06, "loss": 1.7619, "step": 4365 }, { "epoch": 1.2294274862849908, "grad_norm": 2.75, "learning_rate": 6.9969471044769275e-06, "loss": 1.6214, "step": 4370 }, { "epoch": 1.2308341538894358, "grad_norm": 2.578125, "learning_rate": 6.993692253345722e-06, "loss": 1.7026, "step": 4375 }, { "epoch": 1.232240821493881, "grad_norm": 3.140625, "learning_rate": 6.990432889538444e-06, "loss": 1.5261, "step": 4380 }, { "epoch": 1.233647489098326, "grad_norm": 3.78125, "learning_rate": 6.98716901796824e-06, "loss": 1.6586, "step": 4385 }, { "epoch": 1.235054156702771, "grad_norm": 3.65625, "learning_rate": 6.983900643555056e-06, "loss": 1.5024, "step": 4390 }, { "epoch": 1.2364608243072162, "grad_norm": 3.875, "learning_rate": 6.980627771225618e-06, "loss": 1.5133, "step": 4395 }, { "epoch": 1.2378674919116612, "grad_norm": 2.6875, "learning_rate": 6.977350405913442e-06, "loss": 1.4485, "step": 4400 }, { "epoch": 1.2392741595161063, "grad_norm": 3.46875, "learning_rate": 6.974068552558806e-06, "loss": 1.6037, "step": 4405 }, { "epoch": 1.2406808271205514, "grad_norm": 2.640625, "learning_rate": 6.970782216108764e-06, "loss": 1.833, "step": 4410 }, { "epoch": 1.2420874947249965, "grad_norm": 2.890625, "learning_rate": 6.967491401517118e-06, "loss": 1.6397, "step": 4415 }, { "epoch": 1.2434941623294415, "grad_norm": 2.984375, "learning_rate": 6.964196113744427e-06, "loss": 1.6655, "step": 4420 }, { "epoch": 1.2449008299338866, "grad_norm": 2.46875, "learning_rate": 6.960896357757989e-06, "loss": 1.588, "step": 4425 }, { "epoch": 1.2463074975383317, "grad_norm": 3.03125, "learning_rate": 6.957592138531841e-06, "loss": 1.7372, "step": 4430 }, { "epoch": 1.2477141651427768, "grad_norm": 3.375, "learning_rate": 6.954283461046744e-06, "loss": 1.459, "step": 4435 }, { "epoch": 1.2491208327472219, "grad_norm": 2.828125, "learning_rate": 6.950970330290182e-06, "loss": 1.5329, "step": 4440 }, { "epoch": 1.250527500351667, "grad_norm": 4.0, "learning_rate": 6.947652751256351e-06, "loss": 1.6937, "step": 4445 }, { "epoch": 1.251934167956112, "grad_norm": 3.640625, "learning_rate": 6.944330728946153e-06, "loss": 1.5424, "step": 4450 }, { "epoch": 1.253340835560557, "grad_norm": 2.765625, "learning_rate": 6.941004268367185e-06, "loss": 1.4694, "step": 4455 }, { "epoch": 1.2547475031650022, "grad_norm": 3.140625, "learning_rate": 6.937673374533738e-06, "loss": 1.5039, "step": 4460 }, { "epoch": 1.2561541707694472, "grad_norm": 2.71875, "learning_rate": 6.934338052466785e-06, "loss": 1.5262, "step": 4465 }, { "epoch": 1.2575608383738923, "grad_norm": 4.3125, "learning_rate": 6.93099830719397e-06, "loss": 1.5302, "step": 4470 }, { "epoch": 1.2589675059783374, "grad_norm": 3.15625, "learning_rate": 6.92765414374961e-06, "loss": 1.6213, "step": 4475 }, { "epoch": 1.2603741735827825, "grad_norm": 4.25, "learning_rate": 6.924305567174678e-06, "loss": 1.4612, "step": 4480 }, { "epoch": 1.2617808411872273, "grad_norm": 3.75, "learning_rate": 6.920952582516802e-06, "loss": 1.5592, "step": 4485 }, { "epoch": 1.2631875087916726, "grad_norm": 5.59375, "learning_rate": 6.917595194830253e-06, "loss": 1.709, "step": 4490 }, { "epoch": 1.2645941763961175, "grad_norm": 3.3125, "learning_rate": 6.91423340917594e-06, "loss": 1.4438, "step": 4495 }, { "epoch": 1.2660008440005628, "grad_norm": 2.859375, "learning_rate": 6.9108672306214e-06, "loss": 1.9044, "step": 4500 }, { "epoch": 1.2674075116050076, "grad_norm": 2.671875, "learning_rate": 6.907496664240796e-06, "loss": 1.6878, "step": 4505 }, { "epoch": 1.268814179209453, "grad_norm": 3.84375, "learning_rate": 6.9041217151149e-06, "loss": 1.529, "step": 4510 }, { "epoch": 1.2702208468138978, "grad_norm": 2.984375, "learning_rate": 6.900742388331091e-06, "loss": 1.5458, "step": 4515 }, { "epoch": 1.2716275144183429, "grad_norm": 3.5, "learning_rate": 6.897358688983351e-06, "loss": 1.5387, "step": 4520 }, { "epoch": 1.273034182022788, "grad_norm": 3.140625, "learning_rate": 6.893970622172251e-06, "loss": 1.6777, "step": 4525 }, { "epoch": 1.274440849627233, "grad_norm": 3.984375, "learning_rate": 6.890578193004944e-06, "loss": 1.654, "step": 4530 }, { "epoch": 1.275847517231678, "grad_norm": 2.890625, "learning_rate": 6.88718140659516e-06, "loss": 1.7976, "step": 4535 }, { "epoch": 1.2772541848361232, "grad_norm": 2.625, "learning_rate": 6.883780268063198e-06, "loss": 1.4941, "step": 4540 }, { "epoch": 1.2786608524405683, "grad_norm": 3.453125, "learning_rate": 6.880374782535915e-06, "loss": 1.6671, "step": 4545 }, { "epoch": 1.2800675200450133, "grad_norm": 3.953125, "learning_rate": 6.8769649551467235e-06, "loss": 1.3581, "step": 4550 }, { "epoch": 1.2814741876494584, "grad_norm": 2.3125, "learning_rate": 6.87355079103558e-06, "loss": 1.4248, "step": 4555 }, { "epoch": 1.2828808552539035, "grad_norm": 2.25, "learning_rate": 6.8701322953489755e-06, "loss": 1.7387, "step": 4560 }, { "epoch": 1.2842875228583486, "grad_norm": 2.703125, "learning_rate": 6.866709473239932e-06, "loss": 1.6799, "step": 4565 }, { "epoch": 1.2856941904627937, "grad_norm": 3.5625, "learning_rate": 6.8632823298679985e-06, "loss": 1.492, "step": 4570 }, { "epoch": 1.2871008580672387, "grad_norm": 3.5, "learning_rate": 6.859850870399229e-06, "loss": 1.7618, "step": 4575 }, { "epoch": 1.2885075256716838, "grad_norm": 3.40625, "learning_rate": 6.856415100006188e-06, "loss": 1.6899, "step": 4580 }, { "epoch": 1.2899141932761289, "grad_norm": 4.46875, "learning_rate": 6.852975023867939e-06, "loss": 1.3663, "step": 4585 }, { "epoch": 1.291320860880574, "grad_norm": 3.109375, "learning_rate": 6.849530647170033e-06, "loss": 1.5616, "step": 4590 }, { "epoch": 1.292727528485019, "grad_norm": 3.1875, "learning_rate": 6.846081975104507e-06, "loss": 1.8242, "step": 4595 }, { "epoch": 1.2941341960894641, "grad_norm": 3.109375, "learning_rate": 6.842629012869872e-06, "loss": 1.7529, "step": 4600 }, { "epoch": 1.2955408636939092, "grad_norm": 2.734375, "learning_rate": 6.839171765671104e-06, "loss": 1.6587, "step": 4605 }, { "epoch": 1.2969475312983543, "grad_norm": 3.34375, "learning_rate": 6.835710238719638e-06, "loss": 1.7018, "step": 4610 }, { "epoch": 1.2983541989027994, "grad_norm": 2.734375, "learning_rate": 6.832244437233364e-06, "loss": 1.6016, "step": 4615 }, { "epoch": 1.2997608665072442, "grad_norm": 3.578125, "learning_rate": 6.828774366436613e-06, "loss": 1.8004, "step": 4620 }, { "epoch": 1.3011675341116895, "grad_norm": 2.890625, "learning_rate": 6.82530003156015e-06, "loss": 1.5459, "step": 4625 }, { "epoch": 1.3025742017161344, "grad_norm": 3.125, "learning_rate": 6.82182143784117e-06, "loss": 1.3069, "step": 4630 }, { "epoch": 1.3039808693205797, "grad_norm": 3.40625, "learning_rate": 6.818338590523288e-06, "loss": 1.7134, "step": 4635 }, { "epoch": 1.3053875369250245, "grad_norm": 2.421875, "learning_rate": 6.8148514948565275e-06, "loss": 1.3821, "step": 4640 }, { "epoch": 1.3067942045294698, "grad_norm": 4.40625, "learning_rate": 6.81136015609732e-06, "loss": 1.5428, "step": 4645 }, { "epoch": 1.3082008721339147, "grad_norm": 2.1875, "learning_rate": 6.8078645795084925e-06, "loss": 1.8607, "step": 4650 }, { "epoch": 1.3096075397383597, "grad_norm": 6.96875, "learning_rate": 6.804364770359257e-06, "loss": 1.6046, "step": 4655 }, { "epoch": 1.3110142073428048, "grad_norm": 2.921875, "learning_rate": 6.8008607339252075e-06, "loss": 1.6103, "step": 4660 }, { "epoch": 1.31242087494725, "grad_norm": 3.546875, "learning_rate": 6.797352475488311e-06, "loss": 1.3723, "step": 4665 }, { "epoch": 1.313827542551695, "grad_norm": 3.21875, "learning_rate": 6.7938400003368975e-06, "loss": 1.6436, "step": 4670 }, { "epoch": 1.31523421015614, "grad_norm": 2.65625, "learning_rate": 6.790323313765654e-06, "loss": 1.4495, "step": 4675 }, { "epoch": 1.3166408777605851, "grad_norm": 3.296875, "learning_rate": 6.786802421075615e-06, "loss": 1.7251, "step": 4680 }, { "epoch": 1.3180475453650302, "grad_norm": 3.265625, "learning_rate": 6.783277327574156e-06, "loss": 1.6689, "step": 4685 }, { "epoch": 1.3194542129694753, "grad_norm": 4.0, "learning_rate": 6.779748038574986e-06, "loss": 1.821, "step": 4690 }, { "epoch": 1.3208608805739204, "grad_norm": 3.59375, "learning_rate": 6.776214559398134e-06, "loss": 1.7114, "step": 4695 }, { "epoch": 1.3222675481783654, "grad_norm": 2.953125, "learning_rate": 6.772676895369951e-06, "loss": 1.6903, "step": 4700 }, { "epoch": 1.3236742157828105, "grad_norm": 4.15625, "learning_rate": 6.769135051823092e-06, "loss": 1.5268, "step": 4705 }, { "epoch": 1.3250808833872556, "grad_norm": 2.78125, "learning_rate": 6.7655890340965125e-06, "loss": 1.8307, "step": 4710 }, { "epoch": 1.3264875509917007, "grad_norm": 2.84375, "learning_rate": 6.762038847535461e-06, "loss": 1.6079, "step": 4715 }, { "epoch": 1.3278942185961458, "grad_norm": 3.078125, "learning_rate": 6.758484497491473e-06, "loss": 1.5382, "step": 4720 }, { "epoch": 1.3293008862005908, "grad_norm": 4.46875, "learning_rate": 6.754925989322353e-06, "loss": 1.5711, "step": 4725 }, { "epoch": 1.330707553805036, "grad_norm": 3.640625, "learning_rate": 6.751363328392182e-06, "loss": 1.7392, "step": 4730 }, { "epoch": 1.332114221409481, "grad_norm": 3.265625, "learning_rate": 6.747796520071293e-06, "loss": 1.4317, "step": 4735 }, { "epoch": 1.333520889013926, "grad_norm": 3.046875, "learning_rate": 6.744225569736276e-06, "loss": 1.7177, "step": 4740 }, { "epoch": 1.3349275566183711, "grad_norm": 1.96875, "learning_rate": 6.740650482769963e-06, "loss": 1.7749, "step": 4745 }, { "epoch": 1.3363342242228162, "grad_norm": 2.6875, "learning_rate": 6.737071264561421e-06, "loss": 1.6097, "step": 4750 }, { "epoch": 1.337740891827261, "grad_norm": 2.9375, "learning_rate": 6.733487920505945e-06, "loss": 1.8134, "step": 4755 }, { "epoch": 1.3391475594317064, "grad_norm": 3.484375, "learning_rate": 6.729900456005049e-06, "loss": 1.7359, "step": 4760 }, { "epoch": 1.3405542270361512, "grad_norm": 3.140625, "learning_rate": 6.7263088764664575e-06, "loss": 1.5704, "step": 4765 }, { "epoch": 1.3419608946405965, "grad_norm": 4.09375, "learning_rate": 6.7227131873041e-06, "loss": 1.4039, "step": 4770 }, { "epoch": 1.3433675622450414, "grad_norm": 3.4375, "learning_rate": 6.719113393938099e-06, "loss": 1.5993, "step": 4775 }, { "epoch": 1.3447742298494867, "grad_norm": 3.171875, "learning_rate": 6.715509501794763e-06, "loss": 1.7964, "step": 4780 }, { "epoch": 1.3461808974539315, "grad_norm": 2.96875, "learning_rate": 6.711901516306583e-06, "loss": 1.4551, "step": 4785 }, { "epoch": 1.3475875650583766, "grad_norm": 3.1875, "learning_rate": 6.708289442912216e-06, "loss": 1.4375, "step": 4790 }, { "epoch": 1.3489942326628217, "grad_norm": 3.265625, "learning_rate": 6.7046732870564816e-06, "loss": 1.4929, "step": 4795 }, { "epoch": 1.3504009002672668, "grad_norm": 4.5, "learning_rate": 6.7010530541903565e-06, "loss": 1.5064, "step": 4800 }, { "epoch": 1.3518075678717119, "grad_norm": 2.671875, "learning_rate": 6.697428749770958e-06, "loss": 1.6775, "step": 4805 }, { "epoch": 1.353214235476157, "grad_norm": 2.84375, "learning_rate": 6.693800379261546e-06, "loss": 1.6935, "step": 4810 }, { "epoch": 1.354620903080602, "grad_norm": 3.390625, "learning_rate": 6.690167948131506e-06, "loss": 1.5423, "step": 4815 }, { "epoch": 1.356027570685047, "grad_norm": 3.28125, "learning_rate": 6.686531461856345e-06, "loss": 1.644, "step": 4820 }, { "epoch": 1.3574342382894922, "grad_norm": 3.625, "learning_rate": 6.6828909259176865e-06, "loss": 1.7919, "step": 4825 }, { "epoch": 1.3588409058939372, "grad_norm": 3.5, "learning_rate": 6.6792463458032534e-06, "loss": 1.5362, "step": 4830 }, { "epoch": 1.3602475734983823, "grad_norm": 4.46875, "learning_rate": 6.675597727006866e-06, "loss": 1.7212, "step": 4835 }, { "epoch": 1.3616542411028274, "grad_norm": 2.421875, "learning_rate": 6.671945075028434e-06, "loss": 1.6932, "step": 4840 }, { "epoch": 1.3630609087072725, "grad_norm": 2.671875, "learning_rate": 6.668288395373946e-06, "loss": 1.594, "step": 4845 }, { "epoch": 1.3644675763117176, "grad_norm": 2.578125, "learning_rate": 6.664627693555462e-06, "loss": 1.8579, "step": 4850 }, { "epoch": 1.3658742439161626, "grad_norm": 4.09375, "learning_rate": 6.660962975091104e-06, "loss": 1.6345, "step": 4855 }, { "epoch": 1.3672809115206077, "grad_norm": 2.703125, "learning_rate": 6.657294245505051e-06, "loss": 1.6163, "step": 4860 }, { "epoch": 1.3686875791250528, "grad_norm": 5.0625, "learning_rate": 6.653621510327525e-06, "loss": 1.6408, "step": 4865 }, { "epoch": 1.3700942467294979, "grad_norm": 3.578125, "learning_rate": 6.64994477509479e-06, "loss": 1.7848, "step": 4870 }, { "epoch": 1.371500914333943, "grad_norm": 6.28125, "learning_rate": 6.646264045349134e-06, "loss": 1.6636, "step": 4875 }, { "epoch": 1.372907581938388, "grad_norm": 3.515625, "learning_rate": 6.642579326638872e-06, "loss": 1.4139, "step": 4880 }, { "epoch": 1.374314249542833, "grad_norm": 4.125, "learning_rate": 6.638890624518332e-06, "loss": 1.5858, "step": 4885 }, { "epoch": 1.3757209171472782, "grad_norm": 3.4375, "learning_rate": 6.63519794454784e-06, "loss": 1.5255, "step": 4890 }, { "epoch": 1.3771275847517233, "grad_norm": 3.03125, "learning_rate": 6.631501292293725e-06, "loss": 1.7696, "step": 4895 }, { "epoch": 1.378534252356168, "grad_norm": 3.28125, "learning_rate": 6.627800673328302e-06, "loss": 1.6701, "step": 4900 }, { "epoch": 1.3799409199606134, "grad_norm": 3.015625, "learning_rate": 6.624096093229863e-06, "loss": 1.6558, "step": 4905 }, { "epoch": 1.3813475875650583, "grad_norm": 5.09375, "learning_rate": 6.620387557582672e-06, "loss": 1.531, "step": 4910 }, { "epoch": 1.3827542551695036, "grad_norm": 3.4375, "learning_rate": 6.616675071976958e-06, "loss": 1.4897, "step": 4915 }, { "epoch": 1.3841609227739484, "grad_norm": 3.265625, "learning_rate": 6.612958642008904e-06, "loss": 1.6086, "step": 4920 }, { "epoch": 1.3855675903783937, "grad_norm": 3.65625, "learning_rate": 6.609238273280633e-06, "loss": 1.496, "step": 4925 }, { "epoch": 1.3869742579828386, "grad_norm": 3.203125, "learning_rate": 6.605513971400212e-06, "loss": 1.7276, "step": 4930 }, { "epoch": 1.3883809255872837, "grad_norm": 4.46875, "learning_rate": 6.601785741981634e-06, "loss": 1.634, "step": 4935 }, { "epoch": 1.3897875931917287, "grad_norm": 3.796875, "learning_rate": 6.5980535906448114e-06, "loss": 1.5213, "step": 4940 }, { "epoch": 1.3911942607961738, "grad_norm": 4.96875, "learning_rate": 6.594317523015571e-06, "loss": 1.6396, "step": 4945 }, { "epoch": 1.3926009284006189, "grad_norm": 3.40625, "learning_rate": 6.590577544725642e-06, "loss": 1.4338, "step": 4950 }, { "epoch": 1.394007596005064, "grad_norm": 2.59375, "learning_rate": 6.586833661412646e-06, "loss": 1.8134, "step": 4955 }, { "epoch": 1.395414263609509, "grad_norm": 2.859375, "learning_rate": 6.583085878720095e-06, "loss": 1.5643, "step": 4960 }, { "epoch": 1.3968209312139541, "grad_norm": 2.59375, "learning_rate": 6.579334202297376e-06, "loss": 1.5369, "step": 4965 }, { "epoch": 1.3982275988183992, "grad_norm": 3.171875, "learning_rate": 6.575578637799747e-06, "loss": 1.6658, "step": 4970 }, { "epoch": 1.3996342664228443, "grad_norm": 2.109375, "learning_rate": 6.5718191908883265e-06, "loss": 1.7028, "step": 4975 }, { "epoch": 1.4010409340272894, "grad_norm": 3.28125, "learning_rate": 6.568055867230086e-06, "loss": 1.4661, "step": 4980 }, { "epoch": 1.4024476016317344, "grad_norm": 3.890625, "learning_rate": 6.564288672497838e-06, "loss": 1.4148, "step": 4985 }, { "epoch": 1.4038542692361795, "grad_norm": 3.15625, "learning_rate": 6.560517612370232e-06, "loss": 1.4908, "step": 4990 }, { "epoch": 1.4052609368406246, "grad_norm": 3.5, "learning_rate": 6.556742692531747e-06, "loss": 1.7999, "step": 4995 }, { "epoch": 1.4066676044450697, "grad_norm": 3.25, "learning_rate": 6.552963918672675e-06, "loss": 1.5283, "step": 5000 }, { "epoch": 1.4080742720495147, "grad_norm": 3.53125, "learning_rate": 6.549181296489121e-06, "loss": 1.6524, "step": 5005 }, { "epoch": 1.4094809396539598, "grad_norm": 2.59375, "learning_rate": 6.545394831682989e-06, "loss": 1.5902, "step": 5010 }, { "epoch": 1.410887607258405, "grad_norm": 3.015625, "learning_rate": 6.541604529961978e-06, "loss": 1.6948, "step": 5015 }, { "epoch": 1.41229427486285, "grad_norm": 2.84375, "learning_rate": 6.537810397039568e-06, "loss": 1.4649, "step": 5020 }, { "epoch": 1.413700942467295, "grad_norm": 4.0, "learning_rate": 6.534012438635015e-06, "loss": 1.5802, "step": 5025 }, { "epoch": 1.4151076100717401, "grad_norm": 3.296875, "learning_rate": 6.530210660473341e-06, "loss": 1.6675, "step": 5030 }, { "epoch": 1.416514277676185, "grad_norm": 2.921875, "learning_rate": 6.526405068285329e-06, "loss": 1.7445, "step": 5035 }, { "epoch": 1.4179209452806303, "grad_norm": 2.546875, "learning_rate": 6.522595667807506e-06, "loss": 1.6208, "step": 5040 }, { "epoch": 1.4193276128850751, "grad_norm": 4.15625, "learning_rate": 6.518782464782144e-06, "loss": 1.6644, "step": 5045 }, { "epoch": 1.4207342804895204, "grad_norm": 3.890625, "learning_rate": 6.514965464957246e-06, "loss": 1.4335, "step": 5050 }, { "epoch": 1.4221409480939653, "grad_norm": 2.9375, "learning_rate": 6.511144674086536e-06, "loss": 1.5942, "step": 5055 }, { "epoch": 1.4235476156984106, "grad_norm": 3.859375, "learning_rate": 6.507320097929453e-06, "loss": 1.5058, "step": 5060 }, { "epoch": 1.4249542833028555, "grad_norm": 2.765625, "learning_rate": 6.5034917422511465e-06, "loss": 1.6495, "step": 5065 }, { "epoch": 1.4263609509073005, "grad_norm": 3.703125, "learning_rate": 6.499659612822458e-06, "loss": 1.6415, "step": 5070 }, { "epoch": 1.4277676185117456, "grad_norm": 2.890625, "learning_rate": 6.49582371541992e-06, "loss": 1.5523, "step": 5075 }, { "epoch": 1.4291742861161907, "grad_norm": 2.875, "learning_rate": 6.491984055825744e-06, "loss": 1.5582, "step": 5080 }, { "epoch": 1.4305809537206358, "grad_norm": 3.359375, "learning_rate": 6.488140639827812e-06, "loss": 1.4445, "step": 5085 }, { "epoch": 1.4319876213250808, "grad_norm": 3.609375, "learning_rate": 6.484293473219671e-06, "loss": 1.8517, "step": 5090 }, { "epoch": 1.433394288929526, "grad_norm": 3.015625, "learning_rate": 6.480442561800517e-06, "loss": 1.753, "step": 5095 }, { "epoch": 1.434800956533971, "grad_norm": 3.8125, "learning_rate": 6.4765879113751965e-06, "loss": 1.6244, "step": 5100 }, { "epoch": 1.436207624138416, "grad_norm": 3.375, "learning_rate": 6.472729527754188e-06, "loss": 1.6974, "step": 5105 }, { "epoch": 1.4376142917428612, "grad_norm": 3.59375, "learning_rate": 6.4688674167536e-06, "loss": 1.5025, "step": 5110 }, { "epoch": 1.4390209593473062, "grad_norm": 3.046875, "learning_rate": 6.465001584195157e-06, "loss": 1.3674, "step": 5115 }, { "epoch": 1.4404276269517513, "grad_norm": 3.65625, "learning_rate": 6.461132035906196e-06, "loss": 1.627, "step": 5120 }, { "epoch": 1.4418342945561964, "grad_norm": 3.890625, "learning_rate": 6.4572587777196534e-06, "loss": 1.6016, "step": 5125 }, { "epoch": 1.4432409621606415, "grad_norm": 3.1875, "learning_rate": 6.453381815474059e-06, "loss": 1.5537, "step": 5130 }, { "epoch": 1.4446476297650865, "grad_norm": 5.5, "learning_rate": 6.4495011550135245e-06, "loss": 1.7066, "step": 5135 }, { "epoch": 1.4460542973695316, "grad_norm": 5.09375, "learning_rate": 6.4456168021877376e-06, "loss": 1.4199, "step": 5140 }, { "epoch": 1.4474609649739767, "grad_norm": 3.515625, "learning_rate": 6.4417287628519504e-06, "loss": 1.6078, "step": 5145 }, { "epoch": 1.4488676325784218, "grad_norm": 5.9375, "learning_rate": 6.437837042866975e-06, "loss": 1.6382, "step": 5150 }, { "epoch": 1.4502743001828668, "grad_norm": 4.53125, "learning_rate": 6.43394164809917e-06, "loss": 1.7017, "step": 5155 }, { "epoch": 1.451680967787312, "grad_norm": 2.953125, "learning_rate": 6.4300425844204305e-06, "loss": 1.4553, "step": 5160 }, { "epoch": 1.453087635391757, "grad_norm": 4.03125, "learning_rate": 6.426139857708187e-06, "loss": 1.4535, "step": 5165 }, { "epoch": 1.4544943029962019, "grad_norm": 2.8125, "learning_rate": 6.422233473845388e-06, "loss": 1.578, "step": 5170 }, { "epoch": 1.4559009706006472, "grad_norm": 2.875, "learning_rate": 6.418323438720497e-06, "loss": 1.5562, "step": 5175 }, { "epoch": 1.457307638205092, "grad_norm": 3.34375, "learning_rate": 6.414409758227482e-06, "loss": 1.4599, "step": 5180 }, { "epoch": 1.4587143058095373, "grad_norm": 3.875, "learning_rate": 6.4104924382657995e-06, "loss": 1.5497, "step": 5185 }, { "epoch": 1.4601209734139822, "grad_norm": 3.34375, "learning_rate": 6.4065714847404035e-06, "loss": 1.4751, "step": 5190 }, { "epoch": 1.4615276410184275, "grad_norm": 2.765625, "learning_rate": 6.402646903561715e-06, "loss": 1.7455, "step": 5195 }, { "epoch": 1.4629343086228723, "grad_norm": 4.5625, "learning_rate": 6.398718700645628e-06, "loss": 1.4352, "step": 5200 }, { "epoch": 1.4643409762273174, "grad_norm": 3.65625, "learning_rate": 6.394786881913496e-06, "loss": 1.6347, "step": 5205 }, { "epoch": 1.4657476438317625, "grad_norm": 8.8125, "learning_rate": 6.39085145329212e-06, "loss": 1.4356, "step": 5210 }, { "epoch": 1.4671543114362076, "grad_norm": 3.78125, "learning_rate": 6.386912420713746e-06, "loss": 1.6925, "step": 5215 }, { "epoch": 1.4685609790406526, "grad_norm": 4.96875, "learning_rate": 6.382969790116052e-06, "loss": 1.6203, "step": 5220 }, { "epoch": 1.4699676466450977, "grad_norm": 4.0, "learning_rate": 6.379023567442136e-06, "loss": 1.5191, "step": 5225 }, { "epoch": 1.4713743142495428, "grad_norm": 4.5625, "learning_rate": 6.375073758640516e-06, "loss": 1.1834, "step": 5230 }, { "epoch": 1.4727809818539879, "grad_norm": 3.53125, "learning_rate": 6.371120369665112e-06, "loss": 1.4152, "step": 5235 }, { "epoch": 1.474187649458433, "grad_norm": 3.21875, "learning_rate": 6.3671634064752425e-06, "loss": 1.3893, "step": 5240 }, { "epoch": 1.475594317062878, "grad_norm": 3.671875, "learning_rate": 6.3632028750356125e-06, "loss": 1.7045, "step": 5245 }, { "epoch": 1.477000984667323, "grad_norm": 3.640625, "learning_rate": 6.359238781316307e-06, "loss": 1.653, "step": 5250 }, { "epoch": 1.4784076522717682, "grad_norm": 3.25, "learning_rate": 6.35527113129278e-06, "loss": 1.6699, "step": 5255 }, { "epoch": 1.4798143198762133, "grad_norm": 2.65625, "learning_rate": 6.351299930945846e-06, "loss": 1.5831, "step": 5260 }, { "epoch": 1.4812209874806583, "grad_norm": 3.015625, "learning_rate": 6.347325186261672e-06, "loss": 1.5442, "step": 5265 }, { "epoch": 1.4826276550851034, "grad_norm": 4.0, "learning_rate": 6.343346903231769e-06, "loss": 1.5064, "step": 5270 }, { "epoch": 1.4840343226895485, "grad_norm": 3.0, "learning_rate": 6.339365087852977e-06, "loss": 1.7953, "step": 5275 }, { "epoch": 1.4854409902939936, "grad_norm": 3.203125, "learning_rate": 6.335379746127465e-06, "loss": 1.7672, "step": 5280 }, { "epoch": 1.4868476578984386, "grad_norm": 2.3125, "learning_rate": 6.3313908840627165e-06, "loss": 1.4996, "step": 5285 }, { "epoch": 1.4882543255028837, "grad_norm": 2.59375, "learning_rate": 6.327398507671523e-06, "loss": 1.7181, "step": 5290 }, { "epoch": 1.4896609931073288, "grad_norm": 3.171875, "learning_rate": 6.3234026229719685e-06, "loss": 1.7623, "step": 5295 }, { "epoch": 1.4910676607117739, "grad_norm": 3.46875, "learning_rate": 6.319403235987431e-06, "loss": 1.9328, "step": 5300 }, { "epoch": 1.492474328316219, "grad_norm": 3.625, "learning_rate": 6.315400352746566e-06, "loss": 1.6441, "step": 5305 }, { "epoch": 1.493880995920664, "grad_norm": 3.921875, "learning_rate": 6.311393979283296e-06, "loss": 1.5421, "step": 5310 }, { "epoch": 1.495287663525109, "grad_norm": 4.46875, "learning_rate": 6.307384121636811e-06, "loss": 1.6609, "step": 5315 }, { "epoch": 1.4966943311295542, "grad_norm": 4.15625, "learning_rate": 6.303370785851545e-06, "loss": 1.7574, "step": 5320 }, { "epoch": 1.498100998733999, "grad_norm": 2.9375, "learning_rate": 6.299353977977184e-06, "loss": 1.7455, "step": 5325 }, { "epoch": 1.4995076663384443, "grad_norm": 3.015625, "learning_rate": 6.295333704068641e-06, "loss": 1.6189, "step": 5330 }, { "epoch": 1.5009143339428892, "grad_norm": 4.25, "learning_rate": 6.2913099701860565e-06, "loss": 1.5672, "step": 5335 }, { "epoch": 1.5023210015473345, "grad_norm": 3.6875, "learning_rate": 6.287282782394786e-06, "loss": 1.6904, "step": 5340 }, { "epoch": 1.5037276691517794, "grad_norm": 2.890625, "learning_rate": 6.283252146765391e-06, "loss": 1.5382, "step": 5345 }, { "epoch": 1.5051343367562247, "grad_norm": 2.53125, "learning_rate": 6.279218069373631e-06, "loss": 1.7194, "step": 5350 }, { "epoch": 1.5065410043606695, "grad_norm": 3.140625, "learning_rate": 6.275180556300452e-06, "loss": 1.5181, "step": 5355 }, { "epoch": 1.5079476719651146, "grad_norm": 3.765625, "learning_rate": 6.27113961363198e-06, "loss": 1.5198, "step": 5360 }, { "epoch": 1.5093543395695597, "grad_norm": 2.421875, "learning_rate": 6.267095247459514e-06, "loss": 1.7598, "step": 5365 }, { "epoch": 1.5107610071740047, "grad_norm": 3.828125, "learning_rate": 6.263047463879506e-06, "loss": 1.7883, "step": 5370 }, { "epoch": 1.5121676747784498, "grad_norm": 4.4375, "learning_rate": 6.258996268993568e-06, "loss": 1.4889, "step": 5375 }, { "epoch": 1.513574342382895, "grad_norm": 3.28125, "learning_rate": 6.254941668908447e-06, "loss": 1.5787, "step": 5380 }, { "epoch": 1.51498100998734, "grad_norm": 4.9375, "learning_rate": 6.250883669736028e-06, "loss": 1.4366, "step": 5385 }, { "epoch": 1.516387677591785, "grad_norm": 3.578125, "learning_rate": 6.246822277593317e-06, "loss": 1.5613, "step": 5390 }, { "epoch": 1.5177943451962301, "grad_norm": 3.40625, "learning_rate": 6.242757498602435e-06, "loss": 1.7432, "step": 5395 }, { "epoch": 1.5192010128006752, "grad_norm": 3.546875, "learning_rate": 6.238689338890608e-06, "loss": 1.4916, "step": 5400 }, { "epoch": 1.5206076804051203, "grad_norm": 3.828125, "learning_rate": 6.23461780459016e-06, "loss": 1.4807, "step": 5405 }, { "epoch": 1.5220143480095654, "grad_norm": 4.09375, "learning_rate": 6.2305429018385e-06, "loss": 1.7498, "step": 5410 }, { "epoch": 1.5234210156140104, "grad_norm": 3.0, "learning_rate": 6.226464636778116e-06, "loss": 1.6306, "step": 5415 }, { "epoch": 1.5248276832184555, "grad_norm": 2.890625, "learning_rate": 6.222383015556562e-06, "loss": 1.5031, "step": 5420 }, { "epoch": 1.5262343508229006, "grad_norm": 2.578125, "learning_rate": 6.2182980443264545e-06, "loss": 1.607, "step": 5425 }, { "epoch": 1.5276410184273455, "grad_norm": 3.59375, "learning_rate": 6.2142097292454555e-06, "loss": 1.5335, "step": 5430 }, { "epoch": 1.5290476860317908, "grad_norm": 4.09375, "learning_rate": 6.210118076476271e-06, "loss": 1.6874, "step": 5435 }, { "epoch": 1.5304543536362356, "grad_norm": 3.125, "learning_rate": 6.206023092186637e-06, "loss": 1.707, "step": 5440 }, { "epoch": 1.531861021240681, "grad_norm": 2.859375, "learning_rate": 6.20192478254931e-06, "loss": 1.5466, "step": 5445 }, { "epoch": 1.5332676888451258, "grad_norm": 2.9375, "learning_rate": 6.197823153742064e-06, "loss": 1.4202, "step": 5450 }, { "epoch": 1.534674356449571, "grad_norm": 2.953125, "learning_rate": 6.19371821194767e-06, "loss": 1.7848, "step": 5455 }, { "epoch": 1.536081024054016, "grad_norm": 3.625, "learning_rate": 6.189609963353897e-06, "loss": 1.6941, "step": 5460 }, { "epoch": 1.5374876916584612, "grad_norm": 2.5, "learning_rate": 6.185498414153494e-06, "loss": 1.6015, "step": 5465 }, { "epoch": 1.538894359262906, "grad_norm": 3.75, "learning_rate": 6.181383570544195e-06, "loss": 1.6375, "step": 5470 }, { "epoch": 1.5403010268673514, "grad_norm": 3.390625, "learning_rate": 6.17726543872869e-06, "loss": 1.3859, "step": 5475 }, { "epoch": 1.5417076944717962, "grad_norm": 3.734375, "learning_rate": 6.1731440249146286e-06, "loss": 1.6703, "step": 5480 }, { "epoch": 1.5431143620762415, "grad_norm": 2.796875, "learning_rate": 6.169019335314612e-06, "loss": 1.6369, "step": 5485 }, { "epoch": 1.5445210296806864, "grad_norm": 2.921875, "learning_rate": 6.164891376146173e-06, "loss": 1.488, "step": 5490 }, { "epoch": 1.5459276972851317, "grad_norm": 2.625, "learning_rate": 6.160760153631775e-06, "loss": 1.4559, "step": 5495 }, { "epoch": 1.5473343648895765, "grad_norm": 3.25, "learning_rate": 6.156625673998804e-06, "loss": 1.5765, "step": 5500 }, { "epoch": 1.5487410324940216, "grad_norm": 5.75, "learning_rate": 6.152487943479551e-06, "loss": 1.6279, "step": 5505 }, { "epoch": 1.5501477000984667, "grad_norm": 3.484375, "learning_rate": 6.14834696831121e-06, "loss": 1.4578, "step": 5510 }, { "epoch": 1.5515543677029118, "grad_norm": 4.59375, "learning_rate": 6.144202754735866e-06, "loss": 1.4965, "step": 5515 }, { "epoch": 1.5529610353073569, "grad_norm": 4.1875, "learning_rate": 6.140055309000482e-06, "loss": 1.4934, "step": 5520 }, { "epoch": 1.554367702911802, "grad_norm": 3.265625, "learning_rate": 6.135904637356901e-06, "loss": 1.2053, "step": 5525 }, { "epoch": 1.555774370516247, "grad_norm": 3.203125, "learning_rate": 6.13175074606182e-06, "loss": 1.9499, "step": 5530 }, { "epoch": 1.557181038120692, "grad_norm": 3.03125, "learning_rate": 6.127593641376793e-06, "loss": 1.681, "step": 5535 }, { "epoch": 1.5585877057251372, "grad_norm": 3.15625, "learning_rate": 6.12343332956822e-06, "loss": 1.5296, "step": 5540 }, { "epoch": 1.5599943733295822, "grad_norm": 3.5, "learning_rate": 6.119269816907332e-06, "loss": 1.4921, "step": 5545 }, { "epoch": 1.5614010409340273, "grad_norm": 4.65625, "learning_rate": 6.115103109670187e-06, "loss": 1.4377, "step": 5550 }, { "epoch": 1.5628077085384724, "grad_norm": 3.703125, "learning_rate": 6.110933214137657e-06, "loss": 1.3758, "step": 5555 }, { "epoch": 1.5642143761429175, "grad_norm": 3.640625, "learning_rate": 6.10676013659542e-06, "loss": 1.3795, "step": 5560 }, { "epoch": 1.5656210437473626, "grad_norm": 3.125, "learning_rate": 6.1025838833339545e-06, "loss": 1.4445, "step": 5565 }, { "epoch": 1.5670277113518076, "grad_norm": 3.328125, "learning_rate": 6.0984044606485185e-06, "loss": 1.6025, "step": 5570 }, { "epoch": 1.5684343789562525, "grad_norm": 4.21875, "learning_rate": 6.094221874839157e-06, "loss": 1.6672, "step": 5575 }, { "epoch": 1.5698410465606978, "grad_norm": 3.015625, "learning_rate": 6.090036132210673e-06, "loss": 1.7502, "step": 5580 }, { "epoch": 1.5712477141651426, "grad_norm": 3.25, "learning_rate": 6.085847239072634e-06, "loss": 1.5658, "step": 5585 }, { "epoch": 1.572654381769588, "grad_norm": 3.28125, "learning_rate": 6.081655201739359e-06, "loss": 1.5319, "step": 5590 }, { "epoch": 1.5740610493740328, "grad_norm": 3.953125, "learning_rate": 6.077460026529901e-06, "loss": 1.4186, "step": 5595 }, { "epoch": 1.575467716978478, "grad_norm": 2.59375, "learning_rate": 6.073261719768044e-06, "loss": 1.4423, "step": 5600 }, { "epoch": 1.576874384582923, "grad_norm": 2.4375, "learning_rate": 6.069060287782296e-06, "loss": 1.6494, "step": 5605 }, { "epoch": 1.5782810521873682, "grad_norm": 4.90625, "learning_rate": 6.064855736905872e-06, "loss": 1.5472, "step": 5610 }, { "epoch": 1.579687719791813, "grad_norm": 3.203125, "learning_rate": 6.060648073476691e-06, "loss": 1.7602, "step": 5615 }, { "epoch": 1.5810943873962584, "grad_norm": 3.609375, "learning_rate": 6.056437303837362e-06, "loss": 1.5036, "step": 5620 }, { "epoch": 1.5825010550007033, "grad_norm": 2.53125, "learning_rate": 6.052223434335179e-06, "loss": 1.7547, "step": 5625 }, { "epoch": 1.5839077226051486, "grad_norm": 3.15625, "learning_rate": 6.0480064713221036e-06, "loss": 1.3458, "step": 5630 }, { "epoch": 1.5853143902095934, "grad_norm": 3.046875, "learning_rate": 6.043786421154767e-06, "loss": 1.6531, "step": 5635 }, { "epoch": 1.5867210578140385, "grad_norm": 3.015625, "learning_rate": 6.0395632901944485e-06, "loss": 1.5451, "step": 5640 }, { "epoch": 1.5881277254184836, "grad_norm": 2.96875, "learning_rate": 6.035337084807077e-06, "loss": 1.5324, "step": 5645 }, { "epoch": 1.5895343930229286, "grad_norm": 4.84375, "learning_rate": 6.031107811363208e-06, "loss": 1.6149, "step": 5650 }, { "epoch": 1.5909410606273737, "grad_norm": 2.984375, "learning_rate": 6.026875476238031e-06, "loss": 1.7126, "step": 5655 }, { "epoch": 1.5923477282318188, "grad_norm": 3.09375, "learning_rate": 6.022640085811341e-06, "loss": 1.4252, "step": 5660 }, { "epoch": 1.5937543958362639, "grad_norm": 2.796875, "learning_rate": 6.018401646467546e-06, "loss": 1.5583, "step": 5665 }, { "epoch": 1.595161063440709, "grad_norm": 3.125, "learning_rate": 6.014160164595648e-06, "loss": 1.6051, "step": 5670 }, { "epoch": 1.596567731045154, "grad_norm": 4.40625, "learning_rate": 6.009915646589231e-06, "loss": 1.5316, "step": 5675 }, { "epoch": 1.5979743986495991, "grad_norm": 3.5625, "learning_rate": 6.005668098846465e-06, "loss": 1.711, "step": 5680 }, { "epoch": 1.5993810662540442, "grad_norm": 2.28125, "learning_rate": 6.001417527770076e-06, "loss": 1.5814, "step": 5685 }, { "epoch": 1.6007877338584893, "grad_norm": 3.015625, "learning_rate": 5.9971639397673565e-06, "loss": 1.768, "step": 5690 }, { "epoch": 1.6021944014629343, "grad_norm": 3.421875, "learning_rate": 5.992907341250142e-06, "loss": 1.6234, "step": 5695 }, { "epoch": 1.6036010690673794, "grad_norm": 2.75, "learning_rate": 5.988647738634803e-06, "loss": 1.5887, "step": 5700 }, { "epoch": 1.6050077366718245, "grad_norm": 3.28125, "learning_rate": 5.984385138342248e-06, "loss": 1.7158, "step": 5705 }, { "epoch": 1.6064144042762694, "grad_norm": 4.5625, "learning_rate": 5.980119546797895e-06, "loss": 1.4228, "step": 5710 }, { "epoch": 1.6078210718807147, "grad_norm": 2.5625, "learning_rate": 5.975850970431675e-06, "loss": 1.6542, "step": 5715 }, { "epoch": 1.6092277394851595, "grad_norm": 2.34375, "learning_rate": 5.971579415678018e-06, "loss": 1.616, "step": 5720 }, { "epoch": 1.6106344070896048, "grad_norm": 2.625, "learning_rate": 5.967304888975844e-06, "loss": 1.6365, "step": 5725 }, { "epoch": 1.6120410746940497, "grad_norm": 3.421875, "learning_rate": 5.9630273967685505e-06, "loss": 1.5371, "step": 5730 }, { "epoch": 1.613447742298495, "grad_norm": 2.875, "learning_rate": 5.958746945504009e-06, "loss": 1.8092, "step": 5735 }, { "epoch": 1.6148544099029398, "grad_norm": 2.734375, "learning_rate": 5.954463541634547e-06, "loss": 1.4343, "step": 5740 }, { "epoch": 1.6162610775073851, "grad_norm": 2.4375, "learning_rate": 5.950177191616946e-06, "loss": 1.5462, "step": 5745 }, { "epoch": 1.61766774511183, "grad_norm": 2.9375, "learning_rate": 5.94588790191243e-06, "loss": 1.3474, "step": 5750 }, { "epoch": 1.6190744127162753, "grad_norm": 3.296875, "learning_rate": 5.941595678986648e-06, "loss": 1.4929, "step": 5755 }, { "epoch": 1.6204810803207201, "grad_norm": 3.0, "learning_rate": 5.937300529309677e-06, "loss": 1.6187, "step": 5760 }, { "epoch": 1.6218877479251654, "grad_norm": 3.28125, "learning_rate": 5.933002459356004e-06, "loss": 1.5226, "step": 5765 }, { "epoch": 1.6232944155296103, "grad_norm": 2.90625, "learning_rate": 5.928701475604515e-06, "loss": 1.7075, "step": 5770 }, { "epoch": 1.6247010831340554, "grad_norm": 3.609375, "learning_rate": 5.924397584538491e-06, "loss": 1.6602, "step": 5775 }, { "epoch": 1.6261077507385004, "grad_norm": 9.3125, "learning_rate": 5.920090792645595e-06, "loss": 1.6669, "step": 5780 }, { "epoch": 1.6275144183429455, "grad_norm": 3.484375, "learning_rate": 5.915781106417863e-06, "loss": 1.4352, "step": 5785 }, { "epoch": 1.6289210859473906, "grad_norm": 2.296875, "learning_rate": 5.911468532351694e-06, "loss": 1.5932, "step": 5790 }, { "epoch": 1.6303277535518357, "grad_norm": 2.546875, "learning_rate": 5.907153076947839e-06, "loss": 1.422, "step": 5795 }, { "epoch": 1.6317344211562808, "grad_norm": 2.953125, "learning_rate": 5.9028347467113926e-06, "loss": 1.4173, "step": 5800 }, { "epoch": 1.6331410887607258, "grad_norm": 3.984375, "learning_rate": 5.898513548151782e-06, "loss": 1.6911, "step": 5805 }, { "epoch": 1.634547756365171, "grad_norm": 3.421875, "learning_rate": 5.894189487782763e-06, "loss": 1.7045, "step": 5810 }, { "epoch": 1.635954423969616, "grad_norm": 2.96875, "learning_rate": 5.889862572122399e-06, "loss": 1.4862, "step": 5815 }, { "epoch": 1.637361091574061, "grad_norm": 3.53125, "learning_rate": 5.88553280769306e-06, "loss": 1.3872, "step": 5820 }, { "epoch": 1.6387677591785061, "grad_norm": 2.59375, "learning_rate": 5.88120020102141e-06, "loss": 1.5883, "step": 5825 }, { "epoch": 1.6401744267829512, "grad_norm": 2.765625, "learning_rate": 5.876864758638401e-06, "loss": 1.4336, "step": 5830 }, { "epoch": 1.6415810943873963, "grad_norm": 3.46875, "learning_rate": 5.872526487079253e-06, "loss": 1.528, "step": 5835 }, { "epoch": 1.6429877619918414, "grad_norm": 4.9375, "learning_rate": 5.868185392883454e-06, "loss": 1.6604, "step": 5840 }, { "epoch": 1.6443944295962862, "grad_norm": 3.21875, "learning_rate": 5.8638414825947476e-06, "loss": 1.6125, "step": 5845 }, { "epoch": 1.6458010972007315, "grad_norm": 2.859375, "learning_rate": 5.859494762761122e-06, "loss": 1.3778, "step": 5850 }, { "epoch": 1.6472077648051764, "grad_norm": 3.015625, "learning_rate": 5.855145239934797e-06, "loss": 1.7263, "step": 5855 }, { "epoch": 1.6486144324096217, "grad_norm": 3.953125, "learning_rate": 5.850792920672225e-06, "loss": 1.3598, "step": 5860 }, { "epoch": 1.6500211000140665, "grad_norm": 3.3125, "learning_rate": 5.846437811534068e-06, "loss": 1.5234, "step": 5865 }, { "epoch": 1.6514277676185118, "grad_norm": 3.03125, "learning_rate": 5.842079919085192e-06, "loss": 1.6599, "step": 5870 }, { "epoch": 1.6528344352229567, "grad_norm": 5.46875, "learning_rate": 5.837719249894665e-06, "loss": 1.7368, "step": 5875 }, { "epoch": 1.654241102827402, "grad_norm": 3.734375, "learning_rate": 5.833355810535734e-06, "loss": 1.6333, "step": 5880 }, { "epoch": 1.6556477704318469, "grad_norm": 3.796875, "learning_rate": 5.8289896075858255e-06, "loss": 1.7083, "step": 5885 }, { "epoch": 1.6570544380362922, "grad_norm": 3.421875, "learning_rate": 5.824620647626533e-06, "loss": 1.7947, "step": 5890 }, { "epoch": 1.658461105640737, "grad_norm": 3.328125, "learning_rate": 5.820248937243602e-06, "loss": 1.5477, "step": 5895 }, { "epoch": 1.6598677732451823, "grad_norm": 2.953125, "learning_rate": 5.815874483026926e-06, "loss": 1.7948, "step": 5900 }, { "epoch": 1.6612744408496272, "grad_norm": 3.75, "learning_rate": 5.811497291570535e-06, "loss": 1.2987, "step": 5905 }, { "epoch": 1.6626811084540725, "grad_norm": 3.046875, "learning_rate": 5.807117369472585e-06, "loss": 1.6014, "step": 5910 }, { "epoch": 1.6640877760585173, "grad_norm": 4.28125, "learning_rate": 5.8027347233353465e-06, "loss": 1.5098, "step": 5915 }, { "epoch": 1.6654944436629624, "grad_norm": 4.34375, "learning_rate": 5.798349359765198e-06, "loss": 1.5863, "step": 5920 }, { "epoch": 1.6669011112674075, "grad_norm": 3.875, "learning_rate": 5.793961285372614e-06, "loss": 1.5017, "step": 5925 }, { "epoch": 1.6683077788718526, "grad_norm": 5.0, "learning_rate": 5.789570506772154e-06, "loss": 1.4812, "step": 5930 }, { "epoch": 1.6697144464762976, "grad_norm": 2.1875, "learning_rate": 5.785177030582455e-06, "loss": 1.613, "step": 5935 }, { "epoch": 1.6711211140807427, "grad_norm": 2.46875, "learning_rate": 5.7807808634262205e-06, "loss": 1.4484, "step": 5940 }, { "epoch": 1.6725277816851878, "grad_norm": 2.78125, "learning_rate": 5.776382011930211e-06, "loss": 1.5913, "step": 5945 }, { "epoch": 1.6739344492896329, "grad_norm": 3.578125, "learning_rate": 5.77198048272523e-06, "loss": 1.5753, "step": 5950 }, { "epoch": 1.675341116894078, "grad_norm": 3.375, "learning_rate": 5.767576282446121e-06, "loss": 1.6776, "step": 5955 }, { "epoch": 1.676747784498523, "grad_norm": 4.21875, "learning_rate": 5.763169417731751e-06, "loss": 1.5151, "step": 5960 }, { "epoch": 1.678154452102968, "grad_norm": 4.84375, "learning_rate": 5.758759895225008e-06, "loss": 1.2589, "step": 5965 }, { "epoch": 1.6795611197074132, "grad_norm": 3.375, "learning_rate": 5.75434772157278e-06, "loss": 1.5056, "step": 5970 }, { "epoch": 1.6809677873118583, "grad_norm": 2.828125, "learning_rate": 5.749932903425957e-06, "loss": 1.7067, "step": 5975 }, { "epoch": 1.6823744549163033, "grad_norm": 3.4375, "learning_rate": 5.745515447439411e-06, "loss": 1.4572, "step": 5980 }, { "epoch": 1.6837811225207484, "grad_norm": 2.90625, "learning_rate": 5.741095360271992e-06, "loss": 1.6003, "step": 5985 }, { "epoch": 1.6851877901251933, "grad_norm": 3.859375, "learning_rate": 5.736672648586518e-06, "loss": 1.5607, "step": 5990 }, { "epoch": 1.6865944577296386, "grad_norm": 3.09375, "learning_rate": 5.732247319049761e-06, "loss": 1.6931, "step": 5995 }, { "epoch": 1.6880011253340834, "grad_norm": 3.375, "learning_rate": 5.727819378332437e-06, "loss": 1.4869, "step": 6000 }, { "epoch": 1.6894077929385287, "grad_norm": 3.640625, "learning_rate": 5.723388833109205e-06, "loss": 1.6263, "step": 6005 }, { "epoch": 1.6908144605429736, "grad_norm": 4.59375, "learning_rate": 5.718955690058644e-06, "loss": 1.4953, "step": 6010 }, { "epoch": 1.6922211281474189, "grad_norm": 4.46875, "learning_rate": 5.714519955863249e-06, "loss": 1.378, "step": 6015 }, { "epoch": 1.6936277957518637, "grad_norm": 3.546875, "learning_rate": 5.710081637209425e-06, "loss": 1.6132, "step": 6020 }, { "epoch": 1.695034463356309, "grad_norm": 2.578125, "learning_rate": 5.705640740787467e-06, "loss": 1.6166, "step": 6025 }, { "epoch": 1.6964411309607539, "grad_norm": 3.296875, "learning_rate": 5.701197273291563e-06, "loss": 1.4201, "step": 6030 }, { "epoch": 1.6978477985651992, "grad_norm": 4.03125, "learning_rate": 5.696751241419771e-06, "loss": 1.6117, "step": 6035 }, { "epoch": 1.699254466169644, "grad_norm": 3.6875, "learning_rate": 5.692302651874016e-06, "loss": 1.6267, "step": 6040 }, { "epoch": 1.7006611337740893, "grad_norm": 3.71875, "learning_rate": 5.68785151136008e-06, "loss": 1.6265, "step": 6045 }, { "epoch": 1.7020678013785342, "grad_norm": 3.09375, "learning_rate": 5.683397826587586e-06, "loss": 1.502, "step": 6050 }, { "epoch": 1.7034744689829793, "grad_norm": 3.4375, "learning_rate": 5.678941604269999e-06, "loss": 1.66, "step": 6055 }, { "epoch": 1.7048811365874244, "grad_norm": 3.46875, "learning_rate": 5.674482851124603e-06, "loss": 1.6555, "step": 6060 }, { "epoch": 1.7062878041918694, "grad_norm": 2.84375, "learning_rate": 5.670021573872498e-06, "loss": 1.6171, "step": 6065 }, { "epoch": 1.7076944717963145, "grad_norm": 3.03125, "learning_rate": 5.665557779238593e-06, "loss": 1.723, "step": 6070 }, { "epoch": 1.7091011394007596, "grad_norm": 3.28125, "learning_rate": 5.661091473951587e-06, "loss": 1.5578, "step": 6075 }, { "epoch": 1.7105078070052047, "grad_norm": 2.765625, "learning_rate": 5.656622664743965e-06, "loss": 1.6143, "step": 6080 }, { "epoch": 1.7119144746096497, "grad_norm": 4.71875, "learning_rate": 5.652151358351988e-06, "loss": 1.5817, "step": 6085 }, { "epoch": 1.7133211422140948, "grad_norm": 3.375, "learning_rate": 5.64767756151568e-06, "loss": 1.5214, "step": 6090 }, { "epoch": 1.71472780981854, "grad_norm": 3.171875, "learning_rate": 5.643201280978816e-06, "loss": 1.6993, "step": 6095 }, { "epoch": 1.716134477422985, "grad_norm": 3.375, "learning_rate": 5.638722523488921e-06, "loss": 1.6905, "step": 6100 }, { "epoch": 1.71754114502743, "grad_norm": 3.125, "learning_rate": 5.63424129579725e-06, "loss": 1.7052, "step": 6105 }, { "epoch": 1.7189478126318751, "grad_norm": 2.875, "learning_rate": 5.629757604658781e-06, "loss": 1.6538, "step": 6110 }, { "epoch": 1.7203544802363202, "grad_norm": 3.15625, "learning_rate": 5.625271456832209e-06, "loss": 1.4619, "step": 6115 }, { "epoch": 1.7217611478407653, "grad_norm": 3.25, "learning_rate": 5.620782859079929e-06, "loss": 1.7455, "step": 6120 }, { "epoch": 1.7231678154452101, "grad_norm": 2.453125, "learning_rate": 5.6162918181680264e-06, "loss": 1.5303, "step": 6125 }, { "epoch": 1.7245744830496554, "grad_norm": 3.0625, "learning_rate": 5.611798340866278e-06, "loss": 1.6425, "step": 6130 }, { "epoch": 1.7259811506541003, "grad_norm": 4.3125, "learning_rate": 5.607302433948126e-06, "loss": 1.7051, "step": 6135 }, { "epoch": 1.7273878182585456, "grad_norm": 3.53125, "learning_rate": 5.602804104190674e-06, "loss": 1.5346, "step": 6140 }, { "epoch": 1.7287944858629904, "grad_norm": 3.3125, "learning_rate": 5.598303358374686e-06, "loss": 1.5561, "step": 6145 }, { "epoch": 1.7302011534674357, "grad_norm": 3.75, "learning_rate": 5.5938002032845596e-06, "loss": 1.6081, "step": 6150 }, { "epoch": 1.7316078210718806, "grad_norm": 2.84375, "learning_rate": 5.589294645708326e-06, "loss": 1.7825, "step": 6155 }, { "epoch": 1.733014488676326, "grad_norm": 3.46875, "learning_rate": 5.584786692437644e-06, "loss": 1.4638, "step": 6160 }, { "epoch": 1.7344211562807708, "grad_norm": 3.3125, "learning_rate": 5.580276350267774e-06, "loss": 1.8662, "step": 6165 }, { "epoch": 1.735827823885216, "grad_norm": 2.828125, "learning_rate": 5.575763625997584e-06, "loss": 1.8925, "step": 6170 }, { "epoch": 1.737234491489661, "grad_norm": 3.203125, "learning_rate": 5.5712485264295314e-06, "loss": 1.4838, "step": 6175 }, { "epoch": 1.7386411590941062, "grad_norm": 3.65625, "learning_rate": 5.566731058369655e-06, "loss": 1.6893, "step": 6180 }, { "epoch": 1.740047826698551, "grad_norm": 2.90625, "learning_rate": 5.562211228627559e-06, "loss": 1.426, "step": 6185 }, { "epoch": 1.7414544943029961, "grad_norm": 3.953125, "learning_rate": 5.557689044016414e-06, "loss": 1.4707, "step": 6190 }, { "epoch": 1.7428611619074412, "grad_norm": 3.15625, "learning_rate": 5.553164511352936e-06, "loss": 1.5854, "step": 6195 }, { "epoch": 1.7442678295118863, "grad_norm": 3.15625, "learning_rate": 5.548637637457383e-06, "loss": 1.7302, "step": 6200 }, { "epoch": 1.7456744971163314, "grad_norm": 3.234375, "learning_rate": 5.544108429153541e-06, "loss": 1.5283, "step": 6205 }, { "epoch": 1.7470811647207765, "grad_norm": 2.609375, "learning_rate": 5.539576893268714e-06, "loss": 1.8094, "step": 6210 }, { "epoch": 1.7484878323252215, "grad_norm": 2.875, "learning_rate": 5.535043036633716e-06, "loss": 1.4656, "step": 6215 }, { "epoch": 1.7498944999296666, "grad_norm": 3.75, "learning_rate": 5.530506866082858e-06, "loss": 1.5639, "step": 6220 }, { "epoch": 1.7513011675341117, "grad_norm": 3.53125, "learning_rate": 5.525968388453943e-06, "loss": 1.6606, "step": 6225 }, { "epoch": 1.7527078351385568, "grad_norm": 2.625, "learning_rate": 5.521427610588246e-06, "loss": 1.5511, "step": 6230 }, { "epoch": 1.7541145027430018, "grad_norm": 3.546875, "learning_rate": 5.51688453933051e-06, "loss": 1.3522, "step": 6235 }, { "epoch": 1.755521170347447, "grad_norm": 2.78125, "learning_rate": 5.51233918152894e-06, "loss": 1.5404, "step": 6240 }, { "epoch": 1.756927837951892, "grad_norm": 3.125, "learning_rate": 5.507791544035183e-06, "loss": 1.4346, "step": 6245 }, { "epoch": 1.758334505556337, "grad_norm": 3.828125, "learning_rate": 5.5032416337043255e-06, "loss": 1.5443, "step": 6250 }, { "epoch": 1.7597411731607822, "grad_norm": 3.734375, "learning_rate": 5.498689457394877e-06, "loss": 1.5366, "step": 6255 }, { "epoch": 1.761147840765227, "grad_norm": 3.453125, "learning_rate": 5.494135021968766e-06, "loss": 1.5194, "step": 6260 }, { "epoch": 1.7625545083696723, "grad_norm": 4.15625, "learning_rate": 5.489578334291323e-06, "loss": 1.6533, "step": 6265 }, { "epoch": 1.7639611759741172, "grad_norm": 3.125, "learning_rate": 5.485019401231275e-06, "loss": 1.5755, "step": 6270 }, { "epoch": 1.7653678435785625, "grad_norm": 3.71875, "learning_rate": 5.480458229660736e-06, "loss": 1.5212, "step": 6275 }, { "epoch": 1.7667745111830073, "grad_norm": 3.453125, "learning_rate": 5.4758948264551905e-06, "loss": 1.3376, "step": 6280 }, { "epoch": 1.7681811787874526, "grad_norm": 3.03125, "learning_rate": 5.471329198493489e-06, "loss": 1.4777, "step": 6285 }, { "epoch": 1.7695878463918975, "grad_norm": 2.859375, "learning_rate": 5.466761352657836e-06, "loss": 1.5956, "step": 6290 }, { "epoch": 1.7709945139963428, "grad_norm": 2.71875, "learning_rate": 5.462191295833777e-06, "loss": 1.6315, "step": 6295 }, { "epoch": 1.7724011816007876, "grad_norm": 3.53125, "learning_rate": 5.457619034910193e-06, "loss": 1.4154, "step": 6300 }, { "epoch": 1.773807849205233, "grad_norm": 2.921875, "learning_rate": 5.453044576779286e-06, "loss": 1.5447, "step": 6305 }, { "epoch": 1.7752145168096778, "grad_norm": 3.5625, "learning_rate": 5.448467928336571e-06, "loss": 1.4752, "step": 6310 }, { "epoch": 1.776621184414123, "grad_norm": 3.453125, "learning_rate": 5.4438890964808605e-06, "loss": 1.5837, "step": 6315 }, { "epoch": 1.778027852018568, "grad_norm": 3.09375, "learning_rate": 5.439308088114267e-06, "loss": 1.3614, "step": 6320 }, { "epoch": 1.7794345196230132, "grad_norm": 3.546875, "learning_rate": 5.434724910142175e-06, "loss": 1.4906, "step": 6325 }, { "epoch": 1.780841187227458, "grad_norm": 5.59375, "learning_rate": 5.430139569473244e-06, "loss": 1.4475, "step": 6330 }, { "epoch": 1.7822478548319032, "grad_norm": 2.75, "learning_rate": 5.425552073019392e-06, "loss": 1.6079, "step": 6335 }, { "epoch": 1.7836545224363483, "grad_norm": 3.953125, "learning_rate": 5.420962427695789e-06, "loss": 1.6304, "step": 6340 }, { "epoch": 1.7850611900407933, "grad_norm": 3.109375, "learning_rate": 5.416370640420842e-06, "loss": 1.8129, "step": 6345 }, { "epoch": 1.7864678576452384, "grad_norm": 3.796875, "learning_rate": 5.411776718116185e-06, "loss": 1.6699, "step": 6350 }, { "epoch": 1.7878745252496835, "grad_norm": 3.21875, "learning_rate": 5.4071806677066744e-06, "loss": 1.5165, "step": 6355 }, { "epoch": 1.7892811928541286, "grad_norm": 2.640625, "learning_rate": 5.402582496120372e-06, "loss": 1.6656, "step": 6360 }, { "epoch": 1.7906878604585736, "grad_norm": 2.703125, "learning_rate": 5.397982210288536e-06, "loss": 1.6198, "step": 6365 }, { "epoch": 1.7920945280630187, "grad_norm": 2.234375, "learning_rate": 5.393379817145617e-06, "loss": 1.8459, "step": 6370 }, { "epoch": 1.7935011956674638, "grad_norm": 3.4375, "learning_rate": 5.388775323629236e-06, "loss": 1.4617, "step": 6375 }, { "epoch": 1.7949078632719089, "grad_norm": 2.78125, "learning_rate": 5.384168736680182e-06, "loss": 1.6081, "step": 6380 }, { "epoch": 1.796314530876354, "grad_norm": 3.421875, "learning_rate": 5.379560063242403e-06, "loss": 1.5674, "step": 6385 }, { "epoch": 1.797721198480799, "grad_norm": 2.890625, "learning_rate": 5.374949310262985e-06, "loss": 1.3943, "step": 6390 }, { "epoch": 1.799127866085244, "grad_norm": 4.25, "learning_rate": 5.370336484692156e-06, "loss": 1.5251, "step": 6395 }, { "epoch": 1.8005345336896892, "grad_norm": 3.328125, "learning_rate": 5.3657215934832645e-06, "loss": 1.5846, "step": 6400 }, { "epoch": 1.801941201294134, "grad_norm": 3.953125, "learning_rate": 5.361104643592773e-06, "loss": 1.7833, "step": 6405 }, { "epoch": 1.8033478688985793, "grad_norm": 3.453125, "learning_rate": 5.356485641980249e-06, "loss": 1.2723, "step": 6410 }, { "epoch": 1.8047545365030242, "grad_norm": 3.546875, "learning_rate": 5.351864595608349e-06, "loss": 1.4335, "step": 6415 }, { "epoch": 1.8061612041074695, "grad_norm": 3.71875, "learning_rate": 5.347241511442816e-06, "loss": 1.4807, "step": 6420 }, { "epoch": 1.8075678717119144, "grad_norm": 3.28125, "learning_rate": 5.342616396452463e-06, "loss": 1.4345, "step": 6425 }, { "epoch": 1.8089745393163597, "grad_norm": 5.28125, "learning_rate": 5.337989257609163e-06, "loss": 1.2621, "step": 6430 }, { "epoch": 1.8103812069208045, "grad_norm": 3.0, "learning_rate": 5.333360101887843e-06, "loss": 1.6888, "step": 6435 }, { "epoch": 1.8117878745252498, "grad_norm": 2.65625, "learning_rate": 5.328728936266466e-06, "loss": 1.5794, "step": 6440 }, { "epoch": 1.8131945421296947, "grad_norm": 2.984375, "learning_rate": 5.324095767726027e-06, "loss": 1.5931, "step": 6445 }, { "epoch": 1.81460120973414, "grad_norm": 3.078125, "learning_rate": 5.319460603250541e-06, "loss": 1.6198, "step": 6450 }, { "epoch": 1.8160078773385848, "grad_norm": 4.96875, "learning_rate": 5.314823449827031e-06, "loss": 1.5342, "step": 6455 }, { "epoch": 1.8174145449430301, "grad_norm": 4.0625, "learning_rate": 5.310184314445515e-06, "loss": 1.7853, "step": 6460 }, { "epoch": 1.818821212547475, "grad_norm": 2.4375, "learning_rate": 5.305543204099006e-06, "loss": 1.7108, "step": 6465 }, { "epoch": 1.82022788015192, "grad_norm": 4.4375, "learning_rate": 5.3009001257834875e-06, "loss": 1.6419, "step": 6470 }, { "epoch": 1.8216345477563651, "grad_norm": 3.21875, "learning_rate": 5.29625508649791e-06, "loss": 1.6769, "step": 6475 }, { "epoch": 1.8230412153608102, "grad_norm": 3.640625, "learning_rate": 5.291608093244183e-06, "loss": 1.659, "step": 6480 }, { "epoch": 1.8244478829652553, "grad_norm": 3.125, "learning_rate": 5.286959153027162e-06, "loss": 1.8752, "step": 6485 }, { "epoch": 1.8258545505697004, "grad_norm": 3.40625, "learning_rate": 5.28230827285463e-06, "loss": 1.6693, "step": 6490 }, { "epoch": 1.8272612181741454, "grad_norm": 4.28125, "learning_rate": 5.277655459737303e-06, "loss": 1.4653, "step": 6495 }, { "epoch": 1.8286678857785905, "grad_norm": 2.96875, "learning_rate": 5.2730007206888074e-06, "loss": 1.5908, "step": 6500 }, { "epoch": 1.8300745533830356, "grad_norm": 3.109375, "learning_rate": 5.268344062725671e-06, "loss": 1.6227, "step": 6505 }, { "epoch": 1.8314812209874807, "grad_norm": 3.796875, "learning_rate": 5.263685492867317e-06, "loss": 1.4596, "step": 6510 }, { "epoch": 1.8328878885919258, "grad_norm": 3.0, "learning_rate": 5.259025018136049e-06, "loss": 1.5202, "step": 6515 }, { "epoch": 1.8342945561963708, "grad_norm": 3.46875, "learning_rate": 5.25436264555704e-06, "loss": 1.6152, "step": 6520 }, { "epoch": 1.835701223800816, "grad_norm": 2.390625, "learning_rate": 5.249698382158329e-06, "loss": 1.7461, "step": 6525 }, { "epoch": 1.837107891405261, "grad_norm": 3.28125, "learning_rate": 5.245032234970801e-06, "loss": 1.6038, "step": 6530 }, { "epoch": 1.838514559009706, "grad_norm": 3.625, "learning_rate": 5.240364211028183e-06, "loss": 1.7761, "step": 6535 }, { "epoch": 1.839921226614151, "grad_norm": 4.5, "learning_rate": 5.235694317367028e-06, "loss": 1.682, "step": 6540 }, { "epoch": 1.8413278942185962, "grad_norm": 2.046875, "learning_rate": 5.231022561026712e-06, "loss": 1.7722, "step": 6545 }, { "epoch": 1.842734561823041, "grad_norm": 3.484375, "learning_rate": 5.226348949049414e-06, "loss": 1.4845, "step": 6550 }, { "epoch": 1.8441412294274864, "grad_norm": 3.1875, "learning_rate": 5.2216734884801126e-06, "loss": 1.6831, "step": 6555 }, { "epoch": 1.8455478970319312, "grad_norm": 2.828125, "learning_rate": 5.216996186366573e-06, "loss": 1.6796, "step": 6560 }, { "epoch": 1.8469545646363765, "grad_norm": 2.328125, "learning_rate": 5.212317049759336e-06, "loss": 1.6936, "step": 6565 }, { "epoch": 1.8483612322408214, "grad_norm": 2.90625, "learning_rate": 5.207636085711707e-06, "loss": 1.8625, "step": 6570 }, { "epoch": 1.8497678998452667, "grad_norm": 3.15625, "learning_rate": 5.202953301279748e-06, "loss": 1.7222, "step": 6575 }, { "epoch": 1.8511745674497115, "grad_norm": 2.984375, "learning_rate": 5.198268703522263e-06, "loss": 1.437, "step": 6580 }, { "epoch": 1.8525812350541568, "grad_norm": 2.96875, "learning_rate": 5.1935822995007896e-06, "loss": 1.6653, "step": 6585 }, { "epoch": 1.8539879026586017, "grad_norm": 3.65625, "learning_rate": 5.188894096279591e-06, "loss": 1.5089, "step": 6590 }, { "epoch": 1.855394570263047, "grad_norm": 4.40625, "learning_rate": 5.184204100925639e-06, "loss": 1.4483, "step": 6595 }, { "epoch": 1.8568012378674918, "grad_norm": 6.53125, "learning_rate": 5.179512320508606e-06, "loss": 1.4933, "step": 6600 }, { "epoch": 1.858207905471937, "grad_norm": 2.765625, "learning_rate": 5.17481876210086e-06, "loss": 1.4914, "step": 6605 }, { "epoch": 1.859614573076382, "grad_norm": 3.46875, "learning_rate": 5.170123432777446e-06, "loss": 1.6377, "step": 6610 }, { "epoch": 1.861021240680827, "grad_norm": 2.390625, "learning_rate": 5.165426339616078e-06, "loss": 1.6628, "step": 6615 }, { "epoch": 1.8624279082852722, "grad_norm": 3.140625, "learning_rate": 5.160727489697131e-06, "loss": 1.6961, "step": 6620 }, { "epoch": 1.8638345758897172, "grad_norm": 2.890625, "learning_rate": 5.156026890103626e-06, "loss": 1.5194, "step": 6625 }, { "epoch": 1.8652412434941623, "grad_norm": 4.25, "learning_rate": 5.1513245479212215e-06, "loss": 1.5143, "step": 6630 }, { "epoch": 1.8666479110986074, "grad_norm": 2.796875, "learning_rate": 5.146620470238205e-06, "loss": 1.6946, "step": 6635 }, { "epoch": 1.8680545787030525, "grad_norm": 2.953125, "learning_rate": 5.1419146641454784e-06, "loss": 1.413, "step": 6640 }, { "epoch": 1.8694612463074975, "grad_norm": 3.359375, "learning_rate": 5.137207136736549e-06, "loss": 1.8252, "step": 6645 }, { "epoch": 1.8708679139119426, "grad_norm": 3.03125, "learning_rate": 5.132497895107518e-06, "loss": 1.5078, "step": 6650 }, { "epoch": 1.8722745815163877, "grad_norm": 2.828125, "learning_rate": 5.127786946357074e-06, "loss": 1.6362, "step": 6655 }, { "epoch": 1.8736812491208328, "grad_norm": 3.390625, "learning_rate": 5.123074297586475e-06, "loss": 1.6801, "step": 6660 }, { "epoch": 1.8750879167252779, "grad_norm": 3.953125, "learning_rate": 5.118359955899542e-06, "loss": 1.4851, "step": 6665 }, { "epoch": 1.876494584329723, "grad_norm": 3.375, "learning_rate": 5.113643928402651e-06, "loss": 1.5629, "step": 6670 }, { "epoch": 1.8779012519341678, "grad_norm": 2.15625, "learning_rate": 5.108926222204716e-06, "loss": 1.7819, "step": 6675 }, { "epoch": 1.879307919538613, "grad_norm": 3.8125, "learning_rate": 5.104206844417184e-06, "loss": 1.6994, "step": 6680 }, { "epoch": 1.880714587143058, "grad_norm": 2.859375, "learning_rate": 5.099485802154019e-06, "loss": 1.5289, "step": 6685 }, { "epoch": 1.8821212547475032, "grad_norm": 2.875, "learning_rate": 5.094763102531697e-06, "loss": 1.4957, "step": 6690 }, { "epoch": 1.883527922351948, "grad_norm": 3.296875, "learning_rate": 5.09003875266919e-06, "loss": 1.6463, "step": 6695 }, { "epoch": 1.8849345899563934, "grad_norm": 2.5625, "learning_rate": 5.085312759687958e-06, "loss": 1.5889, "step": 6700 }, { "epoch": 1.8863412575608383, "grad_norm": 2.90625, "learning_rate": 5.080585130711938e-06, "loss": 1.79, "step": 6705 }, { "epoch": 1.8877479251652836, "grad_norm": 3.75, "learning_rate": 5.0758558728675345e-06, "loss": 1.8305, "step": 6710 }, { "epoch": 1.8891545927697284, "grad_norm": 4.28125, "learning_rate": 5.0711249932836035e-06, "loss": 1.4388, "step": 6715 }, { "epoch": 1.8905612603741737, "grad_norm": 2.390625, "learning_rate": 5.066392499091451e-06, "loss": 1.7699, "step": 6720 }, { "epoch": 1.8919679279786186, "grad_norm": 2.765625, "learning_rate": 5.061658397424814e-06, "loss": 1.7132, "step": 6725 }, { "epoch": 1.8933745955830639, "grad_norm": 4.59375, "learning_rate": 5.056922695419849e-06, "loss": 1.3894, "step": 6730 }, { "epoch": 1.8947812631875087, "grad_norm": 2.921875, "learning_rate": 5.052185400215134e-06, "loss": 1.476, "step": 6735 }, { "epoch": 1.896187930791954, "grad_norm": 3.703125, "learning_rate": 5.047446518951638e-06, "loss": 1.61, "step": 6740 }, { "epoch": 1.8975945983963989, "grad_norm": 3.734375, "learning_rate": 5.042706058772728e-06, "loss": 1.6619, "step": 6745 }, { "epoch": 1.899001266000844, "grad_norm": 5.875, "learning_rate": 5.037964026824148e-06, "loss": 1.4887, "step": 6750 }, { "epoch": 1.900407933605289, "grad_norm": 3.453125, "learning_rate": 5.033220430254015e-06, "loss": 1.5818, "step": 6755 }, { "epoch": 1.9018146012097341, "grad_norm": 3.390625, "learning_rate": 5.0284752762128e-06, "loss": 1.4241, "step": 6760 }, { "epoch": 1.9032212688141792, "grad_norm": 3.34375, "learning_rate": 5.023728571853322e-06, "loss": 1.6242, "step": 6765 }, { "epoch": 1.9046279364186243, "grad_norm": 3.578125, "learning_rate": 5.018980324330741e-06, "loss": 1.7302, "step": 6770 }, { "epoch": 1.9060346040230693, "grad_norm": 2.8125, "learning_rate": 5.014230540802538e-06, "loss": 1.7645, "step": 6775 }, { "epoch": 1.9074412716275144, "grad_norm": 4.78125, "learning_rate": 5.009479228428513e-06, "loss": 1.4511, "step": 6780 }, { "epoch": 1.9088479392319595, "grad_norm": 3.609375, "learning_rate": 5.00472639437077e-06, "loss": 1.5802, "step": 6785 }, { "epoch": 1.9102546068364046, "grad_norm": 3.703125, "learning_rate": 4.999972045793705e-06, "loss": 1.5002, "step": 6790 }, { "epoch": 1.9116612744408497, "grad_norm": 3.328125, "learning_rate": 4.995216189863999e-06, "loss": 1.3874, "step": 6795 }, { "epoch": 1.9130679420452947, "grad_norm": 3.25, "learning_rate": 4.990458833750606e-06, "loss": 1.5605, "step": 6800 }, { "epoch": 1.9144746096497398, "grad_norm": 3.390625, "learning_rate": 4.985699984624736e-06, "loss": 1.6657, "step": 6805 }, { "epoch": 1.915881277254185, "grad_norm": 3.46875, "learning_rate": 4.980939649659856e-06, "loss": 1.574, "step": 6810 }, { "epoch": 1.91728794485863, "grad_norm": 3.21875, "learning_rate": 4.976177836031669e-06, "loss": 1.5493, "step": 6815 }, { "epoch": 1.9186946124630748, "grad_norm": 3.859375, "learning_rate": 4.97141455091811e-06, "loss": 1.7608, "step": 6820 }, { "epoch": 1.9201012800675201, "grad_norm": 2.34375, "learning_rate": 4.966649801499327e-06, "loss": 1.4364, "step": 6825 }, { "epoch": 1.921507947671965, "grad_norm": 4.09375, "learning_rate": 4.961883594957681e-06, "loss": 1.2724, "step": 6830 }, { "epoch": 1.9229146152764103, "grad_norm": 3.328125, "learning_rate": 4.957115938477726e-06, "loss": 1.4279, "step": 6835 }, { "epoch": 1.9243212828808551, "grad_norm": 5.6875, "learning_rate": 4.952346839246202e-06, "loss": 1.414, "step": 6840 }, { "epoch": 1.9257279504853004, "grad_norm": 2.9375, "learning_rate": 4.947576304452025e-06, "loss": 1.6354, "step": 6845 }, { "epoch": 1.9271346180897453, "grad_norm": 3.875, "learning_rate": 4.942804341286274e-06, "loss": 1.4963, "step": 6850 }, { "epoch": 1.9285412856941906, "grad_norm": 3.1875, "learning_rate": 4.938030956942181e-06, "loss": 1.6742, "step": 6855 }, { "epoch": 1.9299479532986354, "grad_norm": 3.84375, "learning_rate": 4.933256158615121e-06, "loss": 1.5579, "step": 6860 }, { "epoch": 1.9313546209030807, "grad_norm": 3.03125, "learning_rate": 4.9284799535026e-06, "loss": 1.3593, "step": 6865 }, { "epoch": 1.9327612885075256, "grad_norm": 4.03125, "learning_rate": 4.923702348804244e-06, "loss": 1.397, "step": 6870 }, { "epoch": 1.934167956111971, "grad_norm": 3.203125, "learning_rate": 4.918923351721791e-06, "loss": 1.652, "step": 6875 }, { "epoch": 1.9355746237164158, "grad_norm": 3.078125, "learning_rate": 4.9141429694590745e-06, "loss": 1.6749, "step": 6880 }, { "epoch": 1.9369812913208608, "grad_norm": 3.359375, "learning_rate": 4.909361209222018e-06, "loss": 1.7391, "step": 6885 }, { "epoch": 1.938387958925306, "grad_norm": 5.0625, "learning_rate": 4.9045780782186225e-06, "loss": 1.8264, "step": 6890 }, { "epoch": 1.939794626529751, "grad_norm": 2.890625, "learning_rate": 4.899793583658955e-06, "loss": 1.4543, "step": 6895 }, { "epoch": 1.941201294134196, "grad_norm": 3.515625, "learning_rate": 4.895007732755138e-06, "loss": 1.6126, "step": 6900 }, { "epoch": 1.9426079617386411, "grad_norm": 4.875, "learning_rate": 4.890220532721336e-06, "loss": 1.8178, "step": 6905 }, { "epoch": 1.9440146293430862, "grad_norm": 3.6875, "learning_rate": 4.885431990773752e-06, "loss": 1.574, "step": 6910 }, { "epoch": 1.9454212969475313, "grad_norm": 3.3125, "learning_rate": 4.880642114130609e-06, "loss": 1.3841, "step": 6915 }, { "epoch": 1.9468279645519764, "grad_norm": 5.8125, "learning_rate": 4.875850910012138e-06, "loss": 1.3911, "step": 6920 }, { "epoch": 1.9482346321564215, "grad_norm": 3.203125, "learning_rate": 4.87105838564058e-06, "loss": 1.791, "step": 6925 }, { "epoch": 1.9496412997608665, "grad_norm": 2.96875, "learning_rate": 4.8662645482401584e-06, "loss": 1.6071, "step": 6930 }, { "epoch": 1.9510479673653116, "grad_norm": 2.9375, "learning_rate": 4.861469405037079e-06, "loss": 1.3506, "step": 6935 }, { "epoch": 1.9524546349697567, "grad_norm": 2.96875, "learning_rate": 4.856672963259518e-06, "loss": 1.2333, "step": 6940 }, { "epoch": 1.9538613025742018, "grad_norm": 3.015625, "learning_rate": 4.851875230137603e-06, "loss": 1.6049, "step": 6945 }, { "epoch": 1.9552679701786468, "grad_norm": 3.734375, "learning_rate": 4.847076212903414e-06, "loss": 1.8223, "step": 6950 }, { "epoch": 1.9566746377830917, "grad_norm": 4.25, "learning_rate": 4.842275918790965e-06, "loss": 1.5588, "step": 6955 }, { "epoch": 1.958081305387537, "grad_norm": 2.625, "learning_rate": 4.837474355036191e-06, "loss": 1.6483, "step": 6960 }, { "epoch": 1.9594879729919819, "grad_norm": 2.546875, "learning_rate": 4.83267152887695e-06, "loss": 1.6539, "step": 6965 }, { "epoch": 1.9608946405964272, "grad_norm": 3.203125, "learning_rate": 4.8278674475529915e-06, "loss": 1.5487, "step": 6970 }, { "epoch": 1.962301308200872, "grad_norm": 2.984375, "learning_rate": 4.823062118305966e-06, "loss": 1.3615, "step": 6975 }, { "epoch": 1.9637079758053173, "grad_norm": 2.84375, "learning_rate": 4.8182555483794e-06, "loss": 1.6147, "step": 6980 }, { "epoch": 1.9651146434097622, "grad_norm": 2.953125, "learning_rate": 4.813447745018692e-06, "loss": 1.7156, "step": 6985 }, { "epoch": 1.9665213110142075, "grad_norm": 3.125, "learning_rate": 4.808638715471101e-06, "loss": 1.5149, "step": 6990 }, { "epoch": 1.9679279786186523, "grad_norm": 4.96875, "learning_rate": 4.803828466985732e-06, "loss": 1.2237, "step": 6995 }, { "epoch": 1.9693346462230976, "grad_norm": 3.0, "learning_rate": 4.799017006813527e-06, "loss": 1.5996, "step": 7000 }, { "epoch": 1.9707413138275425, "grad_norm": 3.359375, "learning_rate": 4.794204342207259e-06, "loss": 1.5614, "step": 7005 }, { "epoch": 1.9721479814319878, "grad_norm": 2.90625, "learning_rate": 4.789390480421512e-06, "loss": 1.65, "step": 7010 }, { "epoch": 1.9735546490364326, "grad_norm": 2.640625, "learning_rate": 4.784575428712676e-06, "loss": 1.5145, "step": 7015 }, { "epoch": 1.9749613166408777, "grad_norm": 3.640625, "learning_rate": 4.7797591943389355e-06, "loss": 1.6309, "step": 7020 }, { "epoch": 1.9763679842453228, "grad_norm": 3.359375, "learning_rate": 4.774941784560256e-06, "loss": 1.7972, "step": 7025 }, { "epoch": 1.9777746518497679, "grad_norm": 3.546875, "learning_rate": 4.770123206638376e-06, "loss": 1.6059, "step": 7030 }, { "epoch": 1.979181319454213, "grad_norm": 2.28125, "learning_rate": 4.765303467836794e-06, "loss": 1.6813, "step": 7035 }, { "epoch": 1.980587987058658, "grad_norm": 2.890625, "learning_rate": 4.760482575420762e-06, "loss": 1.6176, "step": 7040 }, { "epoch": 1.981994654663103, "grad_norm": 2.8125, "learning_rate": 4.755660536657266e-06, "loss": 1.689, "step": 7045 }, { "epoch": 1.9834013222675482, "grad_norm": 3.21875, "learning_rate": 4.7508373588150216e-06, "loss": 1.3935, "step": 7050 }, { "epoch": 1.9848079898719933, "grad_norm": 3.984375, "learning_rate": 4.746013049164463e-06, "loss": 1.4812, "step": 7055 }, { "epoch": 1.9862146574764383, "grad_norm": 3.796875, "learning_rate": 4.74118761497773e-06, "loss": 1.7011, "step": 7060 }, { "epoch": 1.9876213250808834, "grad_norm": 2.8125, "learning_rate": 4.7363610635286536e-06, "loss": 1.6525, "step": 7065 }, { "epoch": 1.9890279926853285, "grad_norm": 2.96875, "learning_rate": 4.731533402092756e-06, "loss": 1.7934, "step": 7070 }, { "epoch": 1.9904346602897736, "grad_norm": 4.6875, "learning_rate": 4.726704637947228e-06, "loss": 1.2473, "step": 7075 }, { "epoch": 1.9918413278942186, "grad_norm": 3.609375, "learning_rate": 4.721874778370921e-06, "loss": 1.2767, "step": 7080 }, { "epoch": 1.9932479954986637, "grad_norm": 4.625, "learning_rate": 4.717043830644344e-06, "loss": 1.5313, "step": 7085 }, { "epoch": 1.9946546631031086, "grad_norm": 3.4375, "learning_rate": 4.7122118020496385e-06, "loss": 1.6033, "step": 7090 }, { "epoch": 1.9960613307075539, "grad_norm": 3.40625, "learning_rate": 4.707378699870582e-06, "loss": 1.7435, "step": 7095 }, { "epoch": 1.9974679983119987, "grad_norm": 3.59375, "learning_rate": 4.702544531392565e-06, "loss": 1.6058, "step": 7100 }, { "epoch": 1.998874665916444, "grad_norm": 3.875, "learning_rate": 4.697709303902592e-06, "loss": 1.7475, "step": 7105 }, { "epoch": 2.0, "eval_loss": 1.5795072317123413, "eval_runtime": 330.9822, "eval_samples_per_second": 9.541, "eval_steps_per_second": 4.771, "step": 7109 }, { "epoch": 2.000281333520889, "grad_norm": 2.78125, "learning_rate": 4.6928730246892536e-06, "loss": 1.5257, "step": 7110 }, { "epoch": 2.001688001125334, "grad_norm": 4.125, "learning_rate": 4.6880357010427375e-06, "loss": 1.5725, "step": 7115 }, { "epoch": 2.003094668729779, "grad_norm": 3.875, "learning_rate": 4.683197340254798e-06, "loss": 1.3852, "step": 7120 }, { "epoch": 2.0045013363342243, "grad_norm": 3.71875, "learning_rate": 4.678357949618754e-06, "loss": 1.6272, "step": 7125 }, { "epoch": 2.005908003938669, "grad_norm": 3.546875, "learning_rate": 4.673517536429479e-06, "loss": 1.4325, "step": 7130 }, { "epoch": 2.0073146715431145, "grad_norm": 4.03125, "learning_rate": 4.6686761079833855e-06, "loss": 1.5526, "step": 7135 }, { "epoch": 2.0087213391475593, "grad_norm": 3.15625, "learning_rate": 4.663833671578418e-06, "loss": 1.7071, "step": 7140 }, { "epoch": 2.0101280067520046, "grad_norm": 4.0625, "learning_rate": 4.6589902345140394e-06, "loss": 1.2759, "step": 7145 }, { "epoch": 2.0115346743564495, "grad_norm": 4.71875, "learning_rate": 4.654145804091223e-06, "loss": 1.6467, "step": 7150 }, { "epoch": 2.012941341960895, "grad_norm": 3.09375, "learning_rate": 4.649300387612436e-06, "loss": 1.4278, "step": 7155 }, { "epoch": 2.0143480095653397, "grad_norm": 3.34375, "learning_rate": 4.644453992381633e-06, "loss": 1.4842, "step": 7160 }, { "epoch": 2.015754677169785, "grad_norm": 3.3125, "learning_rate": 4.639606625704249e-06, "loss": 1.9023, "step": 7165 }, { "epoch": 2.01716134477423, "grad_norm": 2.84375, "learning_rate": 4.634758294887175e-06, "loss": 1.6057, "step": 7170 }, { "epoch": 2.018568012378675, "grad_norm": 3.90625, "learning_rate": 4.629909007238762e-06, "loss": 1.7244, "step": 7175 }, { "epoch": 2.01997467998312, "grad_norm": 4.0, "learning_rate": 4.6250587700688e-06, "loss": 1.8344, "step": 7180 }, { "epoch": 2.0213813475875653, "grad_norm": 3.515625, "learning_rate": 4.620207590688512e-06, "loss": 1.5223, "step": 7185 }, { "epoch": 2.02278801519201, "grad_norm": 3.203125, "learning_rate": 4.61535547641054e-06, "loss": 1.7127, "step": 7190 }, { "epoch": 2.024194682796455, "grad_norm": 3.25, "learning_rate": 4.610502434548934e-06, "loss": 1.8786, "step": 7195 }, { "epoch": 2.0256013504009003, "grad_norm": 3.765625, "learning_rate": 4.6056484724191476e-06, "loss": 1.5157, "step": 7200 }, { "epoch": 2.027008018005345, "grad_norm": 3.5625, "learning_rate": 4.600793597338015e-06, "loss": 1.624, "step": 7205 }, { "epoch": 2.0284146856097904, "grad_norm": 3.765625, "learning_rate": 4.59593781662375e-06, "loss": 1.6772, "step": 7210 }, { "epoch": 2.0298213532142353, "grad_norm": 3.375, "learning_rate": 4.591081137595933e-06, "loss": 1.3959, "step": 7215 }, { "epoch": 2.0312280208186806, "grad_norm": 5.09375, "learning_rate": 4.5862235675754935e-06, "loss": 1.5757, "step": 7220 }, { "epoch": 2.0326346884231254, "grad_norm": 2.671875, "learning_rate": 4.58136511388471e-06, "loss": 1.3308, "step": 7225 }, { "epoch": 2.0340413560275707, "grad_norm": 3.09375, "learning_rate": 4.5765057838471884e-06, "loss": 1.685, "step": 7230 }, { "epoch": 2.0354480236320156, "grad_norm": 2.34375, "learning_rate": 4.571645584787858e-06, "loss": 1.6677, "step": 7235 }, { "epoch": 2.036854691236461, "grad_norm": 3.34375, "learning_rate": 4.566784524032958e-06, "loss": 1.5998, "step": 7240 }, { "epoch": 2.0382613588409058, "grad_norm": 2.609375, "learning_rate": 4.561922608910025e-06, "loss": 1.4078, "step": 7245 }, { "epoch": 2.039668026445351, "grad_norm": 3.34375, "learning_rate": 4.557059846747886e-06, "loss": 1.5471, "step": 7250 }, { "epoch": 2.041074694049796, "grad_norm": 3.84375, "learning_rate": 4.5521962448766416e-06, "loss": 1.4645, "step": 7255 }, { "epoch": 2.042481361654241, "grad_norm": 3.703125, "learning_rate": 4.547331810627661e-06, "loss": 1.4794, "step": 7260 }, { "epoch": 2.043888029258686, "grad_norm": 3.734375, "learning_rate": 4.542466551333568e-06, "loss": 1.672, "step": 7265 }, { "epoch": 2.0452946968631314, "grad_norm": 2.3125, "learning_rate": 4.5376004743282255e-06, "loss": 1.573, "step": 7270 }, { "epoch": 2.0467013644675762, "grad_norm": 3.0625, "learning_rate": 4.532733586946736e-06, "loss": 1.6004, "step": 7275 }, { "epoch": 2.0481080320720215, "grad_norm": 4.1875, "learning_rate": 4.527865896525419e-06, "loss": 1.7544, "step": 7280 }, { "epoch": 2.0495146996764664, "grad_norm": 3.1875, "learning_rate": 4.522997410401805e-06, "loss": 1.4427, "step": 7285 }, { "epoch": 2.0509213672809117, "grad_norm": 2.828125, "learning_rate": 4.518128135914625e-06, "loss": 1.5777, "step": 7290 }, { "epoch": 2.0523280348853565, "grad_norm": 3.359375, "learning_rate": 4.5132580804037984e-06, "loss": 1.3477, "step": 7295 }, { "epoch": 2.053734702489802, "grad_norm": 3.03125, "learning_rate": 4.50838725121042e-06, "loss": 1.6699, "step": 7300 }, { "epoch": 2.0551413700942467, "grad_norm": 3.890625, "learning_rate": 4.5035156556767555e-06, "loss": 1.5229, "step": 7305 }, { "epoch": 2.056548037698692, "grad_norm": 2.5625, "learning_rate": 4.498643301146219e-06, "loss": 1.7442, "step": 7310 }, { "epoch": 2.057954705303137, "grad_norm": 2.859375, "learning_rate": 4.493770194963374e-06, "loss": 1.4243, "step": 7315 }, { "epoch": 2.059361372907582, "grad_norm": 3.921875, "learning_rate": 4.488896344473914e-06, "loss": 1.653, "step": 7320 }, { "epoch": 2.060768040512027, "grad_norm": 3.53125, "learning_rate": 4.484021757024658e-06, "loss": 1.6292, "step": 7325 }, { "epoch": 2.062174708116472, "grad_norm": 3.0, "learning_rate": 4.479146439963533e-06, "loss": 1.4996, "step": 7330 }, { "epoch": 2.063581375720917, "grad_norm": 4.15625, "learning_rate": 4.474270400639565e-06, "loss": 1.7773, "step": 7335 }, { "epoch": 2.064988043325362, "grad_norm": 2.25, "learning_rate": 4.469393646402872e-06, "loss": 1.7348, "step": 7340 }, { "epoch": 2.0663947109298073, "grad_norm": 3.359375, "learning_rate": 4.4645161846046465e-06, "loss": 1.351, "step": 7345 }, { "epoch": 2.067801378534252, "grad_norm": 3.875, "learning_rate": 4.459638022597149e-06, "loss": 1.1414, "step": 7350 }, { "epoch": 2.0692080461386975, "grad_norm": 3.078125, "learning_rate": 4.454759167733697e-06, "loss": 1.4292, "step": 7355 }, { "epoch": 2.0706147137431423, "grad_norm": 3.375, "learning_rate": 4.449879627368649e-06, "loss": 1.608, "step": 7360 }, { "epoch": 2.0720213813475876, "grad_norm": 3.3125, "learning_rate": 4.4449994088574e-06, "loss": 1.5892, "step": 7365 }, { "epoch": 2.0734280489520325, "grad_norm": 4.875, "learning_rate": 4.440118519556366e-06, "loss": 1.5495, "step": 7370 }, { "epoch": 2.0748347165564778, "grad_norm": 4.4375, "learning_rate": 4.435236966822972e-06, "loss": 1.7177, "step": 7375 }, { "epoch": 2.0762413841609226, "grad_norm": 3.8125, "learning_rate": 4.430354758015648e-06, "loss": 1.4, "step": 7380 }, { "epoch": 2.077648051765368, "grad_norm": 3.484375, "learning_rate": 4.425471900493806e-06, "loss": 1.6965, "step": 7385 }, { "epoch": 2.079054719369813, "grad_norm": 3.53125, "learning_rate": 4.420588401617845e-06, "loss": 1.4151, "step": 7390 }, { "epoch": 2.080461386974258, "grad_norm": 3.546875, "learning_rate": 4.415704268749123e-06, "loss": 1.5518, "step": 7395 }, { "epoch": 2.081868054578703, "grad_norm": 2.484375, "learning_rate": 4.410819509249956e-06, "loss": 1.4657, "step": 7400 }, { "epoch": 2.0832747221831482, "grad_norm": 3.25, "learning_rate": 4.405934130483606e-06, "loss": 1.5352, "step": 7405 }, { "epoch": 2.084681389787593, "grad_norm": 3.640625, "learning_rate": 4.401048139814268e-06, "loss": 1.6914, "step": 7410 }, { "epoch": 2.0860880573920384, "grad_norm": 2.78125, "learning_rate": 4.3961615446070564e-06, "loss": 1.5646, "step": 7415 }, { "epoch": 2.0874947249964833, "grad_norm": 3.421875, "learning_rate": 4.391274352228002e-06, "loss": 1.5388, "step": 7420 }, { "epoch": 2.0889013926009286, "grad_norm": 2.84375, "learning_rate": 4.3863865700440316e-06, "loss": 1.5648, "step": 7425 }, { "epoch": 2.0903080602053734, "grad_norm": 3.546875, "learning_rate": 4.3814982054229604e-06, "loss": 1.455, "step": 7430 }, { "epoch": 2.0917147278098187, "grad_norm": 4.125, "learning_rate": 4.37660926573349e-06, "loss": 1.5373, "step": 7435 }, { "epoch": 2.0931213954142636, "grad_norm": 2.765625, "learning_rate": 4.371719758345176e-06, "loss": 1.5629, "step": 7440 }, { "epoch": 2.094528063018709, "grad_norm": 2.984375, "learning_rate": 4.366829690628439e-06, "loss": 1.7871, "step": 7445 }, { "epoch": 2.0959347306231537, "grad_norm": 3.28125, "learning_rate": 4.3619390699545425e-06, "loss": 1.4415, "step": 7450 }, { "epoch": 2.097341398227599, "grad_norm": 3.0, "learning_rate": 4.357047903695582e-06, "loss": 1.4951, "step": 7455 }, { "epoch": 2.098748065832044, "grad_norm": 3.125, "learning_rate": 4.352156199224474e-06, "loss": 1.7415, "step": 7460 }, { "epoch": 2.100154733436489, "grad_norm": 3.28125, "learning_rate": 4.347263963914951e-06, "loss": 1.5393, "step": 7465 }, { "epoch": 2.101561401040934, "grad_norm": 2.859375, "learning_rate": 4.3423712051415415e-06, "loss": 1.5584, "step": 7470 }, { "epoch": 2.102968068645379, "grad_norm": 2.859375, "learning_rate": 4.337477930279565e-06, "loss": 1.4683, "step": 7475 }, { "epoch": 2.104374736249824, "grad_norm": 3.328125, "learning_rate": 4.332584146705119e-06, "loss": 1.7479, "step": 7480 }, { "epoch": 2.105781403854269, "grad_norm": 4.65625, "learning_rate": 4.327689861795066e-06, "loss": 1.6146, "step": 7485 }, { "epoch": 2.1071880714587143, "grad_norm": 3.65625, "learning_rate": 4.322795082927027e-06, "loss": 1.5614, "step": 7490 }, { "epoch": 2.108594739063159, "grad_norm": 3.078125, "learning_rate": 4.317899817479363e-06, "loss": 1.4357, "step": 7495 }, { "epoch": 2.1100014066676045, "grad_norm": 3.546875, "learning_rate": 4.313004072831177e-06, "loss": 1.6396, "step": 7500 }, { "epoch": 2.1114080742720494, "grad_norm": 4.0625, "learning_rate": 4.308107856362284e-06, "loss": 1.4383, "step": 7505 }, { "epoch": 2.1128147418764947, "grad_norm": 3.34375, "learning_rate": 4.303211175453216e-06, "loss": 1.3966, "step": 7510 }, { "epoch": 2.1142214094809395, "grad_norm": 3.421875, "learning_rate": 4.2983140374852076e-06, "loss": 1.3908, "step": 7515 }, { "epoch": 2.115628077085385, "grad_norm": 2.59375, "learning_rate": 4.293416449840175e-06, "loss": 1.6366, "step": 7520 }, { "epoch": 2.1170347446898297, "grad_norm": 2.890625, "learning_rate": 4.288518419900718e-06, "loss": 1.596, "step": 7525 }, { "epoch": 2.118441412294275, "grad_norm": 2.796875, "learning_rate": 4.2836199550501e-06, "loss": 1.7149, "step": 7530 }, { "epoch": 2.11984807989872, "grad_norm": 4.0625, "learning_rate": 4.278721062672244e-06, "loss": 1.4818, "step": 7535 }, { "epoch": 2.121254747503165, "grad_norm": 4.21875, "learning_rate": 4.273821750151712e-06, "loss": 1.4746, "step": 7540 }, { "epoch": 2.12266141510761, "grad_norm": 2.59375, "learning_rate": 4.268922024873705e-06, "loss": 1.7616, "step": 7545 }, { "epoch": 2.1240680827120553, "grad_norm": 2.78125, "learning_rate": 4.264021894224042e-06, "loss": 1.6073, "step": 7550 }, { "epoch": 2.1254747503165, "grad_norm": 3.40625, "learning_rate": 4.259121365589152e-06, "loss": 1.4738, "step": 7555 }, { "epoch": 2.1268814179209454, "grad_norm": 3.359375, "learning_rate": 4.25422044635607e-06, "loss": 1.7426, "step": 7560 }, { "epoch": 2.1282880855253903, "grad_norm": 4.25, "learning_rate": 4.249319143912415e-06, "loss": 1.595, "step": 7565 }, { "epoch": 2.1296947531298356, "grad_norm": 2.671875, "learning_rate": 4.244417465646382e-06, "loss": 1.7493, "step": 7570 }, { "epoch": 2.1311014207342804, "grad_norm": 3.9375, "learning_rate": 4.239515418946739e-06, "loss": 1.5547, "step": 7575 }, { "epoch": 2.1325080883387257, "grad_norm": 3.5625, "learning_rate": 4.234613011202804e-06, "loss": 1.4594, "step": 7580 }, { "epoch": 2.1339147559431706, "grad_norm": 2.859375, "learning_rate": 4.22971024980444e-06, "loss": 1.7132, "step": 7585 }, { "epoch": 2.135321423547616, "grad_norm": 3.15625, "learning_rate": 4.2248071421420445e-06, "loss": 1.5946, "step": 7590 }, { "epoch": 2.1367280911520607, "grad_norm": 3.5, "learning_rate": 4.219903695606538e-06, "loss": 1.3981, "step": 7595 }, { "epoch": 2.1381347587565056, "grad_norm": 3.15625, "learning_rate": 4.214999917589347e-06, "loss": 1.463, "step": 7600 }, { "epoch": 2.139541426360951, "grad_norm": 3.59375, "learning_rate": 4.210095815482404e-06, "loss": 1.3421, "step": 7605 }, { "epoch": 2.140948093965396, "grad_norm": 2.484375, "learning_rate": 4.205191396678126e-06, "loss": 1.2721, "step": 7610 }, { "epoch": 2.142354761569841, "grad_norm": 3.75, "learning_rate": 4.200286668569407e-06, "loss": 1.6508, "step": 7615 }, { "epoch": 2.143761429174286, "grad_norm": 3.484375, "learning_rate": 4.195381638549609e-06, "loss": 1.569, "step": 7620 }, { "epoch": 2.145168096778731, "grad_norm": 2.890625, "learning_rate": 4.190476314012551e-06, "loss": 1.5305, "step": 7625 }, { "epoch": 2.146574764383176, "grad_norm": 3.03125, "learning_rate": 4.185570702352491e-06, "loss": 1.4491, "step": 7630 }, { "epoch": 2.1479814319876214, "grad_norm": 3.203125, "learning_rate": 4.180664810964121e-06, "loss": 1.656, "step": 7635 }, { "epoch": 2.1493880995920662, "grad_norm": 2.578125, "learning_rate": 4.175758647242561e-06, "loss": 1.4181, "step": 7640 }, { "epoch": 2.1507947671965115, "grad_norm": 3.078125, "learning_rate": 4.170852218583333e-06, "loss": 1.3543, "step": 7645 }, { "epoch": 2.1522014348009564, "grad_norm": 3.46875, "learning_rate": 4.1659455323823615e-06, "loss": 1.6262, "step": 7650 }, { "epoch": 2.1536081024054017, "grad_norm": 2.953125, "learning_rate": 4.161038596035963e-06, "loss": 1.6097, "step": 7655 }, { "epoch": 2.1550147700098465, "grad_norm": 3.15625, "learning_rate": 4.156131416940824e-06, "loss": 1.6947, "step": 7660 }, { "epoch": 2.156421437614292, "grad_norm": 4.03125, "learning_rate": 4.151224002494002e-06, "loss": 1.5401, "step": 7665 }, { "epoch": 2.1578281052187367, "grad_norm": 3.78125, "learning_rate": 4.146316360092909e-06, "loss": 1.6713, "step": 7670 }, { "epoch": 2.159234772823182, "grad_norm": 4.0, "learning_rate": 4.141408497135299e-06, "loss": 1.5029, "step": 7675 }, { "epoch": 2.160641440427627, "grad_norm": 3.484375, "learning_rate": 4.136500421019258e-06, "loss": 1.5932, "step": 7680 }, { "epoch": 2.162048108032072, "grad_norm": 2.59375, "learning_rate": 4.131592139143195e-06, "loss": 1.6317, "step": 7685 }, { "epoch": 2.163454775636517, "grad_norm": 3.46875, "learning_rate": 4.126683658905829e-06, "loss": 1.603, "step": 7690 }, { "epoch": 2.1648614432409623, "grad_norm": 4.3125, "learning_rate": 4.121774987706177e-06, "loss": 1.5748, "step": 7695 }, { "epoch": 2.166268110845407, "grad_norm": 3.65625, "learning_rate": 4.116866132943544e-06, "loss": 1.4496, "step": 7700 }, { "epoch": 2.1676747784498525, "grad_norm": 2.859375, "learning_rate": 4.111957102017513e-06, "loss": 1.3999, "step": 7705 }, { "epoch": 2.1690814460542973, "grad_norm": 3.140625, "learning_rate": 4.10704790232793e-06, "loss": 1.6404, "step": 7710 }, { "epoch": 2.1704881136587426, "grad_norm": 2.765625, "learning_rate": 4.102138541274898e-06, "loss": 1.6583, "step": 7715 }, { "epoch": 2.1718947812631875, "grad_norm": 3.234375, "learning_rate": 4.097229026258762e-06, "loss": 1.6281, "step": 7720 }, { "epoch": 2.1733014488676328, "grad_norm": 2.6875, "learning_rate": 4.092319364680101e-06, "loss": 1.6493, "step": 7725 }, { "epoch": 2.1747081164720776, "grad_norm": 2.5625, "learning_rate": 4.08740956393971e-06, "loss": 1.7934, "step": 7730 }, { "epoch": 2.176114784076523, "grad_norm": 3.78125, "learning_rate": 4.082499631438599e-06, "loss": 1.6603, "step": 7735 }, { "epoch": 2.177521451680968, "grad_norm": 4.25, "learning_rate": 4.077589574577975e-06, "loss": 1.5522, "step": 7740 }, { "epoch": 2.1789281192854126, "grad_norm": 4.84375, "learning_rate": 4.07267940075923e-06, "loss": 1.6806, "step": 7745 }, { "epoch": 2.180334786889858, "grad_norm": 4.375, "learning_rate": 4.067769117383936e-06, "loss": 1.5831, "step": 7750 }, { "epoch": 2.181741454494303, "grad_norm": 3.71875, "learning_rate": 4.0628587318538295e-06, "loss": 1.5568, "step": 7755 }, { "epoch": 2.183148122098748, "grad_norm": 2.875, "learning_rate": 4.057948251570798e-06, "loss": 1.6057, "step": 7760 }, { "epoch": 2.184554789703193, "grad_norm": 8.8125, "learning_rate": 4.053037683936875e-06, "loss": 1.3439, "step": 7765 }, { "epoch": 2.1859614573076382, "grad_norm": 3.53125, "learning_rate": 4.048127036354224e-06, "loss": 1.4215, "step": 7770 }, { "epoch": 2.187368124912083, "grad_norm": 3.078125, "learning_rate": 4.0432163162251295e-06, "loss": 1.4706, "step": 7775 }, { "epoch": 2.1887747925165284, "grad_norm": 3.5625, "learning_rate": 4.038305530951986e-06, "loss": 1.5168, "step": 7780 }, { "epoch": 2.1901814601209733, "grad_norm": 3.890625, "learning_rate": 4.033394687937284e-06, "loss": 1.7687, "step": 7785 }, { "epoch": 2.1915881277254186, "grad_norm": 3.40625, "learning_rate": 4.028483794583606e-06, "loss": 1.6538, "step": 7790 }, { "epoch": 2.1929947953298634, "grad_norm": 4.21875, "learning_rate": 4.023572858293602e-06, "loss": 1.6807, "step": 7795 }, { "epoch": 2.1944014629343087, "grad_norm": 2.78125, "learning_rate": 4.018661886469996e-06, "loss": 1.5059, "step": 7800 }, { "epoch": 2.1958081305387536, "grad_norm": 3.953125, "learning_rate": 4.01375088651556e-06, "loss": 1.5728, "step": 7805 }, { "epoch": 2.197214798143199, "grad_norm": 3.484375, "learning_rate": 4.008839865833108e-06, "loss": 1.5053, "step": 7810 }, { "epoch": 2.1986214657476437, "grad_norm": 3.0, "learning_rate": 4.0039288318254895e-06, "loss": 1.544, "step": 7815 }, { "epoch": 2.200028133352089, "grad_norm": 3.28125, "learning_rate": 3.999017791895571e-06, "loss": 1.4862, "step": 7820 }, { "epoch": 2.201434800956534, "grad_norm": 3.171875, "learning_rate": 3.994106753446225e-06, "loss": 1.6569, "step": 7825 }, { "epoch": 2.202841468560979, "grad_norm": 2.859375, "learning_rate": 3.989195723880332e-06, "loss": 1.484, "step": 7830 }, { "epoch": 2.204248136165424, "grad_norm": 2.375, "learning_rate": 3.984284710600746e-06, "loss": 1.4287, "step": 7835 }, { "epoch": 2.2056548037698693, "grad_norm": 2.890625, "learning_rate": 3.979373721010306e-06, "loss": 1.7917, "step": 7840 }, { "epoch": 2.207061471374314, "grad_norm": 3.015625, "learning_rate": 3.97446276251181e-06, "loss": 1.5958, "step": 7845 }, { "epoch": 2.2084681389787595, "grad_norm": 2.703125, "learning_rate": 3.969551842508014e-06, "loss": 1.7798, "step": 7850 }, { "epoch": 2.2098748065832043, "grad_norm": 3.40625, "learning_rate": 3.964640968401612e-06, "loss": 1.6493, "step": 7855 }, { "epoch": 2.2112814741876496, "grad_norm": 3.734375, "learning_rate": 3.959730147595228e-06, "loss": 1.3387, "step": 7860 }, { "epoch": 2.2126881417920945, "grad_norm": 5.8125, "learning_rate": 3.954819387491411e-06, "loss": 1.6065, "step": 7865 }, { "epoch": 2.21409480939654, "grad_norm": 3.828125, "learning_rate": 3.949908695492612e-06, "loss": 1.4857, "step": 7870 }, { "epoch": 2.2155014770009847, "grad_norm": 3.0625, "learning_rate": 3.944998079001185e-06, "loss": 1.6135, "step": 7875 }, { "epoch": 2.21690814460543, "grad_norm": 3.625, "learning_rate": 3.940087545419365e-06, "loss": 1.4992, "step": 7880 }, { "epoch": 2.218314812209875, "grad_norm": 5.03125, "learning_rate": 3.9351771021492686e-06, "loss": 1.4218, "step": 7885 }, { "epoch": 2.2197214798143197, "grad_norm": 3.28125, "learning_rate": 3.9302667565928676e-06, "loss": 1.6094, "step": 7890 }, { "epoch": 2.221128147418765, "grad_norm": 2.375, "learning_rate": 3.925356516151996e-06, "loss": 1.4345, "step": 7895 }, { "epoch": 2.22253481502321, "grad_norm": 5.0625, "learning_rate": 3.920446388228319e-06, "loss": 1.3853, "step": 7900 }, { "epoch": 2.223941482627655, "grad_norm": 3.15625, "learning_rate": 3.915536380223344e-06, "loss": 1.6986, "step": 7905 }, { "epoch": 2.2253481502321, "grad_norm": 2.96875, "learning_rate": 3.910626499538387e-06, "loss": 1.8287, "step": 7910 }, { "epoch": 2.2267548178365453, "grad_norm": 3.15625, "learning_rate": 3.9057167535745795e-06, "loss": 1.453, "step": 7915 }, { "epoch": 2.22816148544099, "grad_norm": 2.515625, "learning_rate": 3.900807149732843e-06, "loss": 1.5434, "step": 7920 }, { "epoch": 2.2295681530454354, "grad_norm": 3.234375, "learning_rate": 3.895897695413892e-06, "loss": 1.6757, "step": 7925 }, { "epoch": 2.2309748206498803, "grad_norm": 3.25, "learning_rate": 3.890988398018212e-06, "loss": 1.4654, "step": 7930 }, { "epoch": 2.2323814882543256, "grad_norm": 3.9375, "learning_rate": 3.886079264946052e-06, "loss": 1.4485, "step": 7935 }, { "epoch": 2.2337881558587704, "grad_norm": 2.765625, "learning_rate": 3.881170303597412e-06, "loss": 1.5495, "step": 7940 }, { "epoch": 2.2351948234632157, "grad_norm": 2.703125, "learning_rate": 3.8762615213720365e-06, "loss": 1.4623, "step": 7945 }, { "epoch": 2.2366014910676606, "grad_norm": 2.9375, "learning_rate": 3.871352925669398e-06, "loss": 1.7347, "step": 7950 }, { "epoch": 2.238008158672106, "grad_norm": 4.96875, "learning_rate": 3.866444523888687e-06, "loss": 1.5133, "step": 7955 }, { "epoch": 2.2394148262765508, "grad_norm": 3.234375, "learning_rate": 3.861536323428805e-06, "loss": 1.6838, "step": 7960 }, { "epoch": 2.240821493880996, "grad_norm": 3.15625, "learning_rate": 3.856628331688346e-06, "loss": 1.3434, "step": 7965 }, { "epoch": 2.242228161485441, "grad_norm": 2.34375, "learning_rate": 3.8517205560655895e-06, "loss": 1.6505, "step": 7970 }, { "epoch": 2.243634829089886, "grad_norm": 3.359375, "learning_rate": 3.846813003958493e-06, "loss": 1.5625, "step": 7975 }, { "epoch": 2.245041496694331, "grad_norm": 2.375, "learning_rate": 3.841905682764676e-06, "loss": 1.6773, "step": 7980 }, { "epoch": 2.2464481642987764, "grad_norm": 3.53125, "learning_rate": 3.836998599881406e-06, "loss": 1.6122, "step": 7985 }, { "epoch": 2.247854831903221, "grad_norm": 3.359375, "learning_rate": 3.832091762705595e-06, "loss": 1.705, "step": 7990 }, { "epoch": 2.2492614995076665, "grad_norm": 3.0, "learning_rate": 3.827185178633787e-06, "loss": 1.5123, "step": 7995 }, { "epoch": 2.2506681671121114, "grad_norm": 4.03125, "learning_rate": 3.822278855062136e-06, "loss": 1.4593, "step": 8000 }, { "epoch": 2.2520748347165567, "grad_norm": 3.421875, "learning_rate": 3.8173727993864115e-06, "loss": 1.4751, "step": 8005 }, { "epoch": 2.2534815023210015, "grad_norm": 3.34375, "learning_rate": 3.8124670190019755e-06, "loss": 1.3753, "step": 8010 }, { "epoch": 2.2548881699254464, "grad_norm": 3.0625, "learning_rate": 3.807561521303777e-06, "loss": 1.6663, "step": 8015 }, { "epoch": 2.2562948375298917, "grad_norm": 3.0, "learning_rate": 3.802656313686336e-06, "loss": 1.5228, "step": 8020 }, { "epoch": 2.257701505134337, "grad_norm": 2.828125, "learning_rate": 3.7977514035437383e-06, "loss": 1.6107, "step": 8025 }, { "epoch": 2.259108172738782, "grad_norm": 3.25, "learning_rate": 3.7928467982696174e-06, "loss": 1.6885, "step": 8030 }, { "epoch": 2.2605148403432267, "grad_norm": 3.359375, "learning_rate": 3.7879425052571525e-06, "loss": 1.5311, "step": 8035 }, { "epoch": 2.261921507947672, "grad_norm": 3.09375, "learning_rate": 3.783038531899047e-06, "loss": 1.681, "step": 8040 }, { "epoch": 2.263328175552117, "grad_norm": 3.03125, "learning_rate": 3.7781348855875263e-06, "loss": 1.1735, "step": 8045 }, { "epoch": 2.264734843156562, "grad_norm": 3.34375, "learning_rate": 3.7732315737143205e-06, "loss": 1.615, "step": 8050 }, { "epoch": 2.266141510761007, "grad_norm": 3.640625, "learning_rate": 3.768328603670658e-06, "loss": 1.4219, "step": 8055 }, { "epoch": 2.2675481783654523, "grad_norm": 3.9375, "learning_rate": 3.7634259828472467e-06, "loss": 1.5921, "step": 8060 }, { "epoch": 2.268954845969897, "grad_norm": 4.125, "learning_rate": 3.7585237186342743e-06, "loss": 1.6932, "step": 8065 }, { "epoch": 2.2703615135743425, "grad_norm": 3.65625, "learning_rate": 3.753621818421388e-06, "loss": 1.7495, "step": 8070 }, { "epoch": 2.2717681811787873, "grad_norm": 3.78125, "learning_rate": 3.7487202895976864e-06, "loss": 1.6282, "step": 8075 }, { "epoch": 2.2731748487832326, "grad_norm": 3.140625, "learning_rate": 3.743819139551708e-06, "loss": 1.6892, "step": 8080 }, { "epoch": 2.2745815163876775, "grad_norm": 2.984375, "learning_rate": 3.7389183756714207e-06, "loss": 1.5165, "step": 8085 }, { "epoch": 2.2759881839921228, "grad_norm": 3.453125, "learning_rate": 3.7340180053442127e-06, "loss": 1.6025, "step": 8090 }, { "epoch": 2.2773948515965676, "grad_norm": 4.3125, "learning_rate": 3.7291180359568735e-06, "loss": 1.7114, "step": 8095 }, { "epoch": 2.278801519201013, "grad_norm": 4.03125, "learning_rate": 3.724218474895593e-06, "loss": 1.6153, "step": 8100 }, { "epoch": 2.280208186805458, "grad_norm": 2.875, "learning_rate": 3.719319329545943e-06, "loss": 1.5714, "step": 8105 }, { "epoch": 2.281614854409903, "grad_norm": 3.234375, "learning_rate": 3.7144206072928704e-06, "loss": 1.7677, "step": 8110 }, { "epoch": 2.283021522014348, "grad_norm": 3.3125, "learning_rate": 3.709522315520683e-06, "loss": 1.4741, "step": 8115 }, { "epoch": 2.2844281896187932, "grad_norm": 3.734375, "learning_rate": 3.704624461613043e-06, "loss": 1.7835, "step": 8120 }, { "epoch": 2.285834857223238, "grad_norm": 3.765625, "learning_rate": 3.6997270529529445e-06, "loss": 1.8235, "step": 8125 }, { "epoch": 2.2872415248276834, "grad_norm": 4.25, "learning_rate": 3.69483009692272e-06, "loss": 1.5382, "step": 8130 }, { "epoch": 2.2886481924321282, "grad_norm": 5.4375, "learning_rate": 3.6899336009040132e-06, "loss": 1.5006, "step": 8135 }, { "epoch": 2.2900548600365735, "grad_norm": 2.96875, "learning_rate": 3.685037572277778e-06, "loss": 1.8186, "step": 8140 }, { "epoch": 2.2914615276410184, "grad_norm": 2.515625, "learning_rate": 3.6801420184242626e-06, "loss": 1.7269, "step": 8145 }, { "epoch": 2.2928681952454637, "grad_norm": 3.0625, "learning_rate": 3.6752469467229975e-06, "loss": 1.5189, "step": 8150 }, { "epoch": 2.2942748628499086, "grad_norm": 3.75, "learning_rate": 3.6703523645527915e-06, "loss": 1.6616, "step": 8155 }, { "epoch": 2.2956815304543534, "grad_norm": 4.09375, "learning_rate": 3.6654582792917074e-06, "loss": 1.6282, "step": 8160 }, { "epoch": 2.2970881980587987, "grad_norm": 3.640625, "learning_rate": 3.660564698317069e-06, "loss": 1.5604, "step": 8165 }, { "epoch": 2.298494865663244, "grad_norm": 4.5625, "learning_rate": 3.6556716290054306e-06, "loss": 1.4532, "step": 8170 }, { "epoch": 2.299901533267689, "grad_norm": 3.59375, "learning_rate": 3.650779078732582e-06, "loss": 1.7224, "step": 8175 }, { "epoch": 2.3013082008721337, "grad_norm": 4.03125, "learning_rate": 3.6458870548735255e-06, "loss": 1.6006, "step": 8180 }, { "epoch": 2.302714868476579, "grad_norm": 4.03125, "learning_rate": 3.6409955648024756e-06, "loss": 1.3733, "step": 8185 }, { "epoch": 2.304121536081024, "grad_norm": 3.109375, "learning_rate": 3.6361046158928343e-06, "loss": 1.6563, "step": 8190 }, { "epoch": 2.305528203685469, "grad_norm": 2.796875, "learning_rate": 3.631214215517198e-06, "loss": 1.7411, "step": 8195 }, { "epoch": 2.306934871289914, "grad_norm": 3.140625, "learning_rate": 3.6263243710473258e-06, "loss": 1.5237, "step": 8200 }, { "epoch": 2.3083415388943593, "grad_norm": 2.921875, "learning_rate": 3.621435089854146e-06, "loss": 1.685, "step": 8205 }, { "epoch": 2.309748206498804, "grad_norm": 3.84375, "learning_rate": 3.616546379307736e-06, "loss": 1.3735, "step": 8210 }, { "epoch": 2.3111548741032495, "grad_norm": 3.265625, "learning_rate": 3.611658246777311e-06, "loss": 1.5922, "step": 8215 }, { "epoch": 2.3125615417076943, "grad_norm": 3.5625, "learning_rate": 3.6067706996312196e-06, "loss": 1.6493, "step": 8220 }, { "epoch": 2.3139682093121396, "grad_norm": 3.21875, "learning_rate": 3.601883745236919e-06, "loss": 1.3882, "step": 8225 }, { "epoch": 2.3153748769165845, "grad_norm": 4.46875, "learning_rate": 3.5969973909609857e-06, "loss": 1.483, "step": 8230 }, { "epoch": 2.31678154452103, "grad_norm": 3.984375, "learning_rate": 3.592111644169079e-06, "loss": 1.3885, "step": 8235 }, { "epoch": 2.3181882121254747, "grad_norm": 3.53125, "learning_rate": 3.5872265122259517e-06, "loss": 1.4347, "step": 8240 }, { "epoch": 2.31959487972992, "grad_norm": 2.96875, "learning_rate": 3.5823420024954233e-06, "loss": 1.7898, "step": 8245 }, { "epoch": 2.321001547334365, "grad_norm": 3.125, "learning_rate": 3.577458122340382e-06, "loss": 1.4103, "step": 8250 }, { "epoch": 2.32240821493881, "grad_norm": 3.8125, "learning_rate": 3.572574879122758e-06, "loss": 1.644, "step": 8255 }, { "epoch": 2.323814882543255, "grad_norm": 3.046875, "learning_rate": 3.5676922802035324e-06, "loss": 1.5583, "step": 8260 }, { "epoch": 2.3252215501477003, "grad_norm": 2.609375, "learning_rate": 3.562810332942705e-06, "loss": 1.4209, "step": 8265 }, { "epoch": 2.326628217752145, "grad_norm": 4.75, "learning_rate": 3.5579290446992996e-06, "loss": 1.476, "step": 8270 }, { "epoch": 2.3280348853565904, "grad_norm": 3.875, "learning_rate": 3.553048422831344e-06, "loss": 1.5637, "step": 8275 }, { "epoch": 2.3294415529610353, "grad_norm": 3.796875, "learning_rate": 3.548168474695862e-06, "loss": 1.4261, "step": 8280 }, { "epoch": 2.33084822056548, "grad_norm": 2.765625, "learning_rate": 3.5432892076488636e-06, "loss": 1.4481, "step": 8285 }, { "epoch": 2.3322548881699254, "grad_norm": 3.25, "learning_rate": 3.5384106290453275e-06, "loss": 1.6372, "step": 8290 }, { "epoch": 2.3336615557743707, "grad_norm": 3.0625, "learning_rate": 3.5335327462392014e-06, "loss": 1.6052, "step": 8295 }, { "epoch": 2.3350682233788156, "grad_norm": 3.8125, "learning_rate": 3.5286555665833763e-06, "loss": 1.5632, "step": 8300 }, { "epoch": 2.3364748909832604, "grad_norm": 2.078125, "learning_rate": 3.52377909742969e-06, "loss": 1.6063, "step": 8305 }, { "epoch": 2.3378815585877057, "grad_norm": 3.40625, "learning_rate": 3.5189033461289057e-06, "loss": 1.1783, "step": 8310 }, { "epoch": 2.339288226192151, "grad_norm": 3.5625, "learning_rate": 3.514028320030706e-06, "loss": 1.6411, "step": 8315 }, { "epoch": 2.340694893796596, "grad_norm": 2.515625, "learning_rate": 3.5091540264836788e-06, "loss": 1.5097, "step": 8320 }, { "epoch": 2.3421015614010408, "grad_norm": 5.40625, "learning_rate": 3.5042804728353112e-06, "loss": 1.6188, "step": 8325 }, { "epoch": 2.343508229005486, "grad_norm": 3.875, "learning_rate": 3.499407666431969e-06, "loss": 1.5711, "step": 8330 }, { "epoch": 2.344914896609931, "grad_norm": 4.03125, "learning_rate": 3.4945356146188977e-06, "loss": 1.2004, "step": 8335 }, { "epoch": 2.346321564214376, "grad_norm": 3.953125, "learning_rate": 3.489664324740201e-06, "loss": 1.7899, "step": 8340 }, { "epoch": 2.347728231818821, "grad_norm": 4.8125, "learning_rate": 3.4847938041388376e-06, "loss": 1.6106, "step": 8345 }, { "epoch": 2.3491348994232664, "grad_norm": 2.40625, "learning_rate": 3.4799240601566036e-06, "loss": 1.4558, "step": 8350 }, { "epoch": 2.350541567027711, "grad_norm": 3.25, "learning_rate": 3.4750551001341257e-06, "loss": 1.4972, "step": 8355 }, { "epoch": 2.3519482346321565, "grad_norm": 3.96875, "learning_rate": 3.4701869314108503e-06, "loss": 1.4276, "step": 8360 }, { "epoch": 2.3533549022366014, "grad_norm": 2.90625, "learning_rate": 3.465319561325027e-06, "loss": 1.6449, "step": 8365 }, { "epoch": 2.3547615698410467, "grad_norm": 3.53125, "learning_rate": 3.460452997213707e-06, "loss": 1.6125, "step": 8370 }, { "epoch": 2.3561682374454915, "grad_norm": 3.8125, "learning_rate": 3.4555872464127207e-06, "loss": 1.443, "step": 8375 }, { "epoch": 2.357574905049937, "grad_norm": 3.390625, "learning_rate": 3.4507223162566776e-06, "loss": 1.5425, "step": 8380 }, { "epoch": 2.3589815726543817, "grad_norm": 3.890625, "learning_rate": 3.445858214078946e-06, "loss": 1.5239, "step": 8385 }, { "epoch": 2.360388240258827, "grad_norm": 4.28125, "learning_rate": 3.440994947211652e-06, "loss": 1.3468, "step": 8390 }, { "epoch": 2.361794907863272, "grad_norm": 3.140625, "learning_rate": 3.4361325229856537e-06, "loss": 1.7437, "step": 8395 }, { "epoch": 2.363201575467717, "grad_norm": 2.96875, "learning_rate": 3.4312709487305474e-06, "loss": 1.6956, "step": 8400 }, { "epoch": 2.364608243072162, "grad_norm": 5.5, "learning_rate": 3.4264102317746424e-06, "loss": 1.631, "step": 8405 }, { "epoch": 2.3660149106766073, "grad_norm": 2.96875, "learning_rate": 3.4215503794449613e-06, "loss": 1.6996, "step": 8410 }, { "epoch": 2.367421578281052, "grad_norm": 2.734375, "learning_rate": 3.416691399067217e-06, "loss": 1.6483, "step": 8415 }, { "epoch": 2.3688282458854975, "grad_norm": 2.546875, "learning_rate": 3.4118332979658116e-06, "loss": 1.5546, "step": 8420 }, { "epoch": 2.3702349134899423, "grad_norm": 2.953125, "learning_rate": 3.406976083463824e-06, "loss": 1.7836, "step": 8425 }, { "epoch": 2.371641581094387, "grad_norm": 4.21875, "learning_rate": 3.4021197628829902e-06, "loss": 1.5147, "step": 8430 }, { "epoch": 2.3730482486988325, "grad_norm": 2.71875, "learning_rate": 3.3972643435437062e-06, "loss": 1.6279, "step": 8435 }, { "epoch": 2.3744549163032778, "grad_norm": 2.359375, "learning_rate": 3.392409832765002e-06, "loss": 1.8905, "step": 8440 }, { "epoch": 2.3758615839077226, "grad_norm": 3.296875, "learning_rate": 3.387556237864545e-06, "loss": 1.6089, "step": 8445 }, { "epoch": 2.3772682515121675, "grad_norm": 2.609375, "learning_rate": 3.3827035661586165e-06, "loss": 1.3313, "step": 8450 }, { "epoch": 2.3786749191166128, "grad_norm": 3.15625, "learning_rate": 3.3778518249621117e-06, "loss": 1.5232, "step": 8455 }, { "epoch": 2.3800815867210576, "grad_norm": 2.90625, "learning_rate": 3.3730010215885155e-06, "loss": 1.6252, "step": 8460 }, { "epoch": 2.381488254325503, "grad_norm": 3.234375, "learning_rate": 3.368151163349907e-06, "loss": 1.397, "step": 8465 }, { "epoch": 2.382894921929948, "grad_norm": 3.828125, "learning_rate": 3.363302257556935e-06, "loss": 1.6473, "step": 8470 }, { "epoch": 2.384301589534393, "grad_norm": 3.765625, "learning_rate": 3.3584543115188167e-06, "loss": 1.6578, "step": 8475 }, { "epoch": 2.385708257138838, "grad_norm": 2.421875, "learning_rate": 3.353607332543319e-06, "loss": 1.4938, "step": 8480 }, { "epoch": 2.3871149247432832, "grad_norm": 2.375, "learning_rate": 3.348761327936755e-06, "loss": 1.5833, "step": 8485 }, { "epoch": 2.388521592347728, "grad_norm": 3.125, "learning_rate": 3.3439163050039637e-06, "loss": 1.5118, "step": 8490 }, { "epoch": 2.3899282599521734, "grad_norm": 5.875, "learning_rate": 3.339072271048308e-06, "loss": 1.635, "step": 8495 }, { "epoch": 2.3913349275566183, "grad_norm": 3.0, "learning_rate": 3.3342292333716626e-06, "loss": 1.5718, "step": 8500 }, { "epoch": 2.3927415951610636, "grad_norm": 3.171875, "learning_rate": 3.3293871992743935e-06, "loss": 1.6902, "step": 8505 }, { "epoch": 2.3941482627655084, "grad_norm": 3.609375, "learning_rate": 3.32454617605536e-06, "loss": 1.6936, "step": 8510 }, { "epoch": 2.3955549303699537, "grad_norm": 3.859375, "learning_rate": 3.3197061710118926e-06, "loss": 1.6683, "step": 8515 }, { "epoch": 2.3969615979743986, "grad_norm": 2.515625, "learning_rate": 3.314867191439794e-06, "loss": 1.5576, "step": 8520 }, { "epoch": 2.398368265578844, "grad_norm": 2.59375, "learning_rate": 3.3100292446333103e-06, "loss": 1.737, "step": 8525 }, { "epoch": 2.3997749331832887, "grad_norm": 2.859375, "learning_rate": 3.305192337885144e-06, "loss": 1.6273, "step": 8530 }, { "epoch": 2.401181600787734, "grad_norm": 3.0625, "learning_rate": 3.3003564784864185e-06, "loss": 1.8476, "step": 8535 }, { "epoch": 2.402588268392179, "grad_norm": 2.75, "learning_rate": 3.2955216737266854e-06, "loss": 1.6457, "step": 8540 }, { "epoch": 2.403994935996624, "grad_norm": 3.828125, "learning_rate": 3.2906879308939024e-06, "loss": 1.6365, "step": 8545 }, { "epoch": 2.405401603601069, "grad_norm": 2.75, "learning_rate": 3.2858552572744306e-06, "loss": 1.7155, "step": 8550 }, { "epoch": 2.4068082712055143, "grad_norm": 4.40625, "learning_rate": 3.2810236601530134e-06, "loss": 1.7192, "step": 8555 }, { "epoch": 2.408214938809959, "grad_norm": 3.171875, "learning_rate": 3.2761931468127777e-06, "loss": 1.5524, "step": 8560 }, { "epoch": 2.4096216064144045, "grad_norm": 5.9375, "learning_rate": 3.2713637245352154e-06, "loss": 1.4309, "step": 8565 }, { "epoch": 2.4110282740188493, "grad_norm": 9.5625, "learning_rate": 3.2665354006001687e-06, "loss": 1.5507, "step": 8570 }, { "epoch": 2.412434941623294, "grad_norm": 4.375, "learning_rate": 3.2617081822858303e-06, "loss": 1.7103, "step": 8575 }, { "epoch": 2.4138416092277395, "grad_norm": 3.921875, "learning_rate": 3.256882076868723e-06, "loss": 1.2869, "step": 8580 }, { "epoch": 2.415248276832185, "grad_norm": 4.5, "learning_rate": 3.252057091623695e-06, "loss": 1.554, "step": 8585 }, { "epoch": 2.4166549444366296, "grad_norm": 4.21875, "learning_rate": 3.2472332338238994e-06, "loss": 1.5645, "step": 8590 }, { "epoch": 2.4180616120410745, "grad_norm": 2.9375, "learning_rate": 3.2424105107407996e-06, "loss": 1.6879, "step": 8595 }, { "epoch": 2.41946827964552, "grad_norm": 4.03125, "learning_rate": 3.237588929644139e-06, "loss": 1.507, "step": 8600 }, { "epoch": 2.4208749472499647, "grad_norm": 2.984375, "learning_rate": 3.2327684978019464e-06, "loss": 1.4155, "step": 8605 }, { "epoch": 2.42228161485441, "grad_norm": 3.34375, "learning_rate": 3.227949222480513e-06, "loss": 1.5608, "step": 8610 }, { "epoch": 2.423688282458855, "grad_norm": 3.375, "learning_rate": 3.223131110944393e-06, "loss": 1.6058, "step": 8615 }, { "epoch": 2.4250949500633, "grad_norm": 4.125, "learning_rate": 3.218314170456378e-06, "loss": 1.5107, "step": 8620 }, { "epoch": 2.426501617667745, "grad_norm": 2.140625, "learning_rate": 3.2134984082775036e-06, "loss": 1.86, "step": 8625 }, { "epoch": 2.4279082852721903, "grad_norm": 2.75, "learning_rate": 3.2086838316670204e-06, "loss": 1.5441, "step": 8630 }, { "epoch": 2.429314952876635, "grad_norm": 3.3125, "learning_rate": 3.2038704478823983e-06, "loss": 1.6657, "step": 8635 }, { "epoch": 2.4307216204810804, "grad_norm": 4.03125, "learning_rate": 3.1990582641793078e-06, "loss": 1.6059, "step": 8640 }, { "epoch": 2.4321282880855253, "grad_norm": 3.1875, "learning_rate": 3.1942472878116066e-06, "loss": 1.5391, "step": 8645 }, { "epoch": 2.4335349556899706, "grad_norm": 3.703125, "learning_rate": 3.1894375260313384e-06, "loss": 1.4821, "step": 8650 }, { "epoch": 2.4349416232944154, "grad_norm": 3.734375, "learning_rate": 3.18462898608871e-06, "loss": 1.581, "step": 8655 }, { "epoch": 2.4363482908988607, "grad_norm": 2.59375, "learning_rate": 3.1798216752320934e-06, "loss": 1.6751, "step": 8660 }, { "epoch": 2.4377549585033056, "grad_norm": 3.78125, "learning_rate": 3.175015600707999e-06, "loss": 1.4178, "step": 8665 }, { "epoch": 2.439161626107751, "grad_norm": 3.078125, "learning_rate": 3.1702107697610825e-06, "loss": 1.4115, "step": 8670 }, { "epoch": 2.4405682937121957, "grad_norm": 4.34375, "learning_rate": 3.1654071896341184e-06, "loss": 1.354, "step": 8675 }, { "epoch": 2.441974961316641, "grad_norm": 3.0, "learning_rate": 3.1606048675680002e-06, "loss": 1.7495, "step": 8680 }, { "epoch": 2.443381628921086, "grad_norm": 3.15625, "learning_rate": 3.1558038108017213e-06, "loss": 1.6442, "step": 8685 }, { "epoch": 2.444788296525531, "grad_norm": 3.78125, "learning_rate": 3.151004026572372e-06, "loss": 1.5592, "step": 8690 }, { "epoch": 2.446194964129976, "grad_norm": 2.75, "learning_rate": 3.146205522115119e-06, "loss": 1.7754, "step": 8695 }, { "epoch": 2.447601631734421, "grad_norm": 2.828125, "learning_rate": 3.141408304663205e-06, "loss": 1.461, "step": 8700 }, { "epoch": 2.449008299338866, "grad_norm": 3.21875, "learning_rate": 3.1366123814479293e-06, "loss": 1.3845, "step": 8705 }, { "epoch": 2.4504149669433115, "grad_norm": 3.5625, "learning_rate": 3.1318177596986425e-06, "loss": 1.4798, "step": 8710 }, { "epoch": 2.4518216345477564, "grad_norm": 2.859375, "learning_rate": 3.127024446642732e-06, "loss": 1.702, "step": 8715 }, { "epoch": 2.4532283021522012, "grad_norm": 4.96875, "learning_rate": 3.1222324495056124e-06, "loss": 1.8042, "step": 8720 }, { "epoch": 2.4546349697566465, "grad_norm": 2.640625, "learning_rate": 3.1174417755107177e-06, "loss": 1.5827, "step": 8725 }, { "epoch": 2.456041637361092, "grad_norm": 3.515625, "learning_rate": 3.112652431879481e-06, "loss": 1.7237, "step": 8730 }, { "epoch": 2.4574483049655367, "grad_norm": 3.234375, "learning_rate": 3.1078644258313365e-06, "loss": 1.5673, "step": 8735 }, { "epoch": 2.4588549725699815, "grad_norm": 2.859375, "learning_rate": 3.1030777645836974e-06, "loss": 1.5594, "step": 8740 }, { "epoch": 2.460261640174427, "grad_norm": 3.09375, "learning_rate": 3.0982924553519548e-06, "loss": 1.4208, "step": 8745 }, { "epoch": 2.4616683077788717, "grad_norm": 3.046875, "learning_rate": 3.0935085053494557e-06, "loss": 1.5434, "step": 8750 }, { "epoch": 2.463074975383317, "grad_norm": 3.390625, "learning_rate": 3.088725921787505e-06, "loss": 1.6963, "step": 8755 }, { "epoch": 2.464481642987762, "grad_norm": 3.34375, "learning_rate": 3.0839447118753407e-06, "loss": 1.5322, "step": 8760 }, { "epoch": 2.465888310592207, "grad_norm": 4.75, "learning_rate": 3.0791648828201354e-06, "loss": 1.6891, "step": 8765 }, { "epoch": 2.467294978196652, "grad_norm": 3.203125, "learning_rate": 3.0743864418269777e-06, "loss": 1.652, "step": 8770 }, { "epoch": 2.4687016458010973, "grad_norm": 3.0625, "learning_rate": 3.069609396098865e-06, "loss": 1.5267, "step": 8775 }, { "epoch": 2.470108313405542, "grad_norm": 3.6875, "learning_rate": 3.064833752836692e-06, "loss": 1.4356, "step": 8780 }, { "epoch": 2.4715149810099875, "grad_norm": 3.546875, "learning_rate": 3.0600595192392364e-06, "loss": 1.6106, "step": 8785 }, { "epoch": 2.4729216486144323, "grad_norm": 2.90625, "learning_rate": 3.055286702503156e-06, "loss": 1.4059, "step": 8790 }, { "epoch": 2.4743283162188776, "grad_norm": 3.875, "learning_rate": 3.050515309822966e-06, "loss": 1.5044, "step": 8795 }, { "epoch": 2.4757349838233225, "grad_norm": 2.703125, "learning_rate": 3.0457453483910417e-06, "loss": 1.5502, "step": 8800 }, { "epoch": 2.4771416514277678, "grad_norm": 3.421875, "learning_rate": 3.0409768253975967e-06, "loss": 1.6319, "step": 8805 }, { "epoch": 2.4785483190322126, "grad_norm": 3.15625, "learning_rate": 3.0362097480306787e-06, "loss": 1.4717, "step": 8810 }, { "epoch": 2.479954986636658, "grad_norm": 3.0, "learning_rate": 3.031444123476154e-06, "loss": 1.702, "step": 8815 }, { "epoch": 2.4813616542411028, "grad_norm": 4.3125, "learning_rate": 3.0266799589177023e-06, "loss": 1.7863, "step": 8820 }, { "epoch": 2.482768321845548, "grad_norm": 3.9375, "learning_rate": 3.021917261536797e-06, "loss": 1.1797, "step": 8825 }, { "epoch": 2.484174989449993, "grad_norm": 3.5625, "learning_rate": 3.0171560385127066e-06, "loss": 1.608, "step": 8830 }, { "epoch": 2.4855816570544382, "grad_norm": 3.609375, "learning_rate": 3.012396297022471e-06, "loss": 1.2161, "step": 8835 }, { "epoch": 2.486988324658883, "grad_norm": 3.1875, "learning_rate": 3.0076380442409023e-06, "loss": 1.6565, "step": 8840 }, { "epoch": 2.488394992263328, "grad_norm": 2.1875, "learning_rate": 3.0028812873405636e-06, "loss": 1.745, "step": 8845 }, { "epoch": 2.4898016598677732, "grad_norm": 3.03125, "learning_rate": 2.9981260334917666e-06, "loss": 1.6035, "step": 8850 }, { "epoch": 2.4912083274722185, "grad_norm": 3.3125, "learning_rate": 2.9933722898625575e-06, "loss": 1.558, "step": 8855 }, { "epoch": 2.4926149950766634, "grad_norm": 3.40625, "learning_rate": 2.988620063618701e-06, "loss": 1.6606, "step": 8860 }, { "epoch": 2.4940216626811083, "grad_norm": 4.25, "learning_rate": 2.9838693619236823e-06, "loss": 1.5781, "step": 8865 }, { "epoch": 2.4954283302855536, "grad_norm": 3.203125, "learning_rate": 2.9791201919386807e-06, "loss": 1.6853, "step": 8870 }, { "epoch": 2.4968349978899984, "grad_norm": 2.8125, "learning_rate": 2.974372560822573e-06, "loss": 1.5243, "step": 8875 }, { "epoch": 2.4982416654944437, "grad_norm": 4.8125, "learning_rate": 2.9696264757319113e-06, "loss": 1.5861, "step": 8880 }, { "epoch": 2.4996483330988886, "grad_norm": 4.21875, "learning_rate": 2.9648819438209228e-06, "loss": 1.6032, "step": 8885 }, { "epoch": 2.501055000703334, "grad_norm": 2.765625, "learning_rate": 2.960138972241485e-06, "loss": 1.6981, "step": 8890 }, { "epoch": 2.5024616683077787, "grad_norm": 2.859375, "learning_rate": 2.955397568143134e-06, "loss": 1.5882, "step": 8895 }, { "epoch": 2.503868335912224, "grad_norm": 3.171875, "learning_rate": 2.950657738673033e-06, "loss": 1.5093, "step": 8900 }, { "epoch": 2.505275003516669, "grad_norm": 3.0625, "learning_rate": 2.945919490975979e-06, "loss": 1.7241, "step": 8905 }, { "epoch": 2.506681671121114, "grad_norm": 3.703125, "learning_rate": 2.9411828321943804e-06, "loss": 1.546, "step": 8910 }, { "epoch": 2.508088338725559, "grad_norm": 3.109375, "learning_rate": 2.9364477694682546e-06, "loss": 1.6981, "step": 8915 }, { "epoch": 2.5094950063300043, "grad_norm": 3.015625, "learning_rate": 2.9317143099352056e-06, "loss": 1.5693, "step": 8920 }, { "epoch": 2.510901673934449, "grad_norm": 3.078125, "learning_rate": 2.926982460730429e-06, "loss": 1.436, "step": 8925 }, { "epoch": 2.5123083415388945, "grad_norm": 3.625, "learning_rate": 2.922252228986691e-06, "loss": 1.7639, "step": 8930 }, { "epoch": 2.5137150091433393, "grad_norm": 3.453125, "learning_rate": 2.917523621834314e-06, "loss": 1.568, "step": 8935 }, { "epoch": 2.5151216767477846, "grad_norm": 2.765625, "learning_rate": 2.9127966464011787e-06, "loss": 1.7766, "step": 8940 }, { "epoch": 2.5165283443522295, "grad_norm": 3.578125, "learning_rate": 2.908071309812702e-06, "loss": 1.4208, "step": 8945 }, { "epoch": 2.517935011956675, "grad_norm": 3.265625, "learning_rate": 2.9033476191918338e-06, "loss": 1.7117, "step": 8950 }, { "epoch": 2.5193416795611197, "grad_norm": 3.203125, "learning_rate": 2.8986255816590365e-06, "loss": 1.8485, "step": 8955 }, { "epoch": 2.520748347165565, "grad_norm": 3.34375, "learning_rate": 2.8939052043322895e-06, "loss": 1.6935, "step": 8960 }, { "epoch": 2.52215501477001, "grad_norm": 3.4375, "learning_rate": 2.8891864943270603e-06, "loss": 1.6999, "step": 8965 }, { "epoch": 2.5235616823744547, "grad_norm": 3.671875, "learning_rate": 2.884469458756312e-06, "loss": 1.4383, "step": 8970 }, { "epoch": 2.5249683499789, "grad_norm": 3.390625, "learning_rate": 2.8797541047304764e-06, "loss": 1.5988, "step": 8975 }, { "epoch": 2.5263750175833453, "grad_norm": 3.59375, "learning_rate": 2.875040439357456e-06, "loss": 1.6122, "step": 8980 }, { "epoch": 2.52778168518779, "grad_norm": 4.5625, "learning_rate": 2.8703284697426015e-06, "loss": 1.5492, "step": 8985 }, { "epoch": 2.529188352792235, "grad_norm": 3.71875, "learning_rate": 2.8656182029887148e-06, "loss": 1.2839, "step": 8990 }, { "epoch": 2.5305950203966803, "grad_norm": 2.40625, "learning_rate": 2.8609096461960276e-06, "loss": 1.8007, "step": 8995 }, { "epoch": 2.5320016880011256, "grad_norm": 3.3125, "learning_rate": 2.8562028064621917e-06, "loss": 1.4047, "step": 9000 }, { "epoch": 2.5334083556055704, "grad_norm": 3.21875, "learning_rate": 2.851497690882274e-06, "loss": 1.5034, "step": 9005 }, { "epoch": 2.5348150232100153, "grad_norm": 2.953125, "learning_rate": 2.84679430654874e-06, "loss": 1.6142, "step": 9010 }, { "epoch": 2.5362216908144606, "grad_norm": 3.9375, "learning_rate": 2.842092660551448e-06, "loss": 1.3421, "step": 9015 }, { "epoch": 2.537628358418906, "grad_norm": 2.984375, "learning_rate": 2.837392759977634e-06, "loss": 1.6882, "step": 9020 }, { "epoch": 2.5390350260233507, "grad_norm": 4.34375, "learning_rate": 2.832694611911905e-06, "loss": 1.4796, "step": 9025 }, { "epoch": 2.5404416936277956, "grad_norm": 4.09375, "learning_rate": 2.8279982234362223e-06, "loss": 1.4512, "step": 9030 }, { "epoch": 2.541848361232241, "grad_norm": 5.0, "learning_rate": 2.8233036016299e-06, "loss": 1.6511, "step": 9035 }, { "epoch": 2.5432550288366857, "grad_norm": 2.796875, "learning_rate": 2.818610753569583e-06, "loss": 1.5149, "step": 9040 }, { "epoch": 2.544661696441131, "grad_norm": 3.40625, "learning_rate": 2.8139196863292497e-06, "loss": 1.4205, "step": 9045 }, { "epoch": 2.546068364045576, "grad_norm": 3.921875, "learning_rate": 2.8092304069801875e-06, "loss": 1.4803, "step": 9050 }, { "epoch": 2.547475031650021, "grad_norm": 3.84375, "learning_rate": 2.8045429225909953e-06, "loss": 1.5202, "step": 9055 }, { "epoch": 2.548881699254466, "grad_norm": 2.90625, "learning_rate": 2.799857240227558e-06, "loss": 1.5887, "step": 9060 }, { "epoch": 2.5502883668589114, "grad_norm": 2.734375, "learning_rate": 2.795173366953051e-06, "loss": 1.6058, "step": 9065 }, { "epoch": 2.551695034463356, "grad_norm": 3.796875, "learning_rate": 2.7904913098279213e-06, "loss": 1.6265, "step": 9070 }, { "epoch": 2.5531017020678015, "grad_norm": 3.703125, "learning_rate": 2.7858110759098753e-06, "loss": 1.4793, "step": 9075 }, { "epoch": 2.5545083696722464, "grad_norm": 3.4375, "learning_rate": 2.7811326722538755e-06, "loss": 1.5202, "step": 9080 }, { "epoch": 2.5559150372766917, "grad_norm": 2.6875, "learning_rate": 2.776456105912121e-06, "loss": 1.6495, "step": 9085 }, { "epoch": 2.5573217048811365, "grad_norm": 3.390625, "learning_rate": 2.771781383934046e-06, "loss": 1.6192, "step": 9090 }, { "epoch": 2.5587283724855814, "grad_norm": 3.53125, "learning_rate": 2.767108513366299e-06, "loss": 1.4899, "step": 9095 }, { "epoch": 2.5601350400900267, "grad_norm": 3.390625, "learning_rate": 2.7624375012527423e-06, "loss": 1.6238, "step": 9100 }, { "epoch": 2.561541707694472, "grad_norm": 3.453125, "learning_rate": 2.757768354634435e-06, "loss": 1.142, "step": 9105 }, { "epoch": 2.562948375298917, "grad_norm": 3.359375, "learning_rate": 2.7531010805496245e-06, "loss": 1.5075, "step": 9110 }, { "epoch": 2.5643550429033617, "grad_norm": 2.703125, "learning_rate": 2.748435686033735e-06, "loss": 1.5947, "step": 9115 }, { "epoch": 2.565761710507807, "grad_norm": 4.71875, "learning_rate": 2.7437721781193596e-06, "loss": 1.5573, "step": 9120 }, { "epoch": 2.5671683781122523, "grad_norm": 2.828125, "learning_rate": 2.7391105638362422e-06, "loss": 1.4712, "step": 9125 }, { "epoch": 2.568575045716697, "grad_norm": 4.53125, "learning_rate": 2.734450850211278e-06, "loss": 1.5379, "step": 9130 }, { "epoch": 2.569981713321142, "grad_norm": 3.0, "learning_rate": 2.7297930442684958e-06, "loss": 1.2337, "step": 9135 }, { "epoch": 2.5713883809255873, "grad_norm": 3.296875, "learning_rate": 2.7251371530290464e-06, "loss": 1.5338, "step": 9140 }, { "epoch": 2.5727950485300326, "grad_norm": 3.609375, "learning_rate": 2.720483183511197e-06, "loss": 1.3647, "step": 9145 }, { "epoch": 2.5742017161344775, "grad_norm": 3.625, "learning_rate": 2.715831142730316e-06, "loss": 1.4631, "step": 9150 }, { "epoch": 2.5756083837389223, "grad_norm": 4.34375, "learning_rate": 2.711181037698867e-06, "loss": 1.4525, "step": 9155 }, { "epoch": 2.5770150513433676, "grad_norm": 4.53125, "learning_rate": 2.706532875426392e-06, "loss": 1.6094, "step": 9160 }, { "epoch": 2.5784217189478125, "grad_norm": 3.984375, "learning_rate": 2.7018866629195077e-06, "loss": 1.4726, "step": 9165 }, { "epoch": 2.5798283865522578, "grad_norm": 2.953125, "learning_rate": 2.69724240718189e-06, "loss": 1.507, "step": 9170 }, { "epoch": 2.5812350541567026, "grad_norm": 2.5625, "learning_rate": 2.692600115214267e-06, "loss": 1.586, "step": 9175 }, { "epoch": 2.582641721761148, "grad_norm": 3.71875, "learning_rate": 2.6879597940144038e-06, "loss": 1.5724, "step": 9180 }, { "epoch": 2.584048389365593, "grad_norm": 4.5625, "learning_rate": 2.683321450577098e-06, "loss": 1.6215, "step": 9185 }, { "epoch": 2.585455056970038, "grad_norm": 3.1875, "learning_rate": 2.678685091894162e-06, "loss": 1.6285, "step": 9190 }, { "epoch": 2.586861724574483, "grad_norm": 3.671875, "learning_rate": 2.674050724954421e-06, "loss": 1.4316, "step": 9195 }, { "epoch": 2.5882683921789282, "grad_norm": 3.265625, "learning_rate": 2.6694183567436936e-06, "loss": 1.4817, "step": 9200 }, { "epoch": 2.589675059783373, "grad_norm": 2.65625, "learning_rate": 2.664787994244788e-06, "loss": 1.5046, "step": 9205 }, { "epoch": 2.5910817273878184, "grad_norm": 3.234375, "learning_rate": 2.66015964443749e-06, "loss": 1.4744, "step": 9210 }, { "epoch": 2.5924883949922632, "grad_norm": 3.234375, "learning_rate": 2.655533314298548e-06, "loss": 1.6461, "step": 9215 }, { "epoch": 2.5938950625967085, "grad_norm": 2.953125, "learning_rate": 2.6509090108016707e-06, "loss": 1.6825, "step": 9220 }, { "epoch": 2.5953017302011534, "grad_norm": 4.3125, "learning_rate": 2.646286740917504e-06, "loss": 1.5424, "step": 9225 }, { "epoch": 2.5967083978055987, "grad_norm": 10.125, "learning_rate": 2.641666511613639e-06, "loss": 1.5151, "step": 9230 }, { "epoch": 2.5981150654100436, "grad_norm": 3.1875, "learning_rate": 2.637048329854581e-06, "loss": 1.6234, "step": 9235 }, { "epoch": 2.5995217330144884, "grad_norm": 3.0, "learning_rate": 2.632432202601755e-06, "loss": 1.7191, "step": 9240 }, { "epoch": 2.6009284006189337, "grad_norm": 4.09375, "learning_rate": 2.6278181368134873e-06, "loss": 1.7231, "step": 9245 }, { "epoch": 2.602335068223379, "grad_norm": 3.046875, "learning_rate": 2.623206139444997e-06, "loss": 1.6375, "step": 9250 }, { "epoch": 2.603741735827824, "grad_norm": 3.46875, "learning_rate": 2.6185962174483815e-06, "loss": 1.4945, "step": 9255 }, { "epoch": 2.6051484034322687, "grad_norm": 2.828125, "learning_rate": 2.6139883777726185e-06, "loss": 1.7805, "step": 9260 }, { "epoch": 2.606555071036714, "grad_norm": 2.640625, "learning_rate": 2.609382627363536e-06, "loss": 1.6192, "step": 9265 }, { "epoch": 2.6079617386411593, "grad_norm": 4.09375, "learning_rate": 2.6047789731638224e-06, "loss": 1.4136, "step": 9270 }, { "epoch": 2.609368406245604, "grad_norm": 3.9375, "learning_rate": 2.600177422112999e-06, "loss": 1.5048, "step": 9275 }, { "epoch": 2.610775073850049, "grad_norm": 3.078125, "learning_rate": 2.5955779811474213e-06, "loss": 1.6703, "step": 9280 }, { "epoch": 2.6121817414544943, "grad_norm": 3.34375, "learning_rate": 2.5909806572002634e-06, "loss": 1.6786, "step": 9285 }, { "epoch": 2.6135884090589396, "grad_norm": 4.875, "learning_rate": 2.5863854572015057e-06, "loss": 1.6156, "step": 9290 }, { "epoch": 2.6149950766633845, "grad_norm": 3.0, "learning_rate": 2.5817923880779308e-06, "loss": 1.6928, "step": 9295 }, { "epoch": 2.6164017442678293, "grad_norm": 2.875, "learning_rate": 2.577201456753104e-06, "loss": 1.6488, "step": 9300 }, { "epoch": 2.6178084118722746, "grad_norm": 2.65625, "learning_rate": 2.572612670147374e-06, "loss": 1.6353, "step": 9305 }, { "epoch": 2.6192150794767195, "grad_norm": 3.125, "learning_rate": 2.5680260351778523e-06, "loss": 1.4599, "step": 9310 }, { "epoch": 2.620621747081165, "grad_norm": 2.265625, "learning_rate": 2.56344155875841e-06, "loss": 1.4838, "step": 9315 }, { "epoch": 2.6220284146856097, "grad_norm": 3.34375, "learning_rate": 2.55885924779966e-06, "loss": 1.5812, "step": 9320 }, { "epoch": 2.623435082290055, "grad_norm": 3.234375, "learning_rate": 2.5542791092089586e-06, "loss": 1.4303, "step": 9325 }, { "epoch": 2.6248417498945, "grad_norm": 2.828125, "learning_rate": 2.549701149890377e-06, "loss": 1.6144, "step": 9330 }, { "epoch": 2.626248417498945, "grad_norm": 3.4375, "learning_rate": 2.545125376744712e-06, "loss": 1.588, "step": 9335 }, { "epoch": 2.62765508510339, "grad_norm": 3.046875, "learning_rate": 2.540551796669457e-06, "loss": 1.6687, "step": 9340 }, { "epoch": 2.6290617527078353, "grad_norm": 4.21875, "learning_rate": 2.535980416558804e-06, "loss": 1.5527, "step": 9345 }, { "epoch": 2.63046842031228, "grad_norm": 2.453125, "learning_rate": 2.531411243303629e-06, "loss": 1.429, "step": 9350 }, { "epoch": 2.6318750879167254, "grad_norm": 2.859375, "learning_rate": 2.526844283791477e-06, "loss": 1.4638, "step": 9355 }, { "epoch": 2.6332817555211703, "grad_norm": 4.0, "learning_rate": 2.5222795449065623e-06, "loss": 1.8405, "step": 9360 }, { "epoch": 2.6346884231256156, "grad_norm": 3.15625, "learning_rate": 2.5177170335297445e-06, "loss": 1.6266, "step": 9365 }, { "epoch": 2.6360950907300604, "grad_norm": 4.09375, "learning_rate": 2.5131567565385327e-06, "loss": 1.5076, "step": 9370 }, { "epoch": 2.6375017583345057, "grad_norm": 2.515625, "learning_rate": 2.5085987208070628e-06, "loss": 1.6445, "step": 9375 }, { "epoch": 2.6389084259389506, "grad_norm": 2.703125, "learning_rate": 2.5040429332060953e-06, "loss": 1.6817, "step": 9380 }, { "epoch": 2.6403150935433954, "grad_norm": 3.375, "learning_rate": 2.499489400602999e-06, "loss": 1.7791, "step": 9385 }, { "epoch": 2.6417217611478407, "grad_norm": 3.171875, "learning_rate": 2.4949381298617478e-06, "loss": 1.5416, "step": 9390 }, { "epoch": 2.643128428752286, "grad_norm": 2.96875, "learning_rate": 2.4903891278429002e-06, "loss": 1.7715, "step": 9395 }, { "epoch": 2.644535096356731, "grad_norm": 2.609375, "learning_rate": 2.485842401403601e-06, "loss": 1.6767, "step": 9400 }, { "epoch": 2.6459417639611758, "grad_norm": 3.109375, "learning_rate": 2.4812979573975595e-06, "loss": 1.7507, "step": 9405 }, { "epoch": 2.647348431565621, "grad_norm": 2.953125, "learning_rate": 2.476755802675049e-06, "loss": 1.608, "step": 9410 }, { "epoch": 2.6487550991700664, "grad_norm": 3.25, "learning_rate": 2.4722159440828877e-06, "loss": 1.5923, "step": 9415 }, { "epoch": 2.650161766774511, "grad_norm": 3.4375, "learning_rate": 2.467678388464436e-06, "loss": 1.4541, "step": 9420 }, { "epoch": 2.651568434378956, "grad_norm": 3.796875, "learning_rate": 2.4631431426595826e-06, "loss": 1.6113, "step": 9425 }, { "epoch": 2.6529751019834014, "grad_norm": 2.65625, "learning_rate": 2.4586102135047314e-06, "loss": 1.6558, "step": 9430 }, { "epoch": 2.6543817695878467, "grad_norm": 3.0625, "learning_rate": 2.4540796078327966e-06, "loss": 1.7034, "step": 9435 }, { "epoch": 2.6557884371922915, "grad_norm": 3.71875, "learning_rate": 2.4495513324731897e-06, "loss": 1.68, "step": 9440 }, { "epoch": 2.6571951047967364, "grad_norm": 3.140625, "learning_rate": 2.4450253942518105e-06, "loss": 1.675, "step": 9445 }, { "epoch": 2.6586017724011817, "grad_norm": 2.359375, "learning_rate": 2.4405017999910324e-06, "loss": 1.6803, "step": 9450 }, { "epoch": 2.6600084400056265, "grad_norm": 3.796875, "learning_rate": 2.4359805565097006e-06, "loss": 1.5876, "step": 9455 }, { "epoch": 2.661415107610072, "grad_norm": 3.34375, "learning_rate": 2.431461670623111e-06, "loss": 1.7244, "step": 9460 }, { "epoch": 2.6628217752145167, "grad_norm": 3.0, "learning_rate": 2.4269451491430103e-06, "loss": 1.7627, "step": 9465 }, { "epoch": 2.664228442818962, "grad_norm": 4.84375, "learning_rate": 2.422430998877578e-06, "loss": 1.4034, "step": 9470 }, { "epoch": 2.665635110423407, "grad_norm": 3.734375, "learning_rate": 2.417919226631423e-06, "loss": 1.5969, "step": 9475 }, { "epoch": 2.667041778027852, "grad_norm": 4.4375, "learning_rate": 2.413409839205565e-06, "loss": 1.4532, "step": 9480 }, { "epoch": 2.668448445632297, "grad_norm": 2.5, "learning_rate": 2.4089028433974335e-06, "loss": 1.346, "step": 9485 }, { "epoch": 2.6698551132367423, "grad_norm": 2.765625, "learning_rate": 2.4043982460008466e-06, "loss": 1.3301, "step": 9490 }, { "epoch": 2.671261780841187, "grad_norm": 3.875, "learning_rate": 2.3998960538060138e-06, "loss": 1.5903, "step": 9495 }, { "epoch": 2.6726684484456324, "grad_norm": 4.65625, "learning_rate": 2.3953962735995167e-06, "loss": 1.4014, "step": 9500 }, { "epoch": 2.6740751160500773, "grad_norm": 2.53125, "learning_rate": 2.390898912164298e-06, "loss": 1.8069, "step": 9505 }, { "epoch": 2.675481783654522, "grad_norm": 3.359375, "learning_rate": 2.3864039762796583e-06, "loss": 1.5562, "step": 9510 }, { "epoch": 2.6768884512589675, "grad_norm": 3.328125, "learning_rate": 2.38191147272124e-06, "loss": 1.7332, "step": 9515 }, { "epoch": 2.6782951188634128, "grad_norm": 2.890625, "learning_rate": 2.3774214082610217e-06, "loss": 1.5451, "step": 9520 }, { "epoch": 2.6797017864678576, "grad_norm": 3.78125, "learning_rate": 2.3729337896672996e-06, "loss": 1.3584, "step": 9525 }, { "epoch": 2.6811084540723025, "grad_norm": 4.8125, "learning_rate": 2.3684486237046886e-06, "loss": 1.5757, "step": 9530 }, { "epoch": 2.6825151216767478, "grad_norm": 3.125, "learning_rate": 2.3639659171341036e-06, "loss": 1.617, "step": 9535 }, { "epoch": 2.683921789281193, "grad_norm": 4.09375, "learning_rate": 2.3594856767127542e-06, "loss": 1.6402, "step": 9540 }, { "epoch": 2.685328456885638, "grad_norm": 3.375, "learning_rate": 2.35500790919413e-06, "loss": 1.7177, "step": 9545 }, { "epoch": 2.686735124490083, "grad_norm": 3.671875, "learning_rate": 2.3505326213279964e-06, "loss": 1.5701, "step": 9550 }, { "epoch": 2.688141792094528, "grad_norm": 3.515625, "learning_rate": 2.346059819860376e-06, "loss": 1.5764, "step": 9555 }, { "epoch": 2.6895484596989734, "grad_norm": 2.5, "learning_rate": 2.3415895115335477e-06, "loss": 1.7206, "step": 9560 }, { "epoch": 2.6909551273034182, "grad_norm": 2.859375, "learning_rate": 2.3371217030860337e-06, "loss": 1.7077, "step": 9565 }, { "epoch": 2.692361794907863, "grad_norm": 2.984375, "learning_rate": 2.3326564012525804e-06, "loss": 1.6132, "step": 9570 }, { "epoch": 2.6937684625123084, "grad_norm": 2.8125, "learning_rate": 2.3281936127641644e-06, "loss": 1.5924, "step": 9575 }, { "epoch": 2.6951751301167532, "grad_norm": 4.09375, "learning_rate": 2.3237333443479676e-06, "loss": 1.3612, "step": 9580 }, { "epoch": 2.6965817977211985, "grad_norm": 3.125, "learning_rate": 2.3192756027273766e-06, "loss": 1.6446, "step": 9585 }, { "epoch": 2.6979884653256434, "grad_norm": 3.21875, "learning_rate": 2.3148203946219644e-06, "loss": 1.9269, "step": 9590 }, { "epoch": 2.6993951329300887, "grad_norm": 3.5625, "learning_rate": 2.3103677267474934e-06, "loss": 1.5727, "step": 9595 }, { "epoch": 2.7008018005345336, "grad_norm": 3.0625, "learning_rate": 2.3059176058158897e-06, "loss": 1.6316, "step": 9600 }, { "epoch": 2.702208468138979, "grad_norm": 4.4375, "learning_rate": 2.3014700385352425e-06, "loss": 1.2899, "step": 9605 }, { "epoch": 2.7036151357434237, "grad_norm": 3.90625, "learning_rate": 2.2970250316097914e-06, "loss": 1.5956, "step": 9610 }, { "epoch": 2.705021803347869, "grad_norm": 2.609375, "learning_rate": 2.292582591739916e-06, "loss": 1.4859, "step": 9615 }, { "epoch": 2.706428470952314, "grad_norm": 3.984375, "learning_rate": 2.2881427256221263e-06, "loss": 1.5503, "step": 9620 }, { "epoch": 2.707835138556759, "grad_norm": 2.5, "learning_rate": 2.283705439949056e-06, "loss": 1.4448, "step": 9625 }, { "epoch": 2.709241806161204, "grad_norm": 3.78125, "learning_rate": 2.2792707414094447e-06, "loss": 1.5078, "step": 9630 }, { "epoch": 2.7106484737656493, "grad_norm": 4.09375, "learning_rate": 2.2748386366881327e-06, "loss": 1.5553, "step": 9635 }, { "epoch": 2.712055141370094, "grad_norm": 3.921875, "learning_rate": 2.2704091324660557e-06, "loss": 1.6245, "step": 9640 }, { "epoch": 2.7134618089745395, "grad_norm": 2.484375, "learning_rate": 2.26598223542022e-06, "loss": 1.4752, "step": 9645 }, { "epoch": 2.7148684765789843, "grad_norm": 4.09375, "learning_rate": 2.2615579522237103e-06, "loss": 1.6861, "step": 9650 }, { "epoch": 2.716275144183429, "grad_norm": 4.28125, "learning_rate": 2.2571362895456673e-06, "loss": 1.3737, "step": 9655 }, { "epoch": 2.7176818117878745, "grad_norm": 4.03125, "learning_rate": 2.2527172540512817e-06, "loss": 1.7256, "step": 9660 }, { "epoch": 2.71908847939232, "grad_norm": 3.421875, "learning_rate": 2.248300852401784e-06, "loss": 1.4467, "step": 9665 }, { "epoch": 2.7204951469967646, "grad_norm": 2.5625, "learning_rate": 2.2438870912544386e-06, "loss": 1.7083, "step": 9670 }, { "epoch": 2.7219018146012095, "grad_norm": 2.703125, "learning_rate": 2.239475977262521e-06, "loss": 1.6565, "step": 9675 }, { "epoch": 2.723308482205655, "grad_norm": 3.765625, "learning_rate": 2.2350675170753247e-06, "loss": 1.4021, "step": 9680 }, { "epoch": 2.7247151498101, "grad_norm": 4.03125, "learning_rate": 2.230661717338138e-06, "loss": 1.5423, "step": 9685 }, { "epoch": 2.726121817414545, "grad_norm": 2.9375, "learning_rate": 2.2262585846922418e-06, "loss": 1.4289, "step": 9690 }, { "epoch": 2.72752848501899, "grad_norm": 3.84375, "learning_rate": 2.2218581257748927e-06, "loss": 1.4817, "step": 9695 }, { "epoch": 2.728935152623435, "grad_norm": 4.09375, "learning_rate": 2.2174603472193224e-06, "loss": 1.3842, "step": 9700 }, { "epoch": 2.7303418202278804, "grad_norm": 3.84375, "learning_rate": 2.213065255654719e-06, "loss": 1.5652, "step": 9705 }, { "epoch": 2.7317484878323253, "grad_norm": 2.828125, "learning_rate": 2.2086728577062178e-06, "loss": 1.3789, "step": 9710 }, { "epoch": 2.73315515543677, "grad_norm": 4.1875, "learning_rate": 2.204283159994902e-06, "loss": 1.3959, "step": 9715 }, { "epoch": 2.7345618230412154, "grad_norm": 3.78125, "learning_rate": 2.199896169137772e-06, "loss": 1.7312, "step": 9720 }, { "epoch": 2.7359684906456603, "grad_norm": 4.5, "learning_rate": 2.19551189174776e-06, "loss": 1.4778, "step": 9725 }, { "epoch": 2.7373751582501056, "grad_norm": 3.6875, "learning_rate": 2.1911303344337014e-06, "loss": 1.4665, "step": 9730 }, { "epoch": 2.7387818258545504, "grad_norm": 2.9375, "learning_rate": 2.186751503800332e-06, "loss": 1.5714, "step": 9735 }, { "epoch": 2.7401884934589957, "grad_norm": 3.703125, "learning_rate": 2.1823754064482786e-06, "loss": 1.5682, "step": 9740 }, { "epoch": 2.7415951610634406, "grad_norm": 3.890625, "learning_rate": 2.1780020489740506e-06, "loss": 1.4708, "step": 9745 }, { "epoch": 2.743001828667886, "grad_norm": 3.015625, "learning_rate": 2.1736314379700177e-06, "loss": 1.5423, "step": 9750 }, { "epoch": 2.7444084962723307, "grad_norm": 3.203125, "learning_rate": 2.1692635800244222e-06, "loss": 1.7049, "step": 9755 }, { "epoch": 2.745815163876776, "grad_norm": 4.625, "learning_rate": 2.164898481721348e-06, "loss": 1.4801, "step": 9760 }, { "epoch": 2.747221831481221, "grad_norm": 3.546875, "learning_rate": 2.160536149640721e-06, "loss": 1.6165, "step": 9765 }, { "epoch": 2.748628499085666, "grad_norm": 4.90625, "learning_rate": 2.1561765903582985e-06, "loss": 1.4022, "step": 9770 }, { "epoch": 2.750035166690111, "grad_norm": 6.375, "learning_rate": 2.151819810445656e-06, "loss": 1.3853, "step": 9775 }, { "epoch": 2.7514418342945564, "grad_norm": 3.4375, "learning_rate": 2.147465816470183e-06, "loss": 1.5349, "step": 9780 }, { "epoch": 2.752848501899001, "grad_norm": 2.765625, "learning_rate": 2.1431146149950673e-06, "loss": 1.575, "step": 9785 }, { "epoch": 2.7542551695034465, "grad_norm": 2.953125, "learning_rate": 2.138766212579286e-06, "loss": 1.4022, "step": 9790 }, { "epoch": 2.7556618371078914, "grad_norm": 3.484375, "learning_rate": 2.1344206157775963e-06, "loss": 1.5342, "step": 9795 }, { "epoch": 2.757068504712336, "grad_norm": 3.984375, "learning_rate": 2.130077831140534e-06, "loss": 1.6823, "step": 9800 }, { "epoch": 2.7584751723167815, "grad_norm": 3.6875, "learning_rate": 2.125737865214383e-06, "loss": 1.5458, "step": 9805 }, { "epoch": 2.759881839921227, "grad_norm": 3.46875, "learning_rate": 2.12140072454119e-06, "loss": 1.5436, "step": 9810 }, { "epoch": 2.7612885075256717, "grad_norm": 3.453125, "learning_rate": 2.1170664156587374e-06, "loss": 1.4426, "step": 9815 }, { "epoch": 2.7626951751301165, "grad_norm": 2.3125, "learning_rate": 2.1127349451005387e-06, "loss": 1.6613, "step": 9820 }, { "epoch": 2.764101842734562, "grad_norm": 3.515625, "learning_rate": 2.1084063193958292e-06, "loss": 1.522, "step": 9825 }, { "epoch": 2.765508510339007, "grad_norm": 3.4375, "learning_rate": 2.104080545069561e-06, "loss": 1.6962, "step": 9830 }, { "epoch": 2.766915177943452, "grad_norm": 4.4375, "learning_rate": 2.0997576286423773e-06, "loss": 1.5804, "step": 9835 }, { "epoch": 2.768321845547897, "grad_norm": 3.25, "learning_rate": 2.0954375766306256e-06, "loss": 1.5818, "step": 9840 }, { "epoch": 2.769728513152342, "grad_norm": 3.28125, "learning_rate": 2.0911203955463262e-06, "loss": 1.7594, "step": 9845 }, { "epoch": 2.7711351807567874, "grad_norm": 3.5, "learning_rate": 2.0868060918971754e-06, "loss": 1.541, "step": 9850 }, { "epoch": 2.7725418483612323, "grad_norm": 3.71875, "learning_rate": 2.082494672186535e-06, "loss": 1.6876, "step": 9855 }, { "epoch": 2.773948515965677, "grad_norm": 2.828125, "learning_rate": 2.078186142913414e-06, "loss": 1.4451, "step": 9860 }, { "epoch": 2.7753551835701225, "grad_norm": 3.71875, "learning_rate": 2.0738805105724676e-06, "loss": 1.8509, "step": 9865 }, { "epoch": 2.7767618511745673, "grad_norm": 5.75, "learning_rate": 2.069577781653982e-06, "loss": 1.648, "step": 9870 }, { "epoch": 2.7781685187790126, "grad_norm": 5.15625, "learning_rate": 2.065277962643873e-06, "loss": 1.5538, "step": 9875 }, { "epoch": 2.7795751863834575, "grad_norm": 3.5, "learning_rate": 2.0609810600236586e-06, "loss": 1.4278, "step": 9880 }, { "epoch": 2.7809818539879028, "grad_norm": 3.75, "learning_rate": 2.056687080270473e-06, "loss": 1.5183, "step": 9885 }, { "epoch": 2.7823885215923476, "grad_norm": 3.859375, "learning_rate": 2.0523960298570368e-06, "loss": 1.4949, "step": 9890 }, { "epoch": 2.783795189196793, "grad_norm": 2.734375, "learning_rate": 2.0481079152516564e-06, "loss": 1.5487, "step": 9895 }, { "epoch": 2.7852018568012378, "grad_norm": 3.59375, "learning_rate": 2.043822742918212e-06, "loss": 1.6569, "step": 9900 }, { "epoch": 2.786608524405683, "grad_norm": 4.53125, "learning_rate": 2.0395405193161557e-06, "loss": 1.5651, "step": 9905 }, { "epoch": 2.788015192010128, "grad_norm": 3.625, "learning_rate": 2.0352612509004816e-06, "loss": 1.7325, "step": 9910 }, { "epoch": 2.7894218596145732, "grad_norm": 3.421875, "learning_rate": 2.030984944121742e-06, "loss": 1.6342, "step": 9915 }, { "epoch": 2.790828527219018, "grad_norm": 3.015625, "learning_rate": 2.0267116054260174e-06, "loss": 1.6308, "step": 9920 }, { "epoch": 2.792235194823463, "grad_norm": 2.5625, "learning_rate": 2.0224412412549153e-06, "loss": 1.5591, "step": 9925 }, { "epoch": 2.7936418624279082, "grad_norm": 3.546875, "learning_rate": 2.0181738580455626e-06, "loss": 1.6227, "step": 9930 }, { "epoch": 2.7950485300323535, "grad_norm": 3.1875, "learning_rate": 2.013909462230589e-06, "loss": 1.6837, "step": 9935 }, { "epoch": 2.7964551976367984, "grad_norm": 5.125, "learning_rate": 2.009648060238123e-06, "loss": 1.5786, "step": 9940 }, { "epoch": 2.7978618652412433, "grad_norm": 3.515625, "learning_rate": 2.0053896584917804e-06, "loss": 1.5091, "step": 9945 }, { "epoch": 2.7992685328456886, "grad_norm": 3.359375, "learning_rate": 2.001134263410652e-06, "loss": 1.6791, "step": 9950 }, { "epoch": 2.800675200450134, "grad_norm": 4.09375, "learning_rate": 1.9968818814092975e-06, "loss": 1.6616, "step": 9955 }, { "epoch": 2.8020818680545787, "grad_norm": 2.3125, "learning_rate": 1.9926325188977382e-06, "loss": 1.5081, "step": 9960 }, { "epoch": 2.8034885356590236, "grad_norm": 3.625, "learning_rate": 1.98838618228144e-06, "loss": 1.5836, "step": 9965 }, { "epoch": 2.804895203263469, "grad_norm": 2.9375, "learning_rate": 1.9841428779613085e-06, "loss": 1.8027, "step": 9970 }, { "epoch": 2.806301870867914, "grad_norm": 3.5625, "learning_rate": 1.979902612333678e-06, "loss": 1.6843, "step": 9975 }, { "epoch": 2.807708538472359, "grad_norm": 3.640625, "learning_rate": 1.9756653917903024e-06, "loss": 1.6057, "step": 9980 }, { "epoch": 2.809115206076804, "grad_norm": 3.625, "learning_rate": 1.9714312227183448e-06, "loss": 1.4281, "step": 9985 }, { "epoch": 2.810521873681249, "grad_norm": 3.4375, "learning_rate": 1.9672001115003734e-06, "loss": 1.5506, "step": 9990 }, { "epoch": 2.811928541285694, "grad_norm": 3.140625, "learning_rate": 1.96297206451434e-06, "loss": 1.4472, "step": 9995 }, { "epoch": 2.8133352088901393, "grad_norm": 2.390625, "learning_rate": 1.95874708813358e-06, "loss": 1.7408, "step": 10000 }, { "epoch": 2.814741876494584, "grad_norm": 2.796875, "learning_rate": 1.9545251887268055e-06, "loss": 1.8876, "step": 10005 }, { "epoch": 2.8161485440990295, "grad_norm": 3.125, "learning_rate": 1.9503063726580794e-06, "loss": 1.6134, "step": 10010 }, { "epoch": 2.8175552117034743, "grad_norm": 4.125, "learning_rate": 1.9460906462868266e-06, "loss": 1.3413, "step": 10015 }, { "epoch": 2.8189618793079196, "grad_norm": 3.875, "learning_rate": 1.941878015967811e-06, "loss": 1.5506, "step": 10020 }, { "epoch": 2.8203685469123645, "grad_norm": 3.75, "learning_rate": 1.9376684880511283e-06, "loss": 1.7976, "step": 10025 }, { "epoch": 2.82177521451681, "grad_norm": 2.28125, "learning_rate": 1.9334620688821986e-06, "loss": 1.5492, "step": 10030 }, { "epoch": 2.8231818821212546, "grad_norm": 2.84375, "learning_rate": 1.9292587648017597e-06, "loss": 1.6118, "step": 10035 }, { "epoch": 2.8245885497257, "grad_norm": 5.40625, "learning_rate": 1.925058582145844e-06, "loss": 1.4627, "step": 10040 }, { "epoch": 2.825995217330145, "grad_norm": 3.171875, "learning_rate": 1.9208615272457907e-06, "loss": 1.9225, "step": 10045 }, { "epoch": 2.82740188493459, "grad_norm": 3.375, "learning_rate": 1.916667606428216e-06, "loss": 1.4313, "step": 10050 }, { "epoch": 2.828808552539035, "grad_norm": 3.28125, "learning_rate": 1.9124768260150144e-06, "loss": 1.7128, "step": 10055 }, { "epoch": 2.8302152201434803, "grad_norm": 3.984375, "learning_rate": 1.9082891923233453e-06, "loss": 1.4724, "step": 10060 }, { "epoch": 2.831621887747925, "grad_norm": 4.15625, "learning_rate": 1.9041047116656279e-06, "loss": 1.5864, "step": 10065 }, { "epoch": 2.83302855535237, "grad_norm": 3.859375, "learning_rate": 1.8999233903495262e-06, "loss": 1.5653, "step": 10070 }, { "epoch": 2.8344352229568153, "grad_norm": 3.765625, "learning_rate": 1.8957452346779399e-06, "loss": 1.681, "step": 10075 }, { "epoch": 2.8358418905612606, "grad_norm": 2.265625, "learning_rate": 1.8915702509490035e-06, "loss": 1.5478, "step": 10080 }, { "epoch": 2.8372485581657054, "grad_norm": 2.953125, "learning_rate": 1.88739844545606e-06, "loss": 1.6975, "step": 10085 }, { "epoch": 2.8386552257701503, "grad_norm": 3.140625, "learning_rate": 1.8832298244876718e-06, "loss": 1.618, "step": 10090 }, { "epoch": 2.8400618933745956, "grad_norm": 3.546875, "learning_rate": 1.8790643943275946e-06, "loss": 1.4996, "step": 10095 }, { "epoch": 2.841468560979041, "grad_norm": 2.359375, "learning_rate": 1.8749021612547762e-06, "loss": 1.6171, "step": 10100 }, { "epoch": 2.8428752285834857, "grad_norm": 3.390625, "learning_rate": 1.8707431315433433e-06, "loss": 1.4352, "step": 10105 }, { "epoch": 2.8442818961879306, "grad_norm": 3.609375, "learning_rate": 1.8665873114626001e-06, "loss": 1.4832, "step": 10110 }, { "epoch": 2.845688563792376, "grad_norm": 2.5, "learning_rate": 1.8624347072770026e-06, "loss": 1.6562, "step": 10115 }, { "epoch": 2.847095231396821, "grad_norm": 4.0, "learning_rate": 1.8582853252461686e-06, "loss": 1.5724, "step": 10120 }, { "epoch": 2.848501899001266, "grad_norm": 3.453125, "learning_rate": 1.8541391716248533e-06, "loss": 1.6138, "step": 10125 }, { "epoch": 2.849908566605711, "grad_norm": 2.953125, "learning_rate": 1.849996252662946e-06, "loss": 1.6924, "step": 10130 }, { "epoch": 2.851315234210156, "grad_norm": 3.015625, "learning_rate": 1.8458565746054657e-06, "loss": 1.5711, "step": 10135 }, { "epoch": 2.852721901814601, "grad_norm": 3.796875, "learning_rate": 1.8417201436925352e-06, "loss": 1.399, "step": 10140 }, { "epoch": 2.8541285694190464, "grad_norm": 5.0, "learning_rate": 1.8375869661593933e-06, "loss": 1.5267, "step": 10145 }, { "epoch": 2.855535237023491, "grad_norm": 2.890625, "learning_rate": 1.8334570482363687e-06, "loss": 1.6057, "step": 10150 }, { "epoch": 2.8569419046279365, "grad_norm": 3.21875, "learning_rate": 1.8293303961488783e-06, "loss": 1.5148, "step": 10155 }, { "epoch": 2.8583485722323814, "grad_norm": 3.125, "learning_rate": 1.8252070161174142e-06, "loss": 1.6119, "step": 10160 }, { "epoch": 2.8597552398368267, "grad_norm": 4.25, "learning_rate": 1.8210869143575432e-06, "loss": 1.5562, "step": 10165 }, { "epoch": 2.8611619074412715, "grad_norm": 3.3125, "learning_rate": 1.8169700970798777e-06, "loss": 1.6375, "step": 10170 }, { "epoch": 2.862568575045717, "grad_norm": 4.0, "learning_rate": 1.8128565704900925e-06, "loss": 1.536, "step": 10175 }, { "epoch": 2.8639752426501617, "grad_norm": 3.140625, "learning_rate": 1.8087463407888942e-06, "loss": 1.7519, "step": 10180 }, { "epoch": 2.865381910254607, "grad_norm": 5.0625, "learning_rate": 1.8046394141720208e-06, "loss": 1.4386, "step": 10185 }, { "epoch": 2.866788577859052, "grad_norm": 3.703125, "learning_rate": 1.8005357968302318e-06, "loss": 1.7448, "step": 10190 }, { "epoch": 2.868195245463497, "grad_norm": 3.453125, "learning_rate": 1.796435494949302e-06, "loss": 1.4544, "step": 10195 }, { "epoch": 2.869601913067942, "grad_norm": 6.90625, "learning_rate": 1.7923385147099999e-06, "loss": 1.4657, "step": 10200 }, { "epoch": 2.8710085806723873, "grad_norm": 2.890625, "learning_rate": 1.7882448622880943e-06, "loss": 1.6822, "step": 10205 }, { "epoch": 2.872415248276832, "grad_norm": 2.640625, "learning_rate": 1.7841545438543392e-06, "loss": 1.5488, "step": 10210 }, { "epoch": 2.873821915881277, "grad_norm": 4.65625, "learning_rate": 1.7800675655744528e-06, "loss": 1.5145, "step": 10215 }, { "epoch": 2.8752285834857223, "grad_norm": 2.953125, "learning_rate": 1.7759839336091296e-06, "loss": 1.6704, "step": 10220 }, { "epoch": 2.8766352510901676, "grad_norm": 2.875, "learning_rate": 1.771903654114013e-06, "loss": 1.5645, "step": 10225 }, { "epoch": 2.8780419186946125, "grad_norm": 3.234375, "learning_rate": 1.7678267332396958e-06, "loss": 1.727, "step": 10230 }, { "epoch": 2.8794485862990573, "grad_norm": 3.046875, "learning_rate": 1.7637531771317056e-06, "loss": 1.647, "step": 10235 }, { "epoch": 2.8808552539035026, "grad_norm": 3.328125, "learning_rate": 1.7596829919305037e-06, "loss": 1.4213, "step": 10240 }, { "epoch": 2.882261921507948, "grad_norm": 5.28125, "learning_rate": 1.7556161837714606e-06, "loss": 1.4697, "step": 10245 }, { "epoch": 2.8836685891123928, "grad_norm": 3.703125, "learning_rate": 1.7515527587848652e-06, "loss": 1.6989, "step": 10250 }, { "epoch": 2.8850752567168376, "grad_norm": 2.6875, "learning_rate": 1.7474927230959025e-06, "loss": 1.7053, "step": 10255 }, { "epoch": 2.886481924321283, "grad_norm": 3.46875, "learning_rate": 1.7434360828246488e-06, "loss": 1.4548, "step": 10260 }, { "epoch": 2.887888591925728, "grad_norm": 3.890625, "learning_rate": 1.7393828440860613e-06, "loss": 1.575, "step": 10265 }, { "epoch": 2.889295259530173, "grad_norm": 5.0, "learning_rate": 1.7353330129899736e-06, "loss": 1.7149, "step": 10270 }, { "epoch": 2.890701927134618, "grad_norm": 3.5, "learning_rate": 1.731286595641078e-06, "loss": 1.7091, "step": 10275 }, { "epoch": 2.8921085947390632, "grad_norm": 3.25, "learning_rate": 1.7272435981389237e-06, "loss": 1.5249, "step": 10280 }, { "epoch": 2.893515262343508, "grad_norm": 3.765625, "learning_rate": 1.7232040265779038e-06, "loss": 1.5553, "step": 10285 }, { "epoch": 2.8949219299479534, "grad_norm": 4.875, "learning_rate": 1.7191678870472459e-06, "loss": 1.3392, "step": 10290 }, { "epoch": 2.8963285975523982, "grad_norm": 4.0625, "learning_rate": 1.715135185631008e-06, "loss": 1.6271, "step": 10295 }, { "epoch": 2.8977352651568435, "grad_norm": 4.1875, "learning_rate": 1.7111059284080627e-06, "loss": 1.7061, "step": 10300 }, { "epoch": 2.8991419327612884, "grad_norm": 2.671875, "learning_rate": 1.7070801214520904e-06, "loss": 1.4828, "step": 10305 }, { "epoch": 2.9005486003657337, "grad_norm": 3.1875, "learning_rate": 1.703057770831572e-06, "loss": 1.6617, "step": 10310 }, { "epoch": 2.9019552679701786, "grad_norm": 3.28125, "learning_rate": 1.699038882609778e-06, "loss": 1.6372, "step": 10315 }, { "epoch": 2.903361935574624, "grad_norm": 2.8125, "learning_rate": 1.695023462844757e-06, "loss": 1.6865, "step": 10320 }, { "epoch": 2.9047686031790687, "grad_norm": 3.359375, "learning_rate": 1.6910115175893362e-06, "loss": 1.5584, "step": 10325 }, { "epoch": 2.906175270783514, "grad_norm": 3.125, "learning_rate": 1.6870030528910983e-06, "loss": 1.6264, "step": 10330 }, { "epoch": 2.907581938387959, "grad_norm": 3.28125, "learning_rate": 1.6829980747923828e-06, "loss": 1.6237, "step": 10335 }, { "epoch": 2.9089886059924037, "grad_norm": 2.796875, "learning_rate": 1.6789965893302723e-06, "loss": 1.7661, "step": 10340 }, { "epoch": 2.910395273596849, "grad_norm": 3.609375, "learning_rate": 1.6749986025365836e-06, "loss": 1.5507, "step": 10345 }, { "epoch": 2.9118019412012943, "grad_norm": 4.09375, "learning_rate": 1.6710041204378649e-06, "loss": 1.525, "step": 10350 }, { "epoch": 2.913208608805739, "grad_norm": 3.546875, "learning_rate": 1.667013149055375e-06, "loss": 1.4026, "step": 10355 }, { "epoch": 2.914615276410184, "grad_norm": 3.25, "learning_rate": 1.6630256944050842e-06, "loss": 1.7627, "step": 10360 }, { "epoch": 2.9160219440146293, "grad_norm": 3.453125, "learning_rate": 1.659041762497659e-06, "loss": 1.6459, "step": 10365 }, { "epoch": 2.9174286116190746, "grad_norm": 3.25, "learning_rate": 1.6550613593384614e-06, "loss": 1.6321, "step": 10370 }, { "epoch": 2.9188352792235195, "grad_norm": 3.765625, "learning_rate": 1.6510844909275257e-06, "loss": 1.5042, "step": 10375 }, { "epoch": 2.9202419468279643, "grad_norm": 3.53125, "learning_rate": 1.6471111632595665e-06, "loss": 1.4446, "step": 10380 }, { "epoch": 2.9216486144324096, "grad_norm": 3.65625, "learning_rate": 1.6431413823239551e-06, "loss": 1.6827, "step": 10385 }, { "epoch": 2.923055282036855, "grad_norm": 3.765625, "learning_rate": 1.6391751541047189e-06, "loss": 1.4172, "step": 10390 }, { "epoch": 2.9244619496413, "grad_norm": 3.671875, "learning_rate": 1.6352124845805286e-06, "loss": 1.4812, "step": 10395 }, { "epoch": 2.9258686172457447, "grad_norm": 3.90625, "learning_rate": 1.6312533797246957e-06, "loss": 1.4395, "step": 10400 }, { "epoch": 2.92727528485019, "grad_norm": 2.78125, "learning_rate": 1.627297845505148e-06, "loss": 1.4606, "step": 10405 }, { "epoch": 2.928681952454635, "grad_norm": 2.71875, "learning_rate": 1.6233458878844418e-06, "loss": 1.7314, "step": 10410 }, { "epoch": 2.93008862005908, "grad_norm": 2.5625, "learning_rate": 1.6193975128197356e-06, "loss": 1.7334, "step": 10415 }, { "epoch": 2.931495287663525, "grad_norm": 4.75, "learning_rate": 1.6154527262627889e-06, "loss": 1.5945, "step": 10420 }, { "epoch": 2.9329019552679703, "grad_norm": 4.78125, "learning_rate": 1.6115115341599542e-06, "loss": 1.5073, "step": 10425 }, { "epoch": 2.934308622872415, "grad_norm": 3.984375, "learning_rate": 1.6075739424521623e-06, "loss": 1.8863, "step": 10430 }, { "epoch": 2.9357152904768604, "grad_norm": 3.359375, "learning_rate": 1.6036399570749194e-06, "loss": 1.7013, "step": 10435 }, { "epoch": 2.9371219580813053, "grad_norm": 2.625, "learning_rate": 1.5997095839582927e-06, "loss": 1.7134, "step": 10440 }, { "epoch": 2.9385286256857506, "grad_norm": 3.703125, "learning_rate": 1.59578282902691e-06, "loss": 1.5742, "step": 10445 }, { "epoch": 2.9399352932901954, "grad_norm": 2.734375, "learning_rate": 1.5918596981999359e-06, "loss": 1.5151, "step": 10450 }, { "epoch": 2.9413419608946407, "grad_norm": 7.09375, "learning_rate": 1.5879401973910813e-06, "loss": 1.6329, "step": 10455 }, { "epoch": 2.9427486284990856, "grad_norm": 3.078125, "learning_rate": 1.58402433250858e-06, "loss": 1.4513, "step": 10460 }, { "epoch": 2.944155296103531, "grad_norm": 2.875, "learning_rate": 1.5801121094551863e-06, "loss": 1.5627, "step": 10465 }, { "epoch": 2.9455619637079757, "grad_norm": 2.5, "learning_rate": 1.5762035341281634e-06, "loss": 1.7602, "step": 10470 }, { "epoch": 2.946968631312421, "grad_norm": 3.34375, "learning_rate": 1.5722986124192813e-06, "loss": 1.486, "step": 10475 }, { "epoch": 2.948375298916866, "grad_norm": 6.3125, "learning_rate": 1.5683973502147936e-06, "loss": 1.6301, "step": 10480 }, { "epoch": 2.9497819665213107, "grad_norm": 4.5625, "learning_rate": 1.564499753395446e-06, "loss": 1.4134, "step": 10485 }, { "epoch": 2.951188634125756, "grad_norm": 3.28125, "learning_rate": 1.5606058278364546e-06, "loss": 1.5875, "step": 10490 }, { "epoch": 2.9525953017302013, "grad_norm": 2.625, "learning_rate": 1.5567155794075016e-06, "loss": 1.6243, "step": 10495 }, { "epoch": 2.954001969334646, "grad_norm": 3.25, "learning_rate": 1.55282901397273e-06, "loss": 1.5925, "step": 10500 }, { "epoch": 2.955408636939091, "grad_norm": 2.96875, "learning_rate": 1.548946137390724e-06, "loss": 1.4384, "step": 10505 }, { "epoch": 2.9568153045435364, "grad_norm": 3.515625, "learning_rate": 1.5450669555145153e-06, "loss": 1.4555, "step": 10510 }, { "epoch": 2.9582219721479817, "grad_norm": 2.84375, "learning_rate": 1.54119147419156e-06, "loss": 1.7412, "step": 10515 }, { "epoch": 2.9596286397524265, "grad_norm": 3.234375, "learning_rate": 1.5373196992637403e-06, "loss": 1.3866, "step": 10520 }, { "epoch": 2.9610353073568714, "grad_norm": 3.34375, "learning_rate": 1.5334516365673462e-06, "loss": 1.6634, "step": 10525 }, { "epoch": 2.9624419749613167, "grad_norm": 3.65625, "learning_rate": 1.529587291933081e-06, "loss": 1.5429, "step": 10530 }, { "epoch": 2.963848642565762, "grad_norm": 4.4375, "learning_rate": 1.5257266711860308e-06, "loss": 1.6514, "step": 10535 }, { "epoch": 2.965255310170207, "grad_norm": 2.46875, "learning_rate": 1.5218697801456802e-06, "loss": 1.5855, "step": 10540 }, { "epoch": 2.9666619777746517, "grad_norm": 3.796875, "learning_rate": 1.5180166246258846e-06, "loss": 1.5531, "step": 10545 }, { "epoch": 2.968068645379097, "grad_norm": 2.84375, "learning_rate": 1.5141672104348708e-06, "loss": 1.6195, "step": 10550 }, { "epoch": 2.969475312983542, "grad_norm": 3.171875, "learning_rate": 1.5103215433752245e-06, "loss": 1.616, "step": 10555 }, { "epoch": 2.970881980587987, "grad_norm": 3.6875, "learning_rate": 1.5064796292438868e-06, "loss": 1.5293, "step": 10560 }, { "epoch": 2.972288648192432, "grad_norm": 3.640625, "learning_rate": 1.502641473832137e-06, "loss": 1.4734, "step": 10565 }, { "epoch": 2.9736953157968773, "grad_norm": 2.984375, "learning_rate": 1.4988070829255902e-06, "loss": 1.772, "step": 10570 }, { "epoch": 2.975101983401322, "grad_norm": 3.046875, "learning_rate": 1.4949764623041907e-06, "loss": 1.6811, "step": 10575 }, { "epoch": 2.9765086510057674, "grad_norm": 3.4375, "learning_rate": 1.4911496177421903e-06, "loss": 1.6491, "step": 10580 }, { "epoch": 2.9779153186102123, "grad_norm": 3.515625, "learning_rate": 1.4873265550081593e-06, "loss": 1.5423, "step": 10585 }, { "epoch": 2.9793219862146576, "grad_norm": 2.984375, "learning_rate": 1.4835072798649607e-06, "loss": 1.8184, "step": 10590 }, { "epoch": 2.9807286538191025, "grad_norm": 2.765625, "learning_rate": 1.47969179806975e-06, "loss": 1.658, "step": 10595 }, { "epoch": 2.9821353214235478, "grad_norm": 2.640625, "learning_rate": 1.4758801153739632e-06, "loss": 1.7618, "step": 10600 }, { "epoch": 2.9835419890279926, "grad_norm": 3.890625, "learning_rate": 1.4720722375233154e-06, "loss": 1.693, "step": 10605 }, { "epoch": 2.984948656632438, "grad_norm": 3.25, "learning_rate": 1.4682681702577756e-06, "loss": 1.5824, "step": 10610 }, { "epoch": 2.9863553242368828, "grad_norm": 4.90625, "learning_rate": 1.4644679193115793e-06, "loss": 1.3904, "step": 10615 }, { "epoch": 2.987761991841328, "grad_norm": 4.625, "learning_rate": 1.4606714904132034e-06, "loss": 1.4736, "step": 10620 }, { "epoch": 2.989168659445773, "grad_norm": 3.859375, "learning_rate": 1.4568788892853653e-06, "loss": 1.4967, "step": 10625 }, { "epoch": 2.990575327050218, "grad_norm": 3.3125, "learning_rate": 1.4530901216450113e-06, "loss": 1.5161, "step": 10630 }, { "epoch": 2.991981994654663, "grad_norm": 3.90625, "learning_rate": 1.4493051932033113e-06, "loss": 1.6695, "step": 10635 }, { "epoch": 2.9933886622591084, "grad_norm": 4.5625, "learning_rate": 1.4455241096656466e-06, "loss": 1.4315, "step": 10640 }, { "epoch": 2.9947953298635532, "grad_norm": 2.96875, "learning_rate": 1.4417468767316022e-06, "loss": 1.7368, "step": 10645 }, { "epoch": 2.996201997467998, "grad_norm": 4.53125, "learning_rate": 1.437973500094959e-06, "loss": 1.5735, "step": 10650 }, { "epoch": 2.9976086650724434, "grad_norm": 2.828125, "learning_rate": 1.4342039854436849e-06, "loss": 1.4599, "step": 10655 }, { "epoch": 2.9990153326768887, "grad_norm": 2.8125, "learning_rate": 1.4304383384599281e-06, "loss": 1.7635, "step": 10660 }, { "epoch": 2.9998593332395553, "eval_loss": 1.5767865180969238, "eval_runtime": 330.5515, "eval_samples_per_second": 9.554, "eval_steps_per_second": 4.777, "step": 10663 }, { "epoch": 3.0004220002813335, "grad_norm": 3.015625, "learning_rate": 1.4266765648200045e-06, "loss": 1.6251, "step": 10665 }, { "epoch": 3.0018286678857784, "grad_norm": 2.96875, "learning_rate": 1.4229186701943925e-06, "loss": 1.4799, "step": 10670 }, { "epoch": 3.0032353354902237, "grad_norm": 3.015625, "learning_rate": 1.4191646602477216e-06, "loss": 1.5905, "step": 10675 }, { "epoch": 3.0046420030946686, "grad_norm": 7.09375, "learning_rate": 1.4154145406387681e-06, "loss": 1.5103, "step": 10680 }, { "epoch": 3.006048670699114, "grad_norm": 7.84375, "learning_rate": 1.4116683170204407e-06, "loss": 1.5813, "step": 10685 }, { "epoch": 3.0074553383035587, "grad_norm": 2.640625, "learning_rate": 1.40792599503978e-06, "loss": 1.4847, "step": 10690 }, { "epoch": 3.008862005908004, "grad_norm": 3.65625, "learning_rate": 1.404187580337941e-06, "loss": 1.6993, "step": 10695 }, { "epoch": 3.010268673512449, "grad_norm": 2.875, "learning_rate": 1.40045307855019e-06, "loss": 1.7316, "step": 10700 }, { "epoch": 3.011675341116894, "grad_norm": 3.71875, "learning_rate": 1.3967224953058988e-06, "loss": 1.6364, "step": 10705 }, { "epoch": 3.013082008721339, "grad_norm": 3.59375, "learning_rate": 1.3929958362285242e-06, "loss": 1.6369, "step": 10710 }, { "epoch": 3.0144886763257843, "grad_norm": 5.75, "learning_rate": 1.3892731069356161e-06, "loss": 1.6918, "step": 10715 }, { "epoch": 3.015895343930229, "grad_norm": 3.671875, "learning_rate": 1.3855543130387965e-06, "loss": 1.6818, "step": 10720 }, { "epoch": 3.0173020115346745, "grad_norm": 2.984375, "learning_rate": 1.3818394601437557e-06, "loss": 1.6582, "step": 10725 }, { "epoch": 3.0187086791391193, "grad_norm": 4.21875, "learning_rate": 1.3781285538502418e-06, "loss": 1.522, "step": 10730 }, { "epoch": 3.0201153467435646, "grad_norm": 3.578125, "learning_rate": 1.3744215997520602e-06, "loss": 1.4227, "step": 10735 }, { "epoch": 3.0215220143480095, "grad_norm": 3.046875, "learning_rate": 1.3707186034370484e-06, "loss": 1.6587, "step": 10740 }, { "epoch": 3.022928681952455, "grad_norm": 3.21875, "learning_rate": 1.3670195704870883e-06, "loss": 1.5284, "step": 10745 }, { "epoch": 3.0243353495568996, "grad_norm": 3.390625, "learning_rate": 1.3633245064780803e-06, "loss": 1.6379, "step": 10750 }, { "epoch": 3.025742017161345, "grad_norm": 2.90625, "learning_rate": 1.3596334169799457e-06, "loss": 1.4993, "step": 10755 }, { "epoch": 3.02714868476579, "grad_norm": 3.015625, "learning_rate": 1.355946307556612e-06, "loss": 1.6514, "step": 10760 }, { "epoch": 3.028555352370235, "grad_norm": 4.46875, "learning_rate": 1.3522631837660123e-06, "loss": 1.5571, "step": 10765 }, { "epoch": 3.02996201997468, "grad_norm": 2.71875, "learning_rate": 1.3485840511600636e-06, "loss": 1.47, "step": 10770 }, { "epoch": 3.0313686875791253, "grad_norm": 3.359375, "learning_rate": 1.3449089152846726e-06, "loss": 1.6482, "step": 10775 }, { "epoch": 3.03277535518357, "grad_norm": 3.953125, "learning_rate": 1.341237781679724e-06, "loss": 1.5908, "step": 10780 }, { "epoch": 3.0341820227880154, "grad_norm": 3.015625, "learning_rate": 1.337570655879059e-06, "loss": 1.6287, "step": 10785 }, { "epoch": 3.0355886903924603, "grad_norm": 4.21875, "learning_rate": 1.3339075434104885e-06, "loss": 1.6187, "step": 10790 }, { "epoch": 3.036995357996905, "grad_norm": 3.6875, "learning_rate": 1.3302484497957678e-06, "loss": 1.6408, "step": 10795 }, { "epoch": 3.0384020256013504, "grad_norm": 4.09375, "learning_rate": 1.3265933805505954e-06, "loss": 1.7244, "step": 10800 }, { "epoch": 3.0398086932057953, "grad_norm": 3.453125, "learning_rate": 1.3229423411846018e-06, "loss": 1.8399, "step": 10805 }, { "epoch": 3.0412153608102406, "grad_norm": 3.34375, "learning_rate": 1.319295337201349e-06, "loss": 1.4242, "step": 10810 }, { "epoch": 3.0426220284146854, "grad_norm": 3.609375, "learning_rate": 1.315652374098307e-06, "loss": 1.5995, "step": 10815 }, { "epoch": 3.0440286960191307, "grad_norm": 2.96875, "learning_rate": 1.3120134573668624e-06, "loss": 1.6642, "step": 10820 }, { "epoch": 3.0454353636235756, "grad_norm": 3.25, "learning_rate": 1.3083785924922986e-06, "loss": 1.3545, "step": 10825 }, { "epoch": 3.046842031228021, "grad_norm": 3.953125, "learning_rate": 1.3047477849537916e-06, "loss": 1.3587, "step": 10830 }, { "epoch": 3.0482486988324657, "grad_norm": 3.328125, "learning_rate": 1.3011210402244008e-06, "loss": 1.6701, "step": 10835 }, { "epoch": 3.049655366436911, "grad_norm": 3.0, "learning_rate": 1.2974983637710644e-06, "loss": 1.5038, "step": 10840 }, { "epoch": 3.051062034041356, "grad_norm": 3.078125, "learning_rate": 1.293879761054585e-06, "loss": 1.3233, "step": 10845 }, { "epoch": 3.052468701645801, "grad_norm": 3.421875, "learning_rate": 1.2902652375296255e-06, "loss": 1.3758, "step": 10850 }, { "epoch": 3.053875369250246, "grad_norm": 3.171875, "learning_rate": 1.2866547986446993e-06, "loss": 1.495, "step": 10855 }, { "epoch": 3.0552820368546914, "grad_norm": 3.09375, "learning_rate": 1.283048449842162e-06, "loss": 1.4547, "step": 10860 }, { "epoch": 3.056688704459136, "grad_norm": 3.421875, "learning_rate": 1.2794461965582098e-06, "loss": 1.5248, "step": 10865 }, { "epoch": 3.0580953720635815, "grad_norm": 3.25, "learning_rate": 1.275848044222854e-06, "loss": 1.5124, "step": 10870 }, { "epoch": 3.0595020396680264, "grad_norm": 3.1875, "learning_rate": 1.2722539982599352e-06, "loss": 1.4788, "step": 10875 }, { "epoch": 3.0609087072724717, "grad_norm": 3.265625, "learning_rate": 1.268664064087098e-06, "loss": 1.5596, "step": 10880 }, { "epoch": 3.0623153748769165, "grad_norm": 2.90625, "learning_rate": 1.2650782471157904e-06, "loss": 1.6641, "step": 10885 }, { "epoch": 3.063722042481362, "grad_norm": 3.375, "learning_rate": 1.2614965527512533e-06, "loss": 1.4911, "step": 10890 }, { "epoch": 3.0651287100858067, "grad_norm": 2.46875, "learning_rate": 1.2579189863925175e-06, "loss": 1.5126, "step": 10895 }, { "epoch": 3.066535377690252, "grad_norm": 2.453125, "learning_rate": 1.2543455534323828e-06, "loss": 1.3572, "step": 10900 }, { "epoch": 3.067942045294697, "grad_norm": 4.0625, "learning_rate": 1.2507762592574272e-06, "loss": 1.7074, "step": 10905 }, { "epoch": 3.069348712899142, "grad_norm": 2.9375, "learning_rate": 1.2472111092479853e-06, "loss": 1.5994, "step": 10910 }, { "epoch": 3.070755380503587, "grad_norm": 3.3125, "learning_rate": 1.2436501087781435e-06, "loss": 1.6489, "step": 10915 }, { "epoch": 3.0721620481080323, "grad_norm": 4.90625, "learning_rate": 1.2400932632157389e-06, "loss": 1.4295, "step": 10920 }, { "epoch": 3.073568715712477, "grad_norm": 3.765625, "learning_rate": 1.2365405779223395e-06, "loss": 1.6276, "step": 10925 }, { "epoch": 3.0749753833169224, "grad_norm": 2.390625, "learning_rate": 1.2329920582532451e-06, "loss": 1.5665, "step": 10930 }, { "epoch": 3.0763820509213673, "grad_norm": 3.328125, "learning_rate": 1.229447709557475e-06, "loss": 1.7107, "step": 10935 }, { "epoch": 3.077788718525812, "grad_norm": 2.9375, "learning_rate": 1.2259075371777648e-06, "loss": 1.6144, "step": 10940 }, { "epoch": 3.0791953861302575, "grad_norm": 3.890625, "learning_rate": 1.2223715464505473e-06, "loss": 1.7557, "step": 10945 }, { "epoch": 3.0806020537347023, "grad_norm": 4.21875, "learning_rate": 1.2188397427059607e-06, "loss": 1.5287, "step": 10950 }, { "epoch": 3.0820087213391476, "grad_norm": 3.125, "learning_rate": 1.215312131267825e-06, "loss": 1.8295, "step": 10955 }, { "epoch": 3.0834153889435925, "grad_norm": 2.65625, "learning_rate": 1.2117887174536444e-06, "loss": 1.646, "step": 10960 }, { "epoch": 3.0848220565480378, "grad_norm": 4.375, "learning_rate": 1.2082695065745925e-06, "loss": 1.6581, "step": 10965 }, { "epoch": 3.0862287241524826, "grad_norm": 3.15625, "learning_rate": 1.2047545039355141e-06, "loss": 1.7364, "step": 10970 }, { "epoch": 3.087635391756928, "grad_norm": 4.53125, "learning_rate": 1.2012437148348994e-06, "loss": 1.4243, "step": 10975 }, { "epoch": 3.0890420593613728, "grad_norm": 3.546875, "learning_rate": 1.1977371445648988e-06, "loss": 1.5345, "step": 10980 }, { "epoch": 3.090448726965818, "grad_norm": 4.3125, "learning_rate": 1.1942347984112959e-06, "loss": 1.6182, "step": 10985 }, { "epoch": 3.091855394570263, "grad_norm": 2.34375, "learning_rate": 1.1907366816535076e-06, "loss": 1.686, "step": 10990 }, { "epoch": 3.0932620621747082, "grad_norm": 3.34375, "learning_rate": 1.1872427995645803e-06, "loss": 1.6519, "step": 10995 }, { "epoch": 3.094668729779153, "grad_norm": 2.515625, "learning_rate": 1.183753157411171e-06, "loss": 1.5121, "step": 11000 }, { "epoch": 3.0960753973835984, "grad_norm": 2.90625, "learning_rate": 1.1802677604535496e-06, "loss": 1.5297, "step": 11005 }, { "epoch": 3.0974820649880432, "grad_norm": 3.328125, "learning_rate": 1.176786613945584e-06, "loss": 1.3636, "step": 11010 }, { "epoch": 3.0988887325924885, "grad_norm": 3.484375, "learning_rate": 1.1733097231347372e-06, "loss": 1.6959, "step": 11015 }, { "epoch": 3.1002954001969334, "grad_norm": 2.875, "learning_rate": 1.1698370932620538e-06, "loss": 1.5115, "step": 11020 }, { "epoch": 3.1017020678013787, "grad_norm": 3.90625, "learning_rate": 1.1663687295621621e-06, "loss": 1.5369, "step": 11025 }, { "epoch": 3.1031087354058235, "grad_norm": 3.59375, "learning_rate": 1.1629046372632524e-06, "loss": 1.5513, "step": 11030 }, { "epoch": 3.104515403010269, "grad_norm": 3.484375, "learning_rate": 1.1594448215870812e-06, "loss": 1.4403, "step": 11035 }, { "epoch": 3.1059220706147137, "grad_norm": 3.859375, "learning_rate": 1.1559892877489548e-06, "loss": 1.6092, "step": 11040 }, { "epoch": 3.107328738219159, "grad_norm": 3.109375, "learning_rate": 1.1525380409577282e-06, "loss": 1.6913, "step": 11045 }, { "epoch": 3.108735405823604, "grad_norm": 3.015625, "learning_rate": 1.1490910864157907e-06, "loss": 1.4355, "step": 11050 }, { "epoch": 3.110142073428049, "grad_norm": 3.203125, "learning_rate": 1.1456484293190668e-06, "loss": 1.624, "step": 11055 }, { "epoch": 3.111548741032494, "grad_norm": 3.40625, "learning_rate": 1.1422100748569982e-06, "loss": 1.2994, "step": 11060 }, { "epoch": 3.112955408636939, "grad_norm": 3.328125, "learning_rate": 1.1387760282125412e-06, "loss": 1.52, "step": 11065 }, { "epoch": 3.114362076241384, "grad_norm": 2.859375, "learning_rate": 1.1353462945621632e-06, "loss": 1.5788, "step": 11070 }, { "epoch": 3.115768743845829, "grad_norm": 3.046875, "learning_rate": 1.1319208790758223e-06, "loss": 1.542, "step": 11075 }, { "epoch": 3.1171754114502743, "grad_norm": 3.65625, "learning_rate": 1.1284997869169756e-06, "loss": 1.5894, "step": 11080 }, { "epoch": 3.118582079054719, "grad_norm": 3.328125, "learning_rate": 1.125083023242558e-06, "loss": 1.6021, "step": 11085 }, { "epoch": 3.1199887466591645, "grad_norm": 2.9375, "learning_rate": 1.1216705932029816e-06, "loss": 1.7714, "step": 11090 }, { "epoch": 3.1213954142636093, "grad_norm": 3.09375, "learning_rate": 1.1182625019421244e-06, "loss": 1.5162, "step": 11095 }, { "epoch": 3.1228020818680546, "grad_norm": 4.25, "learning_rate": 1.114858754597329e-06, "loss": 1.7289, "step": 11100 }, { "epoch": 3.1242087494724995, "grad_norm": 4.0625, "learning_rate": 1.111459356299381e-06, "loss": 1.5885, "step": 11105 }, { "epoch": 3.125615417076945, "grad_norm": 4.1875, "learning_rate": 1.10806431217252e-06, "loss": 1.5521, "step": 11110 }, { "epoch": 3.1270220846813896, "grad_norm": 3.953125, "learning_rate": 1.104673627334416e-06, "loss": 1.578, "step": 11115 }, { "epoch": 3.128428752285835, "grad_norm": 3.59375, "learning_rate": 1.1012873068961702e-06, "loss": 1.6254, "step": 11120 }, { "epoch": 3.12983541989028, "grad_norm": 4.0, "learning_rate": 1.0979053559623026e-06, "loss": 1.4843, "step": 11125 }, { "epoch": 3.131242087494725, "grad_norm": 3.25, "learning_rate": 1.0945277796307513e-06, "loss": 1.5013, "step": 11130 }, { "epoch": 3.13264875509917, "grad_norm": 4.6875, "learning_rate": 1.0911545829928552e-06, "loss": 1.5731, "step": 11135 }, { "epoch": 3.1340554227036153, "grad_norm": 2.828125, "learning_rate": 1.0877857711333534e-06, "loss": 1.6624, "step": 11140 }, { "epoch": 3.13546209030806, "grad_norm": 2.859375, "learning_rate": 1.0844213491303772e-06, "loss": 1.766, "step": 11145 }, { "epoch": 3.1368687579125054, "grad_norm": 4.34375, "learning_rate": 1.0810613220554356e-06, "loss": 1.6699, "step": 11150 }, { "epoch": 3.1382754255169503, "grad_norm": 3.625, "learning_rate": 1.0777056949734187e-06, "loss": 1.7605, "step": 11155 }, { "epoch": 3.1396820931213956, "grad_norm": 3.5625, "learning_rate": 1.0743544729425802e-06, "loss": 1.6822, "step": 11160 }, { "epoch": 3.1410887607258404, "grad_norm": 4.25, "learning_rate": 1.0710076610145344e-06, "loss": 1.6808, "step": 11165 }, { "epoch": 3.1424954283302857, "grad_norm": 2.890625, "learning_rate": 1.0676652642342471e-06, "loss": 1.5603, "step": 11170 }, { "epoch": 3.1439020959347306, "grad_norm": 3.359375, "learning_rate": 1.064327287640034e-06, "loss": 1.6544, "step": 11175 }, { "epoch": 3.145308763539176, "grad_norm": 3.859375, "learning_rate": 1.0609937362635376e-06, "loss": 1.6709, "step": 11180 }, { "epoch": 3.1467154311436207, "grad_norm": 3.5, "learning_rate": 1.0576646151297404e-06, "loss": 1.6512, "step": 11185 }, { "epoch": 3.148122098748066, "grad_norm": 4.78125, "learning_rate": 1.0543399292569404e-06, "loss": 1.7634, "step": 11190 }, { "epoch": 3.149528766352511, "grad_norm": 4.28125, "learning_rate": 1.0510196836567522e-06, "loss": 1.4378, "step": 11195 }, { "epoch": 3.150935433956956, "grad_norm": 3.046875, "learning_rate": 1.0477038833340964e-06, "loss": 1.5599, "step": 11200 }, { "epoch": 3.152342101561401, "grad_norm": 3.265625, "learning_rate": 1.0443925332871914e-06, "loss": 1.6039, "step": 11205 }, { "epoch": 3.153748769165846, "grad_norm": 3.109375, "learning_rate": 1.0410856385075528e-06, "loss": 1.5945, "step": 11210 }, { "epoch": 3.155155436770291, "grad_norm": 3.796875, "learning_rate": 1.037783203979974e-06, "loss": 1.514, "step": 11215 }, { "epoch": 3.156562104374736, "grad_norm": 3.40625, "learning_rate": 1.0344852346825282e-06, "loss": 1.3017, "step": 11220 }, { "epoch": 3.1579687719791814, "grad_norm": 2.78125, "learning_rate": 1.0311917355865554e-06, "loss": 1.4708, "step": 11225 }, { "epoch": 3.159375439583626, "grad_norm": 3.8125, "learning_rate": 1.027902711656663e-06, "loss": 1.676, "step": 11230 }, { "epoch": 3.1607821071880715, "grad_norm": 4.59375, "learning_rate": 1.0246181678507038e-06, "loss": 1.5518, "step": 11235 }, { "epoch": 3.1621887747925164, "grad_norm": 3.1875, "learning_rate": 1.0213381091197852e-06, "loss": 1.561, "step": 11240 }, { "epoch": 3.1635954423969617, "grad_norm": 2.515625, "learning_rate": 1.0180625404082497e-06, "loss": 1.7335, "step": 11245 }, { "epoch": 3.1650021100014065, "grad_norm": 3.625, "learning_rate": 1.0147914666536718e-06, "loss": 1.5096, "step": 11250 }, { "epoch": 3.166408777605852, "grad_norm": 5.3125, "learning_rate": 1.011524892786851e-06, "loss": 1.5547, "step": 11255 }, { "epoch": 3.1678154452102967, "grad_norm": 4.125, "learning_rate": 1.0082628237318065e-06, "loss": 1.6803, "step": 11260 }, { "epoch": 3.169222112814742, "grad_norm": 3.34375, "learning_rate": 1.0050052644057592e-06, "loss": 1.6371, "step": 11265 }, { "epoch": 3.170628780419187, "grad_norm": 3.640625, "learning_rate": 1.00175221971914e-06, "loss": 1.6701, "step": 11270 }, { "epoch": 3.172035448023632, "grad_norm": 3.59375, "learning_rate": 9.985036945755734e-07, "loss": 1.7211, "step": 11275 }, { "epoch": 3.173442115628077, "grad_norm": 3.609375, "learning_rate": 9.952596938718648e-07, "loss": 1.5572, "step": 11280 }, { "epoch": 3.1748487832325223, "grad_norm": 3.21875, "learning_rate": 9.920202224980072e-07, "loss": 1.5249, "step": 11285 }, { "epoch": 3.176255450836967, "grad_norm": 2.671875, "learning_rate": 9.887852853371615e-07, "loss": 1.4818, "step": 11290 }, { "epoch": 3.1776621184414124, "grad_norm": 4.15625, "learning_rate": 9.855548872656557e-07, "loss": 1.5171, "step": 11295 }, { "epoch": 3.1790687860458573, "grad_norm": 3.5625, "learning_rate": 9.823290331529736e-07, "loss": 1.7529, "step": 11300 }, { "epoch": 3.1804754536503026, "grad_norm": 4.65625, "learning_rate": 9.791077278617538e-07, "loss": 1.546, "step": 11305 }, { "epoch": 3.1818821212547475, "grad_norm": 3.421875, "learning_rate": 9.758909762477717e-07, "loss": 1.6178, "step": 11310 }, { "epoch": 3.1832887888591928, "grad_norm": 3.578125, "learning_rate": 9.726787831599436e-07, "loss": 1.4941, "step": 11315 }, { "epoch": 3.1846954564636376, "grad_norm": 2.734375, "learning_rate": 9.694711534403128e-07, "loss": 1.6382, "step": 11320 }, { "epoch": 3.186102124068083, "grad_norm": 3.390625, "learning_rate": 9.662680919240434e-07, "loss": 1.4399, "step": 11325 }, { "epoch": 3.1875087916725278, "grad_norm": 3.078125, "learning_rate": 9.630696034394118e-07, "loss": 1.2156, "step": 11330 }, { "epoch": 3.1889154592769726, "grad_norm": 5.40625, "learning_rate": 9.598756928078069e-07, "loss": 1.7228, "step": 11335 }, { "epoch": 3.190322126881418, "grad_norm": 2.53125, "learning_rate": 9.56686364843708e-07, "loss": 1.6953, "step": 11340 }, { "epoch": 3.191728794485863, "grad_norm": 3.46875, "learning_rate": 9.535016243546952e-07, "loss": 1.586, "step": 11345 }, { "epoch": 3.193135462090308, "grad_norm": 2.953125, "learning_rate": 9.503214761414277e-07, "loss": 1.7237, "step": 11350 }, { "epoch": 3.194542129694753, "grad_norm": 3.421875, "learning_rate": 9.471459249976446e-07, "loss": 1.4558, "step": 11355 }, { "epoch": 3.1959487972991982, "grad_norm": 4.5, "learning_rate": 9.439749757101561e-07, "loss": 1.5914, "step": 11360 }, { "epoch": 3.197355464903643, "grad_norm": 3.140625, "learning_rate": 9.408086330588343e-07, "loss": 1.6958, "step": 11365 }, { "epoch": 3.1987621325080884, "grad_norm": 4.9375, "learning_rate": 9.376469018166071e-07, "loss": 1.6927, "step": 11370 }, { "epoch": 3.2001688001125332, "grad_norm": 2.8125, "learning_rate": 9.344897867494515e-07, "loss": 1.6201, "step": 11375 }, { "epoch": 3.2015754677169785, "grad_norm": 4.1875, "learning_rate": 9.313372926163867e-07, "loss": 1.5303, "step": 11380 }, { "epoch": 3.2029821353214234, "grad_norm": 4.15625, "learning_rate": 9.28189424169465e-07, "loss": 1.7088, "step": 11385 }, { "epoch": 3.2043888029258687, "grad_norm": 3.359375, "learning_rate": 9.250461861537684e-07, "loss": 1.5421, "step": 11390 }, { "epoch": 3.2057954705303136, "grad_norm": 2.96875, "learning_rate": 9.21907583307397e-07, "loss": 1.6744, "step": 11395 }, { "epoch": 3.207202138134759, "grad_norm": 3.703125, "learning_rate": 9.187736203614638e-07, "loss": 1.5669, "step": 11400 }, { "epoch": 3.2086088057392037, "grad_norm": 3.859375, "learning_rate": 9.156443020400883e-07, "loss": 1.6098, "step": 11405 }, { "epoch": 3.210015473343649, "grad_norm": 4.21875, "learning_rate": 9.125196330603877e-07, "loss": 1.504, "step": 11410 }, { "epoch": 3.211422140948094, "grad_norm": 9.0, "learning_rate": 9.093996181324742e-07, "loss": 1.4803, "step": 11415 }, { "epoch": 3.212828808552539, "grad_norm": 3.296875, "learning_rate": 9.062842619594402e-07, "loss": 1.5536, "step": 11420 }, { "epoch": 3.214235476156984, "grad_norm": 2.3125, "learning_rate": 9.031735692373578e-07, "loss": 1.8039, "step": 11425 }, { "epoch": 3.2156421437614293, "grad_norm": 3.703125, "learning_rate": 9.000675446552679e-07, "loss": 1.7332, "step": 11430 }, { "epoch": 3.217048811365874, "grad_norm": 3.0, "learning_rate": 8.969661928951789e-07, "loss": 1.3582, "step": 11435 }, { "epoch": 3.2184554789703195, "grad_norm": 3.015625, "learning_rate": 8.938695186320475e-07, "loss": 1.5736, "step": 11440 }, { "epoch": 3.2198621465747643, "grad_norm": 2.71875, "learning_rate": 8.90777526533788e-07, "loss": 1.5083, "step": 11445 }, { "epoch": 3.2212688141792096, "grad_norm": 4.59375, "learning_rate": 8.87690221261252e-07, "loss": 1.5167, "step": 11450 }, { "epoch": 3.2226754817836545, "grad_norm": 3.234375, "learning_rate": 8.846076074682276e-07, "loss": 1.7006, "step": 11455 }, { "epoch": 3.2240821493881, "grad_norm": 2.671875, "learning_rate": 8.815296898014293e-07, "loss": 1.5955, "step": 11460 }, { "epoch": 3.2254888169925446, "grad_norm": 3.40625, "learning_rate": 8.784564729004978e-07, "loss": 1.6101, "step": 11465 }, { "epoch": 3.22689548459699, "grad_norm": 3.328125, "learning_rate": 8.753879613979789e-07, "loss": 1.6842, "step": 11470 }, { "epoch": 3.228302152201435, "grad_norm": 4.34375, "learning_rate": 8.723241599193349e-07, "loss": 1.2444, "step": 11475 }, { "epoch": 3.2297088198058796, "grad_norm": 4.46875, "learning_rate": 8.692650730829232e-07, "loss": 1.2935, "step": 11480 }, { "epoch": 3.231115487410325, "grad_norm": 4.375, "learning_rate": 8.662107054999936e-07, "loss": 1.6327, "step": 11485 }, { "epoch": 3.23252215501477, "grad_norm": 2.9375, "learning_rate": 8.631610617746865e-07, "loss": 1.7187, "step": 11490 }, { "epoch": 3.233928822619215, "grad_norm": 2.78125, "learning_rate": 8.601161465040179e-07, "loss": 1.5457, "step": 11495 }, { "epoch": 3.23533549022366, "grad_norm": 2.59375, "learning_rate": 8.570759642778766e-07, "loss": 1.6327, "step": 11500 }, { "epoch": 3.2367421578281053, "grad_norm": 3.53125, "learning_rate": 8.54040519679017e-07, "loss": 1.6916, "step": 11505 }, { "epoch": 3.23814882543255, "grad_norm": 2.953125, "learning_rate": 8.510098172830553e-07, "loss": 1.7347, "step": 11510 }, { "epoch": 3.2395554930369954, "grad_norm": 3.265625, "learning_rate": 8.479838616584523e-07, "loss": 1.763, "step": 11515 }, { "epoch": 3.2409621606414403, "grad_norm": 2.796875, "learning_rate": 8.449626573665209e-07, "loss": 1.791, "step": 11520 }, { "epoch": 3.2423688282458856, "grad_norm": 3.359375, "learning_rate": 8.419462089614073e-07, "loss": 1.5492, "step": 11525 }, { "epoch": 3.2437754958503304, "grad_norm": 2.6875, "learning_rate": 8.389345209900907e-07, "loss": 1.3123, "step": 11530 }, { "epoch": 3.2451821634547757, "grad_norm": 4.53125, "learning_rate": 8.359275979923723e-07, "loss": 1.6477, "step": 11535 }, { "epoch": 3.2465888310592206, "grad_norm": 4.15625, "learning_rate": 8.329254445008755e-07, "loss": 1.663, "step": 11540 }, { "epoch": 3.247995498663666, "grad_norm": 3.390625, "learning_rate": 8.299280650410265e-07, "loss": 1.3256, "step": 11545 }, { "epoch": 3.2494021662681107, "grad_norm": 4.0625, "learning_rate": 8.269354641310627e-07, "loss": 1.5578, "step": 11550 }, { "epoch": 3.250808833872556, "grad_norm": 3.59375, "learning_rate": 8.239476462820136e-07, "loss": 1.6555, "step": 11555 }, { "epoch": 3.252215501477001, "grad_norm": 2.390625, "learning_rate": 8.209646159976999e-07, "loss": 1.6347, "step": 11560 }, { "epoch": 3.253622169081446, "grad_norm": 3.546875, "learning_rate": 8.179863777747287e-07, "loss": 1.4629, "step": 11565 }, { "epoch": 3.255028836685891, "grad_norm": 3.5, "learning_rate": 8.150129361024762e-07, "loss": 1.6009, "step": 11570 }, { "epoch": 3.2564355042903363, "grad_norm": 3.6875, "learning_rate": 8.120442954630964e-07, "loss": 1.3342, "step": 11575 }, { "epoch": 3.257842171894781, "grad_norm": 3.34375, "learning_rate": 8.090804603315016e-07, "loss": 1.6218, "step": 11580 }, { "epoch": 3.2592488394992265, "grad_norm": 4.75, "learning_rate": 8.061214351753616e-07, "loss": 1.6242, "step": 11585 }, { "epoch": 3.2606555071036714, "grad_norm": 2.40625, "learning_rate": 8.031672244550938e-07, "loss": 1.5137, "step": 11590 }, { "epoch": 3.2620621747081167, "grad_norm": 3.0625, "learning_rate": 8.002178326238636e-07, "loss": 1.6331, "step": 11595 }, { "epoch": 3.2634688423125615, "grad_norm": 4.21875, "learning_rate": 7.972732641275648e-07, "loss": 1.2351, "step": 11600 }, { "epoch": 3.2648755099170064, "grad_norm": 3.3125, "learning_rate": 7.943335234048274e-07, "loss": 1.539, "step": 11605 }, { "epoch": 3.2662821775214517, "grad_norm": 5.375, "learning_rate": 7.91398614887e-07, "loss": 1.53, "step": 11610 }, { "epoch": 3.267688845125897, "grad_norm": 3.53125, "learning_rate": 7.88468542998149e-07, "loss": 1.5524, "step": 11615 }, { "epoch": 3.269095512730342, "grad_norm": 3.109375, "learning_rate": 7.855433121550481e-07, "loss": 1.4712, "step": 11620 }, { "epoch": 3.2705021803347867, "grad_norm": 2.578125, "learning_rate": 7.826229267671771e-07, "loss": 1.4857, "step": 11625 }, { "epoch": 3.271908847939232, "grad_norm": 4.4375, "learning_rate": 7.797073912367085e-07, "loss": 1.6786, "step": 11630 }, { "epoch": 3.2733155155436773, "grad_norm": 3.53125, "learning_rate": 7.767967099585044e-07, "loss": 1.5137, "step": 11635 }, { "epoch": 3.274722183148122, "grad_norm": 4.03125, "learning_rate": 7.73890887320114e-07, "loss": 1.7585, "step": 11640 }, { "epoch": 3.276128850752567, "grad_norm": 2.8125, "learning_rate": 7.709899277017546e-07, "loss": 1.6304, "step": 11645 }, { "epoch": 3.2775355183570123, "grad_norm": 3.296875, "learning_rate": 7.680938354763205e-07, "loss": 1.5074, "step": 11650 }, { "epoch": 3.278942185961457, "grad_norm": 2.90625, "learning_rate": 7.652026150093656e-07, "loss": 1.6919, "step": 11655 }, { "epoch": 3.2803488535659024, "grad_norm": 4.1875, "learning_rate": 7.623162706591002e-07, "loss": 1.5654, "step": 11660 }, { "epoch": 3.2817555211703473, "grad_norm": 3.390625, "learning_rate": 7.594348067763837e-07, "loss": 1.5234, "step": 11665 }, { "epoch": 3.2831621887747926, "grad_norm": 3.203125, "learning_rate": 7.565582277047227e-07, "loss": 1.4964, "step": 11670 }, { "epoch": 3.2845688563792375, "grad_norm": 3.28125, "learning_rate": 7.536865377802532e-07, "loss": 1.5708, "step": 11675 }, { "epoch": 3.2859755239836828, "grad_norm": 4.71875, "learning_rate": 7.508197413317491e-07, "loss": 1.4245, "step": 11680 }, { "epoch": 3.2873821915881276, "grad_norm": 3.234375, "learning_rate": 7.479578426806035e-07, "loss": 1.4793, "step": 11685 }, { "epoch": 3.288788859192573, "grad_norm": 3.109375, "learning_rate": 7.45100846140827e-07, "loss": 1.6957, "step": 11690 }, { "epoch": 3.2901955267970178, "grad_norm": 3.203125, "learning_rate": 7.422487560190407e-07, "loss": 1.4461, "step": 11695 }, { "epoch": 3.291602194401463, "grad_norm": 2.9375, "learning_rate": 7.394015766144717e-07, "loss": 1.7007, "step": 11700 }, { "epoch": 3.293008862005908, "grad_norm": 4.0625, "learning_rate": 7.365593122189428e-07, "loss": 1.4112, "step": 11705 }, { "epoch": 3.294415529610353, "grad_norm": 3.96875, "learning_rate": 7.337219671168689e-07, "loss": 1.5714, "step": 11710 }, { "epoch": 3.295822197214798, "grad_norm": 2.828125, "learning_rate": 7.308895455852484e-07, "loss": 1.5193, "step": 11715 }, { "epoch": 3.2972288648192434, "grad_norm": 3.421875, "learning_rate": 7.280620518936582e-07, "loss": 1.4024, "step": 11720 }, { "epoch": 3.2986355324236882, "grad_norm": 3.078125, "learning_rate": 7.252394903042498e-07, "loss": 1.6127, "step": 11725 }, { "epoch": 3.3000422000281335, "grad_norm": 3.8125, "learning_rate": 7.224218650717361e-07, "loss": 1.302, "step": 11730 }, { "epoch": 3.3014488676325784, "grad_norm": 2.890625, "learning_rate": 7.196091804433911e-07, "loss": 1.7009, "step": 11735 }, { "epoch": 3.3028555352370237, "grad_norm": 3.375, "learning_rate": 7.168014406590405e-07, "loss": 1.6338, "step": 11740 }, { "epoch": 3.3042622028414685, "grad_norm": 3.84375, "learning_rate": 7.139986499510575e-07, "loss": 1.6938, "step": 11745 }, { "epoch": 3.3056688704459134, "grad_norm": 3.5625, "learning_rate": 7.112008125443524e-07, "loss": 1.4994, "step": 11750 }, { "epoch": 3.3070755380503587, "grad_norm": 3.421875, "learning_rate": 7.084079326563728e-07, "loss": 1.5465, "step": 11755 }, { "epoch": 3.308482205654804, "grad_norm": 4.96875, "learning_rate": 7.056200144970907e-07, "loss": 1.2758, "step": 11760 }, { "epoch": 3.309888873259249, "grad_norm": 4.25, "learning_rate": 7.02837062268999e-07, "loss": 1.6104, "step": 11765 }, { "epoch": 3.3112955408636937, "grad_norm": 3.171875, "learning_rate": 7.000590801671049e-07, "loss": 1.3466, "step": 11770 }, { "epoch": 3.312702208468139, "grad_norm": 4.03125, "learning_rate": 6.972860723789243e-07, "loss": 1.7502, "step": 11775 }, { "epoch": 3.314108876072584, "grad_norm": 3.0625, "learning_rate": 6.945180430844754e-07, "loss": 1.7875, "step": 11780 }, { "epoch": 3.315515543677029, "grad_norm": 3.03125, "learning_rate": 6.917549964562712e-07, "loss": 1.503, "step": 11785 }, { "epoch": 3.316922211281474, "grad_norm": 4.09375, "learning_rate": 6.889969366593127e-07, "loss": 1.4491, "step": 11790 }, { "epoch": 3.3183288788859193, "grad_norm": 4.78125, "learning_rate": 6.862438678510849e-07, "loss": 1.2961, "step": 11795 }, { "epoch": 3.319735546490364, "grad_norm": 3.515625, "learning_rate": 6.834957941815518e-07, "loss": 1.6573, "step": 11800 }, { "epoch": 3.3211422140948095, "grad_norm": 3.046875, "learning_rate": 6.807527197931411e-07, "loss": 1.3254, "step": 11805 }, { "epoch": 3.3225488816992543, "grad_norm": 3.21875, "learning_rate": 6.780146488207524e-07, "loss": 1.5744, "step": 11810 }, { "epoch": 3.3239555493036996, "grad_norm": 2.921875, "learning_rate": 6.752815853917377e-07, "loss": 1.5929, "step": 11815 }, { "epoch": 3.3253622169081445, "grad_norm": 3.109375, "learning_rate": 6.725535336259036e-07, "loss": 1.687, "step": 11820 }, { "epoch": 3.32676888451259, "grad_norm": 2.828125, "learning_rate": 6.698304976354992e-07, "loss": 1.5552, "step": 11825 }, { "epoch": 3.3281755521170346, "grad_norm": 3.3125, "learning_rate": 6.671124815252182e-07, "loss": 1.4686, "step": 11830 }, { "epoch": 3.32958221972148, "grad_norm": 3.890625, "learning_rate": 6.643994893921801e-07, "loss": 1.5944, "step": 11835 }, { "epoch": 3.330988887325925, "grad_norm": 3.140625, "learning_rate": 6.616915253259367e-07, "loss": 1.3498, "step": 11840 }, { "epoch": 3.33239555493037, "grad_norm": 3.234375, "learning_rate": 6.589885934084609e-07, "loss": 1.8226, "step": 11845 }, { "epoch": 3.333802222534815, "grad_norm": 3.515625, "learning_rate": 6.562906977141342e-07, "loss": 1.5916, "step": 11850 }, { "epoch": 3.3352088901392603, "grad_norm": 3.75, "learning_rate": 6.535978423097535e-07, "loss": 1.2771, "step": 11855 }, { "epoch": 3.336615557743705, "grad_norm": 3.65625, "learning_rate": 6.509100312545142e-07, "loss": 1.7791, "step": 11860 }, { "epoch": 3.3380222253481504, "grad_norm": 2.84375, "learning_rate": 6.482272686000083e-07, "loss": 1.5699, "step": 11865 }, { "epoch": 3.3394288929525953, "grad_norm": 3.890625, "learning_rate": 6.455495583902175e-07, "loss": 1.5746, "step": 11870 }, { "epoch": 3.3408355605570406, "grad_norm": 4.59375, "learning_rate": 6.428769046615108e-07, "loss": 1.7, "step": 11875 }, { "epoch": 3.3422422281614854, "grad_norm": 3.6875, "learning_rate": 6.402093114426291e-07, "loss": 1.6023, "step": 11880 }, { "epoch": 3.3436488957659307, "grad_norm": 3.859375, "learning_rate": 6.375467827546908e-07, "loss": 1.6378, "step": 11885 }, { "epoch": 3.3450555633703756, "grad_norm": 4.71875, "learning_rate": 6.348893226111775e-07, "loss": 1.561, "step": 11890 }, { "epoch": 3.3464622309748204, "grad_norm": 3.390625, "learning_rate": 6.3223693501793e-07, "loss": 1.6388, "step": 11895 }, { "epoch": 3.3478688985792657, "grad_norm": 3.5625, "learning_rate": 6.29589623973143e-07, "loss": 1.6767, "step": 11900 }, { "epoch": 3.349275566183711, "grad_norm": 2.953125, "learning_rate": 6.269473934673617e-07, "loss": 1.6953, "step": 11905 }, { "epoch": 3.350682233788156, "grad_norm": 4.125, "learning_rate": 6.243102474834679e-07, "loss": 1.6556, "step": 11910 }, { "epoch": 3.3520889013926007, "grad_norm": 3.203125, "learning_rate": 6.21678189996683e-07, "loss": 1.5383, "step": 11915 }, { "epoch": 3.353495568997046, "grad_norm": 4.125, "learning_rate": 6.19051224974557e-07, "loss": 1.2658, "step": 11920 }, { "epoch": 3.354902236601491, "grad_norm": 2.796875, "learning_rate": 6.164293563769618e-07, "loss": 1.6482, "step": 11925 }, { "epoch": 3.356308904205936, "grad_norm": 3.875, "learning_rate": 6.138125881560912e-07, "loss": 1.7493, "step": 11930 }, { "epoch": 3.357715571810381, "grad_norm": 4.5, "learning_rate": 6.112009242564444e-07, "loss": 1.4005, "step": 11935 }, { "epoch": 3.3591222394148263, "grad_norm": 3.0, "learning_rate": 6.085943686148329e-07, "loss": 1.6748, "step": 11940 }, { "epoch": 3.360528907019271, "grad_norm": 2.796875, "learning_rate": 6.059929251603635e-07, "loss": 1.4217, "step": 11945 }, { "epoch": 3.3619355746237165, "grad_norm": 2.796875, "learning_rate": 6.033965978144393e-07, "loss": 1.7179, "step": 11950 }, { "epoch": 3.3633422422281614, "grad_norm": 4.03125, "learning_rate": 6.008053904907489e-07, "loss": 1.5697, "step": 11955 }, { "epoch": 3.3647489098326067, "grad_norm": 3.0, "learning_rate": 5.982193070952677e-07, "loss": 1.4438, "step": 11960 }, { "epoch": 3.3661555774370515, "grad_norm": 4.15625, "learning_rate": 5.956383515262411e-07, "loss": 1.6678, "step": 11965 }, { "epoch": 3.367562245041497, "grad_norm": 3.078125, "learning_rate": 5.930625276741903e-07, "loss": 1.66, "step": 11970 }, { "epoch": 3.3689689126459417, "grad_norm": 3.5625, "learning_rate": 5.904918394218978e-07, "loss": 1.4462, "step": 11975 }, { "epoch": 3.370375580250387, "grad_norm": 3.59375, "learning_rate": 5.879262906444049e-07, "loss": 1.6758, "step": 11980 }, { "epoch": 3.371782247854832, "grad_norm": 3.09375, "learning_rate": 5.853658852090082e-07, "loss": 1.4858, "step": 11985 }, { "epoch": 3.373188915459277, "grad_norm": 3.015625, "learning_rate": 5.828106269752488e-07, "loss": 1.3861, "step": 11990 }, { "epoch": 3.374595583063722, "grad_norm": 2.578125, "learning_rate": 5.802605197949093e-07, "loss": 1.5823, "step": 11995 }, { "epoch": 3.3760022506681673, "grad_norm": 2.984375, "learning_rate": 5.777155675120071e-07, "loss": 1.5136, "step": 12000 }, { "epoch": 3.377408918272612, "grad_norm": 3.765625, "learning_rate": 5.751757739627931e-07, "loss": 1.4484, "step": 12005 }, { "epoch": 3.3788155858770574, "grad_norm": 3.140625, "learning_rate": 5.726411429757347e-07, "loss": 1.6736, "step": 12010 }, { "epoch": 3.3802222534815023, "grad_norm": 15.5625, "learning_rate": 5.701116783715241e-07, "loss": 1.399, "step": 12015 }, { "epoch": 3.381628921085947, "grad_norm": 2.828125, "learning_rate": 5.675873839630627e-07, "loss": 1.7158, "step": 12020 }, { "epoch": 3.3830355886903924, "grad_norm": 3.890625, "learning_rate": 5.65068263555458e-07, "loss": 1.6117, "step": 12025 }, { "epoch": 3.3844422562948377, "grad_norm": 2.96875, "learning_rate": 5.625543209460186e-07, "loss": 1.592, "step": 12030 }, { "epoch": 3.3858489238992826, "grad_norm": 3.203125, "learning_rate": 5.60045559924251e-07, "loss": 1.6539, "step": 12035 }, { "epoch": 3.3872555915037275, "grad_norm": 2.75, "learning_rate": 5.57541984271845e-07, "loss": 1.5038, "step": 12040 }, { "epoch": 3.3886622591081728, "grad_norm": 2.453125, "learning_rate": 5.550435977626797e-07, "loss": 1.4934, "step": 12045 }, { "epoch": 3.390068926712618, "grad_norm": 3.484375, "learning_rate": 5.525504041628095e-07, "loss": 1.4347, "step": 12050 }, { "epoch": 3.391475594317063, "grad_norm": 2.875, "learning_rate": 5.5006240723046e-07, "loss": 1.698, "step": 12055 }, { "epoch": 3.3928822619215078, "grad_norm": 2.953125, "learning_rate": 5.475796107160273e-07, "loss": 1.5576, "step": 12060 }, { "epoch": 3.394288929525953, "grad_norm": 4.25, "learning_rate": 5.451020183620642e-07, "loss": 1.5214, "step": 12065 }, { "epoch": 3.395695597130398, "grad_norm": 4.875, "learning_rate": 5.426296339032812e-07, "loss": 1.5787, "step": 12070 }, { "epoch": 3.3971022647348432, "grad_norm": 3.6875, "learning_rate": 5.401624610665374e-07, "loss": 1.5412, "step": 12075 }, { "epoch": 3.398508932339288, "grad_norm": 3.4375, "learning_rate": 5.377005035708362e-07, "loss": 1.5295, "step": 12080 }, { "epoch": 3.3999155999437334, "grad_norm": 3.296875, "learning_rate": 5.352437651273183e-07, "loss": 1.5275, "step": 12085 }, { "epoch": 3.4013222675481782, "grad_norm": 2.984375, "learning_rate": 5.32792249439261e-07, "loss": 1.542, "step": 12090 }, { "epoch": 3.4027289351526235, "grad_norm": 3.703125, "learning_rate": 5.303459602020646e-07, "loss": 1.3692, "step": 12095 }, { "epoch": 3.4041356027570684, "grad_norm": 1.9296875, "learning_rate": 5.279049011032533e-07, "loss": 1.631, "step": 12100 }, { "epoch": 3.4055422703615137, "grad_norm": 3.671875, "learning_rate": 5.254690758224663e-07, "loss": 1.7059, "step": 12105 }, { "epoch": 3.4069489379659585, "grad_norm": 3.09375, "learning_rate": 5.23038488031454e-07, "loss": 1.3645, "step": 12110 }, { "epoch": 3.408355605570404, "grad_norm": 3.8125, "learning_rate": 5.206131413940711e-07, "loss": 1.4428, "step": 12115 }, { "epoch": 3.4097622731748487, "grad_norm": 2.875, "learning_rate": 5.181930395662744e-07, "loss": 1.6279, "step": 12120 }, { "epoch": 3.411168940779294, "grad_norm": 3.359375, "learning_rate": 5.157781861961115e-07, "loss": 1.7865, "step": 12125 }, { "epoch": 3.412575608383739, "grad_norm": 3.859375, "learning_rate": 5.133685849237191e-07, "loss": 1.6429, "step": 12130 }, { "epoch": 3.413982275988184, "grad_norm": 3.6875, "learning_rate": 5.109642393813201e-07, "loss": 1.6585, "step": 12135 }, { "epoch": 3.415388943592629, "grad_norm": 2.65625, "learning_rate": 5.085651531932087e-07, "loss": 1.6252, "step": 12140 }, { "epoch": 3.4167956111970743, "grad_norm": 2.90625, "learning_rate": 5.061713299757579e-07, "loss": 1.647, "step": 12145 }, { "epoch": 3.418202278801519, "grad_norm": 3.125, "learning_rate": 5.037827733374031e-07, "loss": 1.5837, "step": 12150 }, { "epoch": 3.4196089464059645, "grad_norm": 5.40625, "learning_rate": 5.013994868786429e-07, "loss": 1.3133, "step": 12155 }, { "epoch": 3.4210156140104093, "grad_norm": 3.375, "learning_rate": 4.990214741920287e-07, "loss": 1.81, "step": 12160 }, { "epoch": 3.422422281614854, "grad_norm": 2.875, "learning_rate": 4.966487388621679e-07, "loss": 1.3786, "step": 12165 }, { "epoch": 3.4238289492192995, "grad_norm": 3.515625, "learning_rate": 4.942812844657061e-07, "loss": 1.3051, "step": 12170 }, { "epoch": 3.4252356168237448, "grad_norm": 4.4375, "learning_rate": 4.919191145713335e-07, "loss": 1.7033, "step": 12175 }, { "epoch": 3.4266422844281896, "grad_norm": 2.875, "learning_rate": 4.895622327397722e-07, "loss": 1.54, "step": 12180 }, { "epoch": 3.4280489520326345, "grad_norm": 3.921875, "learning_rate": 4.872106425237734e-07, "loss": 1.4154, "step": 12185 }, { "epoch": 3.42945561963708, "grad_norm": 3.390625, "learning_rate": 4.848643474681115e-07, "loss": 1.3742, "step": 12190 }, { "epoch": 3.4308622872415246, "grad_norm": 3.65625, "learning_rate": 4.82523351109581e-07, "loss": 1.5347, "step": 12195 }, { "epoch": 3.43226895484597, "grad_norm": 3.328125, "learning_rate": 4.801876569769865e-07, "loss": 1.6441, "step": 12200 }, { "epoch": 3.433675622450415, "grad_norm": 2.984375, "learning_rate": 4.778572685911402e-07, "loss": 1.4506, "step": 12205 }, { "epoch": 3.43508229005486, "grad_norm": 2.78125, "learning_rate": 4.7553218946486007e-07, "loss": 1.4641, "step": 12210 }, { "epoch": 3.436488957659305, "grad_norm": 4.1875, "learning_rate": 4.732124231029546e-07, "loss": 1.6343, "step": 12215 }, { "epoch": 3.4378956252637503, "grad_norm": 3.734375, "learning_rate": 4.708979730022307e-07, "loss": 1.3519, "step": 12220 }, { "epoch": 3.439302292868195, "grad_norm": 4.375, "learning_rate": 4.6858884265147705e-07, "loss": 1.6333, "step": 12225 }, { "epoch": 3.4407089604726404, "grad_norm": 3.09375, "learning_rate": 4.662850355314649e-07, "loss": 1.4561, "step": 12230 }, { "epoch": 3.4421156280770853, "grad_norm": 3.9375, "learning_rate": 4.6398655511494e-07, "loss": 1.569, "step": 12235 }, { "epoch": 3.4435222956815306, "grad_norm": 3.3125, "learning_rate": 4.6169340486662234e-07, "loss": 1.684, "step": 12240 }, { "epoch": 3.4449289632859754, "grad_norm": 2.609375, "learning_rate": 4.594055882431913e-07, "loss": 1.3547, "step": 12245 }, { "epoch": 3.4463356308904207, "grad_norm": 3.90625, "learning_rate": 4.571231086932923e-07, "loss": 1.6273, "step": 12250 }, { "epoch": 3.4477422984948656, "grad_norm": 3.234375, "learning_rate": 4.54845969657522e-07, "loss": 1.7027, "step": 12255 }, { "epoch": 3.449148966099311, "grad_norm": 3.46875, "learning_rate": 4.525741745684284e-07, "loss": 1.5842, "step": 12260 }, { "epoch": 3.4505556337037557, "grad_norm": 4.46875, "learning_rate": 4.50307726850502e-07, "loss": 1.6175, "step": 12265 }, { "epoch": 3.451962301308201, "grad_norm": 3.140625, "learning_rate": 4.480466299201766e-07, "loss": 1.4567, "step": 12270 }, { "epoch": 3.453368968912646, "grad_norm": 4.4375, "learning_rate": 4.457908871858169e-07, "loss": 1.4125, "step": 12275 }, { "epoch": 3.454775636517091, "grad_norm": 3.25, "learning_rate": 4.435405020477172e-07, "loss": 1.5601, "step": 12280 }, { "epoch": 3.456182304121536, "grad_norm": 4.28125, "learning_rate": 4.412954778980968e-07, "loss": 1.6602, "step": 12285 }, { "epoch": 3.4575889717259813, "grad_norm": 2.765625, "learning_rate": 4.390558181210928e-07, "loss": 1.8053, "step": 12290 }, { "epoch": 3.458995639330426, "grad_norm": 3.40625, "learning_rate": 4.368215260927588e-07, "loss": 1.5461, "step": 12295 }, { "epoch": 3.4604023069348715, "grad_norm": 3.3125, "learning_rate": 4.3459260518105134e-07, "loss": 1.5155, "step": 12300 }, { "epoch": 3.4618089745393164, "grad_norm": 4.5, "learning_rate": 4.3236905874583704e-07, "loss": 1.3204, "step": 12305 }, { "epoch": 3.463215642143761, "grad_norm": 2.328125, "learning_rate": 4.3015089013887753e-07, "loss": 1.4738, "step": 12310 }, { "epoch": 3.4646223097482065, "grad_norm": 3.390625, "learning_rate": 4.279381027038278e-07, "loss": 1.5011, "step": 12315 }, { "epoch": 3.466028977352652, "grad_norm": 4.59375, "learning_rate": 4.257306997762322e-07, "loss": 1.3831, "step": 12320 }, { "epoch": 3.4674356449570967, "grad_norm": 4.21875, "learning_rate": 4.235286846835202e-07, "loss": 1.5323, "step": 12325 }, { "epoch": 3.4688423125615415, "grad_norm": 3.109375, "learning_rate": 4.2133206074499527e-07, "loss": 1.5963, "step": 12330 }, { "epoch": 3.470248980165987, "grad_norm": 3.875, "learning_rate": 4.191408312718385e-07, "loss": 1.6031, "step": 12335 }, { "epoch": 3.4716556477704317, "grad_norm": 3.453125, "learning_rate": 4.169549995670971e-07, "loss": 1.6792, "step": 12340 }, { "epoch": 3.473062315374877, "grad_norm": 4.375, "learning_rate": 4.147745689256821e-07, "loss": 1.526, "step": 12345 }, { "epoch": 3.474468982979322, "grad_norm": 2.96875, "learning_rate": 4.1259954263436426e-07, "loss": 1.6437, "step": 12350 }, { "epoch": 3.475875650583767, "grad_norm": 4.125, "learning_rate": 4.104299239717668e-07, "loss": 1.5831, "step": 12355 }, { "epoch": 3.477282318188212, "grad_norm": 3.203125, "learning_rate": 4.082657162083607e-07, "loss": 1.6606, "step": 12360 }, { "epoch": 3.4786889857926573, "grad_norm": 5.875, "learning_rate": 4.0610692260646085e-07, "loss": 1.6935, "step": 12365 }, { "epoch": 3.480095653397102, "grad_norm": 3.140625, "learning_rate": 4.039535464202242e-07, "loss": 1.6452, "step": 12370 }, { "epoch": 3.4815023210015474, "grad_norm": 2.75, "learning_rate": 4.018055908956355e-07, "loss": 1.5527, "step": 12375 }, { "epoch": 3.4829089886059923, "grad_norm": 3.4375, "learning_rate": 3.9966305927051416e-07, "loss": 1.5381, "step": 12380 }, { "epoch": 3.4843156562104376, "grad_norm": 2.984375, "learning_rate": 3.975259547744998e-07, "loss": 1.6809, "step": 12385 }, { "epoch": 3.4857223238148825, "grad_norm": 4.28125, "learning_rate": 3.953942806290533e-07, "loss": 1.3621, "step": 12390 }, { "epoch": 3.4871289914193278, "grad_norm": 4.4375, "learning_rate": 3.9326804004744794e-07, "loss": 1.4407, "step": 12395 }, { "epoch": 3.4885356590237726, "grad_norm": 3.609375, "learning_rate": 3.911472362347701e-07, "loss": 1.7222, "step": 12400 }, { "epoch": 3.489942326628218, "grad_norm": 4.375, "learning_rate": 3.8903187238790514e-07, "loss": 1.7223, "step": 12405 }, { "epoch": 3.4913489942326628, "grad_norm": 3.4375, "learning_rate": 3.869219516955442e-07, "loss": 1.7887, "step": 12410 }, { "epoch": 3.492755661837108, "grad_norm": 4.0625, "learning_rate": 3.8481747733816984e-07, "loss": 1.5084, "step": 12415 }, { "epoch": 3.494162329441553, "grad_norm": 3.25, "learning_rate": 3.827184524880542e-07, "loss": 1.6298, "step": 12420 }, { "epoch": 3.495568997045998, "grad_norm": 2.46875, "learning_rate": 3.8062488030925887e-07, "loss": 1.5018, "step": 12425 }, { "epoch": 3.496975664650443, "grad_norm": 3.296875, "learning_rate": 3.785367639576225e-07, "loss": 1.8452, "step": 12430 }, { "epoch": 3.498382332254888, "grad_norm": 3.453125, "learning_rate": 3.764541065807609e-07, "loss": 1.562, "step": 12435 }, { "epoch": 3.4997889998593332, "grad_norm": 3.0625, "learning_rate": 3.7437691131806083e-07, "loss": 1.6698, "step": 12440 }, { "epoch": 3.5011956674637785, "grad_norm": 4.34375, "learning_rate": 3.723051813006752e-07, "loss": 1.2999, "step": 12445 }, { "epoch": 3.5026023350682234, "grad_norm": 3.265625, "learning_rate": 3.7023891965151853e-07, "loss": 1.5353, "step": 12450 }, { "epoch": 3.5040090026726682, "grad_norm": 3.546875, "learning_rate": 3.6817812948526506e-07, "loss": 1.4993, "step": 12455 }, { "epoch": 3.5054156702771135, "grad_norm": 3.15625, "learning_rate": 3.66122813908337e-07, "loss": 1.4794, "step": 12460 }, { "epoch": 3.506822337881559, "grad_norm": 2.953125, "learning_rate": 3.6407297601890763e-07, "loss": 1.6521, "step": 12465 }, { "epoch": 3.5082290054860037, "grad_norm": 4.625, "learning_rate": 3.6202861890689105e-07, "loss": 1.4697, "step": 12470 }, { "epoch": 3.5096356730904485, "grad_norm": 2.765625, "learning_rate": 3.599897456539409e-07, "loss": 1.6966, "step": 12475 }, { "epoch": 3.511042340694894, "grad_norm": 2.75, "learning_rate": 3.5795635933344313e-07, "loss": 1.6049, "step": 12480 }, { "epoch": 3.5124490082993387, "grad_norm": 3.5625, "learning_rate": 3.5592846301051525e-07, "loss": 1.5986, "step": 12485 }, { "epoch": 3.513855675903784, "grad_norm": 4.1875, "learning_rate": 3.5390605974199697e-07, "loss": 1.4704, "step": 12490 }, { "epoch": 3.515262343508229, "grad_norm": 3.203125, "learning_rate": 3.518891525764474e-07, "loss": 1.5354, "step": 12495 }, { "epoch": 3.516669011112674, "grad_norm": 3.40625, "learning_rate": 3.4987774455414434e-07, "loss": 1.3862, "step": 12500 }, { "epoch": 3.518075678717119, "grad_norm": 3.53125, "learning_rate": 3.478718387070705e-07, "loss": 1.7165, "step": 12505 }, { "epoch": 3.5194823463215643, "grad_norm": 5.125, "learning_rate": 3.458714380589205e-07, "loss": 1.5756, "step": 12510 }, { "epoch": 3.520889013926009, "grad_norm": 4.6875, "learning_rate": 3.438765456250867e-07, "loss": 1.5997, "step": 12515 }, { "epoch": 3.5222956815304545, "grad_norm": 3.21875, "learning_rate": 3.418871644126593e-07, "loss": 1.5862, "step": 12520 }, { "epoch": 3.5237023491348993, "grad_norm": 2.890625, "learning_rate": 3.399032974204212e-07, "loss": 1.647, "step": 12525 }, { "epoch": 3.5251090167393446, "grad_norm": 3.484375, "learning_rate": 3.3792494763884527e-07, "loss": 1.6057, "step": 12530 }, { "epoch": 3.5265156843437895, "grad_norm": 2.8125, "learning_rate": 3.3595211805008193e-07, "loss": 1.5112, "step": 12535 }, { "epoch": 3.527922351948235, "grad_norm": 4.03125, "learning_rate": 3.339848116279671e-07, "loss": 1.3215, "step": 12540 }, { "epoch": 3.5293290195526796, "grad_norm": 3.8125, "learning_rate": 3.3202303133800724e-07, "loss": 1.5533, "step": 12545 }, { "epoch": 3.530735687157125, "grad_norm": 3.109375, "learning_rate": 3.300667801373791e-07, "loss": 1.6153, "step": 12550 }, { "epoch": 3.53214235476157, "grad_norm": 2.953125, "learning_rate": 3.281160609749265e-07, "loss": 1.5961, "step": 12555 }, { "epoch": 3.5335490223660146, "grad_norm": 3.265625, "learning_rate": 3.261708767911533e-07, "loss": 1.4118, "step": 12560 }, { "epoch": 3.53495568997046, "grad_norm": 2.5, "learning_rate": 3.242312305182193e-07, "loss": 1.4728, "step": 12565 }, { "epoch": 3.5363623575749052, "grad_norm": 3.9375, "learning_rate": 3.222971250799373e-07, "loss": 1.6069, "step": 12570 }, { "epoch": 3.53776902517935, "grad_norm": 2.40625, "learning_rate": 3.2036856339176897e-07, "loss": 1.6743, "step": 12575 }, { "epoch": 3.539175692783795, "grad_norm": 2.75, "learning_rate": 3.1844554836081596e-07, "loss": 1.7418, "step": 12580 }, { "epoch": 3.5405823603882403, "grad_norm": 3.71875, "learning_rate": 3.16528082885823e-07, "loss": 1.7357, "step": 12585 }, { "epoch": 3.5419890279926856, "grad_norm": 3.640625, "learning_rate": 3.1461616985716655e-07, "loss": 1.7516, "step": 12590 }, { "epoch": 3.5433956955971304, "grad_norm": 3.234375, "learning_rate": 3.12709812156855e-07, "loss": 1.7343, "step": 12595 }, { "epoch": 3.5448023632015753, "grad_norm": 2.921875, "learning_rate": 3.1080901265852034e-07, "loss": 1.5757, "step": 12600 }, { "epoch": 3.5462090308060206, "grad_norm": 2.96875, "learning_rate": 3.0891377422742084e-07, "loss": 1.693, "step": 12605 }, { "epoch": 3.547615698410466, "grad_norm": 3.09375, "learning_rate": 3.070240997204254e-07, "loss": 1.5975, "step": 12610 }, { "epoch": 3.5490223660149107, "grad_norm": 4.625, "learning_rate": 3.051399919860222e-07, "loss": 1.6121, "step": 12615 }, { "epoch": 3.5504290336193556, "grad_norm": 4.09375, "learning_rate": 3.0326145386430433e-07, "loss": 1.4582, "step": 12620 }, { "epoch": 3.551835701223801, "grad_norm": 4.25, "learning_rate": 3.013884881869702e-07, "loss": 1.5848, "step": 12625 }, { "epoch": 3.5532423688282457, "grad_norm": 3.109375, "learning_rate": 2.9952109777731947e-07, "loss": 1.6932, "step": 12630 }, { "epoch": 3.554649036432691, "grad_norm": 4.84375, "learning_rate": 2.97659285450246e-07, "loss": 1.7294, "step": 12635 }, { "epoch": 3.556055704037136, "grad_norm": 2.796875, "learning_rate": 2.958030540122358e-07, "loss": 1.7316, "step": 12640 }, { "epoch": 3.557462371641581, "grad_norm": 2.671875, "learning_rate": 2.9395240626136276e-07, "loss": 1.7024, "step": 12645 }, { "epoch": 3.558869039246026, "grad_norm": 3.203125, "learning_rate": 2.9210734498728375e-07, "loss": 1.6179, "step": 12650 }, { "epoch": 3.5602757068504713, "grad_norm": 3.546875, "learning_rate": 2.902678729712336e-07, "loss": 1.6973, "step": 12655 }, { "epoch": 3.561682374454916, "grad_norm": 3.140625, "learning_rate": 2.8843399298602446e-07, "loss": 1.3544, "step": 12660 }, { "epoch": 3.5630890420593615, "grad_norm": 3.5625, "learning_rate": 2.866057077960353e-07, "loss": 1.7027, "step": 12665 }, { "epoch": 3.5644957096638064, "grad_norm": 3.09375, "learning_rate": 2.847830201572159e-07, "loss": 1.9461, "step": 12670 }, { "epoch": 3.5659023772682517, "grad_norm": 4.03125, "learning_rate": 2.8296593281707457e-07, "loss": 1.4954, "step": 12675 }, { "epoch": 3.5673090448726965, "grad_norm": 4.09375, "learning_rate": 2.811544485146804e-07, "loss": 1.6632, "step": 12680 }, { "epoch": 3.568715712477142, "grad_norm": 3.65625, "learning_rate": 2.793485699806535e-07, "loss": 1.5274, "step": 12685 }, { "epoch": 3.5701223800815867, "grad_norm": 4.46875, "learning_rate": 2.7754829993716876e-07, "loss": 1.5179, "step": 12690 }, { "epoch": 3.571529047686032, "grad_norm": 2.703125, "learning_rate": 2.757536410979404e-07, "loss": 1.404, "step": 12695 }, { "epoch": 3.572935715290477, "grad_norm": 4.21875, "learning_rate": 2.7396459616822974e-07, "loss": 1.4433, "step": 12700 }, { "epoch": 3.5743423828949217, "grad_norm": 3.65625, "learning_rate": 2.721811678448347e-07, "loss": 1.7107, "step": 12705 }, { "epoch": 3.575749050499367, "grad_norm": 3.96875, "learning_rate": 2.70403358816083e-07, "loss": 1.3703, "step": 12710 }, { "epoch": 3.5771557181038123, "grad_norm": 4.21875, "learning_rate": 2.6863117176183727e-07, "loss": 1.4843, "step": 12715 }, { "epoch": 3.578562385708257, "grad_norm": 3.140625, "learning_rate": 2.6686460935348187e-07, "loss": 1.2866, "step": 12720 }, { "epoch": 3.579969053312702, "grad_norm": 3.296875, "learning_rate": 2.651036742539241e-07, "loss": 1.511, "step": 12725 }, { "epoch": 3.5813757209171473, "grad_norm": 4.0625, "learning_rate": 2.633483691175877e-07, "loss": 1.5051, "step": 12730 }, { "epoch": 3.5827823885215926, "grad_norm": 3.1875, "learning_rate": 2.6159869659041176e-07, "loss": 1.5787, "step": 12735 }, { "epoch": 3.5841890561260374, "grad_norm": 5.59375, "learning_rate": 2.5985465930984163e-07, "loss": 1.4837, "step": 12740 }, { "epoch": 3.5855957237304823, "grad_norm": 3.453125, "learning_rate": 2.5811625990483164e-07, "loss": 1.4975, "step": 12745 }, { "epoch": 3.5870023913349276, "grad_norm": 3.703125, "learning_rate": 2.563835009958355e-07, "loss": 1.624, "step": 12750 }, { "epoch": 3.588409058939373, "grad_norm": 3.015625, "learning_rate": 2.546563851948047e-07, "loss": 1.5078, "step": 12755 }, { "epoch": 3.5898157265438178, "grad_norm": 3.34375, "learning_rate": 2.5293491510518425e-07, "loss": 1.5151, "step": 12760 }, { "epoch": 3.5912223941482626, "grad_norm": 3.640625, "learning_rate": 2.5121909332191047e-07, "loss": 1.4745, "step": 12765 }, { "epoch": 3.592629061752708, "grad_norm": 3.90625, "learning_rate": 2.4950892243140285e-07, "loss": 1.6577, "step": 12770 }, { "epoch": 3.5940357293571528, "grad_norm": 2.203125, "learning_rate": 2.478044050115646e-07, "loss": 1.5741, "step": 12775 }, { "epoch": 3.595442396961598, "grad_norm": 2.921875, "learning_rate": 2.4610554363177647e-07, "loss": 1.4498, "step": 12780 }, { "epoch": 3.596849064566043, "grad_norm": 4.21875, "learning_rate": 2.4441234085289264e-07, "loss": 1.7303, "step": 12785 }, { "epoch": 3.598255732170488, "grad_norm": 3.40625, "learning_rate": 2.4272479922723897e-07, "loss": 1.6942, "step": 12790 }, { "epoch": 3.599662399774933, "grad_norm": 3.0, "learning_rate": 2.410429212986065e-07, "loss": 1.4784, "step": 12795 }, { "epoch": 3.6010690673793784, "grad_norm": 3.03125, "learning_rate": 2.3936670960224935e-07, "loss": 1.6887, "step": 12800 }, { "epoch": 3.6024757349838232, "grad_norm": 3.671875, "learning_rate": 2.3769616666488024e-07, "loss": 1.6325, "step": 12805 }, { "epoch": 3.6038824025882685, "grad_norm": 2.828125, "learning_rate": 2.36031295004667e-07, "loss": 1.6284, "step": 12810 }, { "epoch": 3.6052890701927134, "grad_norm": 4.4375, "learning_rate": 2.3437209713122707e-07, "loss": 1.458, "step": 12815 }, { "epoch": 3.6066957377971587, "grad_norm": 4.46875, "learning_rate": 2.3271857554562914e-07, "loss": 1.4742, "step": 12820 }, { "epoch": 3.6081024054016035, "grad_norm": 3.46875, "learning_rate": 2.3107073274038157e-07, "loss": 1.5134, "step": 12825 }, { "epoch": 3.6095090730060484, "grad_norm": 3.375, "learning_rate": 2.2942857119943392e-07, "loss": 1.6525, "step": 12830 }, { "epoch": 3.6109157406104937, "grad_norm": 2.71875, "learning_rate": 2.277920933981723e-07, "loss": 1.8801, "step": 12835 }, { "epoch": 3.612322408214939, "grad_norm": 3.84375, "learning_rate": 2.2616130180341408e-07, "loss": 1.4974, "step": 12840 }, { "epoch": 3.613729075819384, "grad_norm": 2.3125, "learning_rate": 2.245361988734076e-07, "loss": 1.6507, "step": 12845 }, { "epoch": 3.6151357434238287, "grad_norm": 3.15625, "learning_rate": 2.2291678705782303e-07, "loss": 1.811, "step": 12850 }, { "epoch": 3.616542411028274, "grad_norm": 3.859375, "learning_rate": 2.2130306879775396e-07, "loss": 1.6014, "step": 12855 }, { "epoch": 3.6179490786327193, "grad_norm": 3.28125, "learning_rate": 2.1969504652571014e-07, "loss": 1.7031, "step": 12860 }, { "epoch": 3.619355746237164, "grad_norm": 2.625, "learning_rate": 2.1809272266561796e-07, "loss": 1.7339, "step": 12865 }, { "epoch": 3.620762413841609, "grad_norm": 4.34375, "learning_rate": 2.1649609963280892e-07, "loss": 1.6556, "step": 12870 }, { "epoch": 3.6221690814460543, "grad_norm": 2.390625, "learning_rate": 2.1490517983402667e-07, "loss": 1.6822, "step": 12875 }, { "epoch": 3.6235757490504996, "grad_norm": 3.640625, "learning_rate": 2.1331996566741473e-07, "loss": 1.4878, "step": 12880 }, { "epoch": 3.6249824166549445, "grad_norm": 2.71875, "learning_rate": 2.1174045952251674e-07, "loss": 1.7963, "step": 12885 }, { "epoch": 3.6263890842593893, "grad_norm": 2.796875, "learning_rate": 2.1016666378027127e-07, "loss": 1.6533, "step": 12890 }, { "epoch": 3.6277957518638346, "grad_norm": 3.734375, "learning_rate": 2.085985808130113e-07, "loss": 1.6496, "step": 12895 }, { "epoch": 3.6292024194682795, "grad_norm": 3.5625, "learning_rate": 2.070362129844554e-07, "loss": 1.4314, "step": 12900 }, { "epoch": 3.630609087072725, "grad_norm": 3.984375, "learning_rate": 2.0547956264970946e-07, "loss": 1.3795, "step": 12905 }, { "epoch": 3.6320157546771696, "grad_norm": 3.140625, "learning_rate": 2.0392863215525957e-07, "loss": 1.4565, "step": 12910 }, { "epoch": 3.633422422281615, "grad_norm": 3.15625, "learning_rate": 2.0238342383897032e-07, "loss": 1.4031, "step": 12915 }, { "epoch": 3.63482908988606, "grad_norm": 4.09375, "learning_rate": 2.0084394003008165e-07, "loss": 1.5884, "step": 12920 }, { "epoch": 3.636235757490505, "grad_norm": 4.0625, "learning_rate": 1.9931018304920256e-07, "loss": 1.7368, "step": 12925 }, { "epoch": 3.63764242509495, "grad_norm": 2.9375, "learning_rate": 1.9778215520831076e-07, "loss": 1.5598, "step": 12930 }, { "epoch": 3.6390490926993952, "grad_norm": 3.46875, "learning_rate": 1.9625985881074603e-07, "loss": 1.4654, "step": 12935 }, { "epoch": 3.64045576030384, "grad_norm": 2.96875, "learning_rate": 1.9474329615121232e-07, "loss": 1.6227, "step": 12940 }, { "epoch": 3.6418624279082854, "grad_norm": 3.5625, "learning_rate": 1.9323246951576633e-07, "loss": 1.4179, "step": 12945 }, { "epoch": 3.6432690955127303, "grad_norm": 4.0625, "learning_rate": 1.9172738118182098e-07, "loss": 1.417, "step": 12950 }, { "epoch": 3.6446757631171756, "grad_norm": 3.359375, "learning_rate": 1.902280334181392e-07, "loss": 1.65, "step": 12955 }, { "epoch": 3.6460824307216204, "grad_norm": 3.21875, "learning_rate": 1.8873442848482868e-07, "loss": 1.5986, "step": 12960 }, { "epoch": 3.6474890983260657, "grad_norm": 3.65625, "learning_rate": 1.872465686333422e-07, "loss": 1.6104, "step": 12965 }, { "epoch": 3.6488957659305106, "grad_norm": 3.703125, "learning_rate": 1.857644561064733e-07, "loss": 1.5107, "step": 12970 }, { "epoch": 3.6503024335349554, "grad_norm": 3.453125, "learning_rate": 1.842880931383477e-07, "loss": 1.5454, "step": 12975 }, { "epoch": 3.6517091011394007, "grad_norm": 3.28125, "learning_rate": 1.8281748195443015e-07, "loss": 1.416, "step": 12980 }, { "epoch": 3.653115768743846, "grad_norm": 3.46875, "learning_rate": 1.8135262477151092e-07, "loss": 1.6158, "step": 12985 }, { "epoch": 3.654522436348291, "grad_norm": 3.046875, "learning_rate": 1.7989352379770773e-07, "loss": 1.5509, "step": 12990 }, { "epoch": 3.6559291039527357, "grad_norm": 3.65625, "learning_rate": 1.7844018123246295e-07, "loss": 1.493, "step": 12995 }, { "epoch": 3.657335771557181, "grad_norm": 2.734375, "learning_rate": 1.7699259926653665e-07, "loss": 1.7625, "step": 13000 }, { "epoch": 3.6587424391616263, "grad_norm": 4.96875, "learning_rate": 1.7555078008200685e-07, "loss": 1.3462, "step": 13005 }, { "epoch": 3.660149106766071, "grad_norm": 2.90625, "learning_rate": 1.741147258522635e-07, "loss": 1.5288, "step": 13010 }, { "epoch": 3.661555774370516, "grad_norm": 3.21875, "learning_rate": 1.7268443874200834e-07, "loss": 1.7067, "step": 13015 }, { "epoch": 3.6629624419749613, "grad_norm": 2.75, "learning_rate": 1.712599209072474e-07, "loss": 1.7385, "step": 13020 }, { "epoch": 3.6643691095794066, "grad_norm": 3.984375, "learning_rate": 1.6984117449529324e-07, "loss": 1.5687, "step": 13025 }, { "epoch": 3.6657757771838515, "grad_norm": 4.03125, "learning_rate": 1.684282016447547e-07, "loss": 1.7137, "step": 13030 }, { "epoch": 3.6671824447882964, "grad_norm": 3.484375, "learning_rate": 1.670210044855409e-07, "loss": 1.5094, "step": 13035 }, { "epoch": 3.6685891123927417, "grad_norm": 2.4375, "learning_rate": 1.6561958513885332e-07, "loss": 1.7145, "step": 13040 }, { "epoch": 3.6699957799971865, "grad_norm": 3.828125, "learning_rate": 1.6422394571718435e-07, "loss": 1.4869, "step": 13045 }, { "epoch": 3.671402447601632, "grad_norm": 2.984375, "learning_rate": 1.628340883243129e-07, "loss": 1.6588, "step": 13050 }, { "epoch": 3.6728091152060767, "grad_norm": 3.140625, "learning_rate": 1.6145001505530353e-07, "loss": 1.6234, "step": 13055 }, { "epoch": 3.674215782810522, "grad_norm": 3.03125, "learning_rate": 1.6007172799650027e-07, "loss": 1.4941, "step": 13060 }, { "epoch": 3.675622450414967, "grad_norm": 4.6875, "learning_rate": 1.5869922922552649e-07, "loss": 1.6677, "step": 13065 }, { "epoch": 3.677029118019412, "grad_norm": 3.15625, "learning_rate": 1.573325208112801e-07, "loss": 1.5113, "step": 13070 }, { "epoch": 3.678435785623857, "grad_norm": 3.09375, "learning_rate": 1.5597160481392834e-07, "loss": 1.4624, "step": 13075 }, { "epoch": 3.6798424532283023, "grad_norm": 2.96875, "learning_rate": 1.5461648328491106e-07, "loss": 1.333, "step": 13080 }, { "epoch": 3.681249120832747, "grad_norm": 3.8125, "learning_rate": 1.5326715826693027e-07, "loss": 1.5943, "step": 13085 }, { "epoch": 3.6826557884371924, "grad_norm": 3.65625, "learning_rate": 1.519236317939514e-07, "loss": 1.5913, "step": 13090 }, { "epoch": 3.6840624560416373, "grad_norm": 4.03125, "learning_rate": 1.5058590589119936e-07, "loss": 1.59, "step": 13095 }, { "epoch": 3.6854691236460826, "grad_norm": 3.359375, "learning_rate": 1.492539825751562e-07, "loss": 1.6069, "step": 13100 }, { "epoch": 3.6868757912505274, "grad_norm": 3.25, "learning_rate": 1.4792786385355415e-07, "loss": 1.8708, "step": 13105 }, { "epoch": 3.6882824588549727, "grad_norm": 3.21875, "learning_rate": 1.4660755172537953e-07, "loss": 1.4281, "step": 13110 }, { "epoch": 3.6896891264594176, "grad_norm": 2.484375, "learning_rate": 1.4529304818086297e-07, "loss": 1.8313, "step": 13115 }, { "epoch": 3.6910957940638625, "grad_norm": 3.375, "learning_rate": 1.4398435520147988e-07, "loss": 1.4211, "step": 13120 }, { "epoch": 3.6925024616683078, "grad_norm": 4.03125, "learning_rate": 1.426814747599483e-07, "loss": 1.5624, "step": 13125 }, { "epoch": 3.693909129272753, "grad_norm": 3.578125, "learning_rate": 1.4138440882022297e-07, "loss": 1.5928, "step": 13130 }, { "epoch": 3.695315796877198, "grad_norm": 2.9375, "learning_rate": 1.4009315933749411e-07, "loss": 1.6827, "step": 13135 }, { "epoch": 3.6967224644816428, "grad_norm": 3.109375, "learning_rate": 1.388077282581852e-07, "loss": 1.6348, "step": 13140 }, { "epoch": 3.698129132086088, "grad_norm": 3.71875, "learning_rate": 1.375281175199472e-07, "loss": 1.5618, "step": 13145 }, { "epoch": 3.6995357996905334, "grad_norm": 3.671875, "learning_rate": 1.362543290516589e-07, "loss": 1.621, "step": 13150 }, { "epoch": 3.700942467294978, "grad_norm": 3.78125, "learning_rate": 1.3498636477342307e-07, "loss": 1.6355, "step": 13155 }, { "epoch": 3.702349134899423, "grad_norm": 3.375, "learning_rate": 1.337242265965619e-07, "loss": 1.584, "step": 13160 }, { "epoch": 3.7037558025038684, "grad_norm": 3.640625, "learning_rate": 1.3246791642361622e-07, "loss": 1.5461, "step": 13165 }, { "epoch": 3.7051624701083137, "grad_norm": 3.90625, "learning_rate": 1.3121743614834135e-07, "loss": 1.7194, "step": 13170 }, { "epoch": 3.7065691377127585, "grad_norm": 4.59375, "learning_rate": 1.2997278765570463e-07, "loss": 1.1595, "step": 13175 }, { "epoch": 3.7079758053172034, "grad_norm": 2.9375, "learning_rate": 1.2873397282188215e-07, "loss": 1.7544, "step": 13180 }, { "epoch": 3.7093824729216487, "grad_norm": 2.15625, "learning_rate": 1.2750099351425792e-07, "loss": 1.6543, "step": 13185 }, { "epoch": 3.7107891405260935, "grad_norm": 6.375, "learning_rate": 1.2627385159141812e-07, "loss": 1.4372, "step": 13190 }, { "epoch": 3.712195808130539, "grad_norm": 3.109375, "learning_rate": 1.250525489031493e-07, "loss": 1.525, "step": 13195 }, { "epoch": 3.7136024757349837, "grad_norm": 3.78125, "learning_rate": 1.2383708729043886e-07, "loss": 1.7147, "step": 13200 }, { "epoch": 3.715009143339429, "grad_norm": 3.75, "learning_rate": 1.2262746858546468e-07, "loss": 1.4541, "step": 13205 }, { "epoch": 3.716415810943874, "grad_norm": 2.9375, "learning_rate": 1.214236946116012e-07, "loss": 1.696, "step": 13210 }, { "epoch": 3.717822478548319, "grad_norm": 3.1875, "learning_rate": 1.2022576718341104e-07, "loss": 1.614, "step": 13215 }, { "epoch": 3.719229146152764, "grad_norm": 3.03125, "learning_rate": 1.1903368810664315e-07, "loss": 1.7279, "step": 13220 }, { "epoch": 3.7206358137572093, "grad_norm": 6.03125, "learning_rate": 1.1784745917823169e-07, "loss": 1.4992, "step": 13225 }, { "epoch": 3.722042481361654, "grad_norm": 2.515625, "learning_rate": 1.1666708218629206e-07, "loss": 1.4643, "step": 13230 }, { "epoch": 3.7234491489660995, "grad_norm": 3.375, "learning_rate": 1.1549255891011788e-07, "loss": 1.3531, "step": 13235 }, { "epoch": 3.7248558165705443, "grad_norm": 2.984375, "learning_rate": 1.1432389112017959e-07, "loss": 1.5313, "step": 13240 }, { "epoch": 3.726262484174989, "grad_norm": 3.03125, "learning_rate": 1.1316108057812135e-07, "loss": 1.5671, "step": 13245 }, { "epoch": 3.7276691517794345, "grad_norm": 3.34375, "learning_rate": 1.1200412903675749e-07, "loss": 1.5695, "step": 13250 }, { "epoch": 3.7290758193838798, "grad_norm": 3.703125, "learning_rate": 1.1085303824006986e-07, "loss": 1.505, "step": 13255 }, { "epoch": 3.7304824869883246, "grad_norm": 4.15625, "learning_rate": 1.0970780992320871e-07, "loss": 1.4891, "step": 13260 }, { "epoch": 3.7318891545927695, "grad_norm": 3.390625, "learning_rate": 1.0856844581248292e-07, "loss": 1.6268, "step": 13265 }, { "epoch": 3.733295822197215, "grad_norm": 3.78125, "learning_rate": 1.0743494762536486e-07, "loss": 1.4452, "step": 13270 }, { "epoch": 3.73470248980166, "grad_norm": 3.75, "learning_rate": 1.0630731707048513e-07, "loss": 1.5661, "step": 13275 }, { "epoch": 3.736109157406105, "grad_norm": 3.5625, "learning_rate": 1.0518555584762578e-07, "loss": 1.5987, "step": 13280 }, { "epoch": 3.73751582501055, "grad_norm": 3.40625, "learning_rate": 1.0406966564772578e-07, "loss": 1.6404, "step": 13285 }, { "epoch": 3.738922492614995, "grad_norm": 3.953125, "learning_rate": 1.0295964815287117e-07, "loss": 1.5056, "step": 13290 }, { "epoch": 3.7403291602194404, "grad_norm": 3.140625, "learning_rate": 1.0185550503629725e-07, "loss": 1.595, "step": 13295 }, { "epoch": 3.7417358278238853, "grad_norm": 3.171875, "learning_rate": 1.0075723796238244e-07, "loss": 1.3048, "step": 13300 }, { "epoch": 3.74314249542833, "grad_norm": 3.703125, "learning_rate": 9.966484858665003e-08, "loss": 1.5691, "step": 13305 }, { "epoch": 3.7445491630327754, "grad_norm": 3.0625, "learning_rate": 9.857833855576103e-08, "loss": 1.504, "step": 13310 }, { "epoch": 3.7459558306372203, "grad_norm": 3.203125, "learning_rate": 9.749770950751601e-08, "loss": 1.6968, "step": 13315 }, { "epoch": 3.7473624982416656, "grad_norm": 3.140625, "learning_rate": 9.642296307084885e-08, "loss": 1.7236, "step": 13320 }, { "epoch": 3.7487691658461104, "grad_norm": 3.609375, "learning_rate": 9.535410086582718e-08, "loss": 1.3874, "step": 13325 }, { "epoch": 3.7501758334505557, "grad_norm": 4.78125, "learning_rate": 9.429112450364707e-08, "loss": 1.319, "step": 13330 }, { "epoch": 3.7515825010550006, "grad_norm": 3.0, "learning_rate": 9.323403558663523e-08, "loss": 1.6897, "step": 13335 }, { "epoch": 3.752989168659446, "grad_norm": 3.703125, "learning_rate": 9.218283570824149e-08, "loss": 1.5377, "step": 13340 }, { "epoch": 3.7543958362638907, "grad_norm": 3.359375, "learning_rate": 9.113752645303829e-08, "loss": 1.4597, "step": 13345 }, { "epoch": 3.755802503868336, "grad_norm": 3.234375, "learning_rate": 9.009810939671991e-08, "loss": 1.3243, "step": 13350 }, { "epoch": 3.757209171472781, "grad_norm": 3.75, "learning_rate": 8.906458610609791e-08, "loss": 1.6396, "step": 13355 }, { "epoch": 3.758615839077226, "grad_norm": 3.53125, "learning_rate": 8.803695813910072e-08, "loss": 1.7091, "step": 13360 }, { "epoch": 3.760022506681671, "grad_norm": 4.03125, "learning_rate": 8.701522704476838e-08, "loss": 1.6138, "step": 13365 }, { "epoch": 3.7614291742861163, "grad_norm": 3.328125, "learning_rate": 8.599939436325376e-08, "loss": 1.5434, "step": 13370 }, { "epoch": 3.762835841890561, "grad_norm": 3.140625, "learning_rate": 8.498946162581732e-08, "loss": 1.6085, "step": 13375 }, { "epoch": 3.7642425094950065, "grad_norm": 3.75, "learning_rate": 8.39854303548262e-08, "loss": 1.4269, "step": 13380 }, { "epoch": 3.7656491770994514, "grad_norm": 4.40625, "learning_rate": 8.298730206375237e-08, "loss": 1.65, "step": 13385 }, { "epoch": 3.767055844703896, "grad_norm": 3.046875, "learning_rate": 8.199507825716923e-08, "loss": 1.6611, "step": 13390 }, { "epoch": 3.7684625123083415, "grad_norm": 3.421875, "learning_rate": 8.100876043074878e-08, "loss": 1.6301, "step": 13395 }, { "epoch": 3.769869179912787, "grad_norm": 2.640625, "learning_rate": 8.002835007126263e-08, "loss": 1.7159, "step": 13400 }, { "epoch": 3.7712758475172317, "grad_norm": 4.03125, "learning_rate": 7.905384865657572e-08, "loss": 1.6362, "step": 13405 }, { "epoch": 3.7726825151216765, "grad_norm": 3.46875, "learning_rate": 7.808525765564634e-08, "loss": 1.5149, "step": 13410 }, { "epoch": 3.774089182726122, "grad_norm": 2.90625, "learning_rate": 7.712257852852344e-08, "loss": 1.7875, "step": 13415 }, { "epoch": 3.775495850330567, "grad_norm": 3.015625, "learning_rate": 7.616581272634493e-08, "loss": 1.5831, "step": 13420 }, { "epoch": 3.776902517935012, "grad_norm": 3.296875, "learning_rate": 7.521496169133445e-08, "loss": 1.75, "step": 13425 }, { "epoch": 3.778309185539457, "grad_norm": 2.53125, "learning_rate": 7.427002685679884e-08, "loss": 1.7126, "step": 13430 }, { "epoch": 3.779715853143902, "grad_norm": 3.140625, "learning_rate": 7.33310096471298e-08, "loss": 1.453, "step": 13435 }, { "epoch": 3.7811225207483474, "grad_norm": 5.15625, "learning_rate": 7.23979114777955e-08, "loss": 1.6371, "step": 13440 }, { "epoch": 3.7825291883527923, "grad_norm": 3.03125, "learning_rate": 7.147073375534374e-08, "loss": 1.7193, "step": 13445 }, { "epoch": 3.783935855957237, "grad_norm": 2.9375, "learning_rate": 7.054947787739785e-08, "loss": 1.4624, "step": 13450 }, { "epoch": 3.7853425235616824, "grad_norm": 3.21875, "learning_rate": 6.963414523265321e-08, "loss": 1.6463, "step": 13455 }, { "epoch": 3.7867491911661273, "grad_norm": 3.5625, "learning_rate": 6.872473720087768e-08, "loss": 1.6806, "step": 13460 }, { "epoch": 3.7881558587705726, "grad_norm": 2.953125, "learning_rate": 6.782125515290937e-08, "loss": 1.5029, "step": 13465 }, { "epoch": 3.7895625263750174, "grad_norm": 2.71875, "learning_rate": 6.692370045065043e-08, "loss": 1.4976, "step": 13470 }, { "epoch": 3.7909691939794627, "grad_norm": 3.0, "learning_rate": 6.603207444707149e-08, "loss": 1.4385, "step": 13475 }, { "epoch": 3.7923758615839076, "grad_norm": 3.09375, "learning_rate": 6.514637848620497e-08, "loss": 1.4465, "step": 13480 }, { "epoch": 3.793782529188353, "grad_norm": 3.0625, "learning_rate": 6.426661390314336e-08, "loss": 1.6002, "step": 13485 }, { "epoch": 3.7951891967927978, "grad_norm": 5.125, "learning_rate": 6.339278202404009e-08, "loss": 1.43, "step": 13490 }, { "epoch": 3.796595864397243, "grad_norm": 3.453125, "learning_rate": 6.252488416610458e-08, "loss": 1.6215, "step": 13495 }, { "epoch": 3.798002532001688, "grad_norm": 3.25, "learning_rate": 6.166292163760145e-08, "loss": 1.7022, "step": 13500 }, { "epoch": 3.799409199606133, "grad_norm": 2.96875, "learning_rate": 6.080689573784826e-08, "loss": 1.4709, "step": 13505 }, { "epoch": 3.800815867210578, "grad_norm": 2.828125, "learning_rate": 5.995680775721457e-08, "loss": 1.5759, "step": 13510 }, { "epoch": 3.8022225348150234, "grad_norm": 3.0625, "learning_rate": 5.911265897711759e-08, "loss": 1.6665, "step": 13515 }, { "epoch": 3.8036292024194682, "grad_norm": 4.5, "learning_rate": 5.82744506700239e-08, "loss": 1.7009, "step": 13520 }, { "epoch": 3.8050358700239135, "grad_norm": 2.953125, "learning_rate": 5.744218409944412e-08, "loss": 1.5255, "step": 13525 }, { "epoch": 3.8064425376283584, "grad_norm": 3.09375, "learning_rate": 5.6615860519932054e-08, "loss": 1.6446, "step": 13530 }, { "epoch": 3.8078492052328032, "grad_norm": 4.53125, "learning_rate": 5.5795481177083324e-08, "loss": 1.5484, "step": 13535 }, { "epoch": 3.8092558728372485, "grad_norm": 3.46875, "learning_rate": 5.49810473075345e-08, "loss": 1.6404, "step": 13540 }, { "epoch": 3.810662540441694, "grad_norm": 5.59375, "learning_rate": 5.417256013895777e-08, "loss": 1.5426, "step": 13545 }, { "epoch": 3.8120692080461387, "grad_norm": 3.234375, "learning_rate": 5.337002089006315e-08, "loss": 1.681, "step": 13550 }, { "epoch": 3.8134758756505835, "grad_norm": 3.609375, "learning_rate": 5.2573430770594505e-08, "loss": 1.7098, "step": 13555 }, { "epoch": 3.814882543255029, "grad_norm": 7.40625, "learning_rate": 5.178279098132643e-08, "loss": 1.7041, "step": 13560 }, { "epoch": 3.816289210859474, "grad_norm": 3.65625, "learning_rate": 5.099810271406646e-08, "loss": 1.6355, "step": 13565 }, { "epoch": 3.817695878463919, "grad_norm": 4.09375, "learning_rate": 5.021936715164843e-08, "loss": 1.6715, "step": 13570 }, { "epoch": 3.819102546068364, "grad_norm": 6.0, "learning_rate": 4.9446585467935566e-08, "loss": 1.4218, "step": 13575 }, { "epoch": 3.820509213672809, "grad_norm": 3.171875, "learning_rate": 4.8679758827813835e-08, "loss": 1.7012, "step": 13580 }, { "epoch": 3.8219158812772545, "grad_norm": 3.0625, "learning_rate": 4.791888838719416e-08, "loss": 1.4577, "step": 13585 }, { "epoch": 3.8233225488816993, "grad_norm": 3.046875, "learning_rate": 4.7163975293008416e-08, "loss": 1.6121, "step": 13590 }, { "epoch": 3.824729216486144, "grad_norm": 3.171875, "learning_rate": 4.641502068320946e-08, "loss": 1.3883, "step": 13595 }, { "epoch": 3.8261358840905895, "grad_norm": 2.9375, "learning_rate": 4.567202568676665e-08, "loss": 1.6198, "step": 13600 }, { "epoch": 3.8275425516950343, "grad_norm": 3.484375, "learning_rate": 4.493499142366719e-08, "loss": 1.8679, "step": 13605 }, { "epoch": 3.8289492192994796, "grad_norm": 3.21875, "learning_rate": 4.4203919004912606e-08, "loss": 1.5141, "step": 13610 }, { "epoch": 3.8303558869039245, "grad_norm": 2.65625, "learning_rate": 4.347880953251737e-08, "loss": 1.5901, "step": 13615 }, { "epoch": 3.83176255450837, "grad_norm": 3.125, "learning_rate": 4.275966409950804e-08, "loss": 1.5317, "step": 13620 }, { "epoch": 3.8331692221128146, "grad_norm": 3.625, "learning_rate": 4.2046483789920596e-08, "loss": 1.4834, "step": 13625 }, { "epoch": 3.83457588971726, "grad_norm": 5.25, "learning_rate": 4.1339269678799525e-08, "loss": 1.6823, "step": 13630 }, { "epoch": 3.835982557321705, "grad_norm": 3.421875, "learning_rate": 4.0638022832195197e-08, "loss": 1.7195, "step": 13635 }, { "epoch": 3.83738922492615, "grad_norm": 2.5625, "learning_rate": 3.994274430716427e-08, "loss": 1.8747, "step": 13640 }, { "epoch": 3.838795892530595, "grad_norm": 2.296875, "learning_rate": 3.925343515176482e-08, "loss": 1.5168, "step": 13645 }, { "epoch": 3.8402025601350402, "grad_norm": 3.3125, "learning_rate": 3.857009640505859e-08, "loss": 1.7263, "step": 13650 }, { "epoch": 3.841609227739485, "grad_norm": 3.421875, "learning_rate": 3.7892729097106944e-08, "loss": 1.3747, "step": 13655 }, { "epoch": 3.84301589534393, "grad_norm": 3.9375, "learning_rate": 3.722133424896956e-08, "loss": 1.4349, "step": 13660 }, { "epoch": 3.8444225629483753, "grad_norm": 4.8125, "learning_rate": 3.655591287270354e-08, "loss": 1.7337, "step": 13665 }, { "epoch": 3.8458292305528206, "grad_norm": 2.609375, "learning_rate": 3.589646597136209e-08, "loss": 1.647, "step": 13670 }, { "epoch": 3.8472358981572654, "grad_norm": 3.5625, "learning_rate": 3.524299453899093e-08, "loss": 1.7049, "step": 13675 }, { "epoch": 3.8486425657617103, "grad_norm": 3.015625, "learning_rate": 3.459549956063013e-08, "loss": 1.4334, "step": 13680 }, { "epoch": 3.8500492333661556, "grad_norm": 2.734375, "learning_rate": 3.395398201231048e-08, "loss": 1.685, "step": 13685 }, { "epoch": 3.851455900970601, "grad_norm": 3.46875, "learning_rate": 3.331844286105179e-08, "loss": 1.6998, "step": 13690 }, { "epoch": 3.8528625685750457, "grad_norm": 3.0, "learning_rate": 3.268888306486284e-08, "loss": 1.5654, "step": 13695 }, { "epoch": 3.8542692361794906, "grad_norm": 4.46875, "learning_rate": 3.206530357273829e-08, "loss": 1.623, "step": 13700 }, { "epoch": 3.855675903783936, "grad_norm": 2.453125, "learning_rate": 3.1447705324659126e-08, "loss": 1.7226, "step": 13705 }, { "epoch": 3.857082571388381, "grad_norm": 4.34375, "learning_rate": 3.0836089251589535e-08, "loss": 1.5045, "step": 13710 }, { "epoch": 3.858489238992826, "grad_norm": 2.3125, "learning_rate": 3.0230456275476045e-08, "loss": 1.6649, "step": 13715 }, { "epoch": 3.859895906597271, "grad_norm": 3.5, "learning_rate": 2.963080730924705e-08, "loss": 1.6491, "step": 13720 }, { "epoch": 3.861302574201716, "grad_norm": 2.65625, "learning_rate": 2.903714325681017e-08, "loss": 1.5911, "step": 13725 }, { "epoch": 3.862709241806161, "grad_norm": 3.078125, "learning_rate": 2.8449465013051343e-08, "loss": 1.5251, "step": 13730 }, { "epoch": 3.8641159094106063, "grad_norm": 2.96875, "learning_rate": 2.7867773463833954e-08, "loss": 1.6151, "step": 13735 }, { "epoch": 3.865522577015051, "grad_norm": 2.609375, "learning_rate": 2.7292069485996604e-08, "loss": 1.7493, "step": 13740 }, { "epoch": 3.8669292446194965, "grad_norm": 4.75, "learning_rate": 2.6722353947352227e-08, "loss": 1.5258, "step": 13745 }, { "epoch": 3.8683359122239414, "grad_norm": 3.21875, "learning_rate": 2.615862770668764e-08, "loss": 1.4538, "step": 13750 }, { "epoch": 3.8697425798283867, "grad_norm": 3.71875, "learning_rate": 2.5600891613760445e-08, "loss": 1.5134, "step": 13755 }, { "epoch": 3.8711492474328315, "grad_norm": 3.859375, "learning_rate": 2.5049146509299012e-08, "loss": 1.4722, "step": 13760 }, { "epoch": 3.872555915037277, "grad_norm": 3.0625, "learning_rate": 2.450339322500161e-08, "loss": 1.608, "step": 13765 }, { "epoch": 3.8739625826417217, "grad_norm": 2.6875, "learning_rate": 2.3963632583533733e-08, "loss": 1.6941, "step": 13770 }, { "epoch": 3.875369250246167, "grad_norm": 2.40625, "learning_rate": 2.342986539852676e-08, "loss": 1.4776, "step": 13775 }, { "epoch": 3.876775917850612, "grad_norm": 4.0, "learning_rate": 2.2902092474579747e-08, "loss": 1.5991, "step": 13780 }, { "epoch": 3.878182585455057, "grad_norm": 3.296875, "learning_rate": 2.2380314607254536e-08, "loss": 1.542, "step": 13785 }, { "epoch": 3.879589253059502, "grad_norm": 3.4375, "learning_rate": 2.186453258307619e-08, "loss": 1.6427, "step": 13790 }, { "epoch": 3.8809959206639473, "grad_norm": 3.125, "learning_rate": 2.1354747179531674e-08, "loss": 1.5292, "step": 13795 }, { "epoch": 3.882402588268392, "grad_norm": 3.171875, "learning_rate": 2.0850959165069403e-08, "loss": 1.4787, "step": 13800 }, { "epoch": 3.883809255872837, "grad_norm": 3.03125, "learning_rate": 2.035316929909614e-08, "loss": 1.6944, "step": 13805 }, { "epoch": 3.8852159234772823, "grad_norm": 3.015625, "learning_rate": 1.9861378331978318e-08, "loss": 1.5436, "step": 13810 }, { "epoch": 3.8866225910817276, "grad_norm": 2.96875, "learning_rate": 1.937558700503894e-08, "loss": 1.8179, "step": 13815 }, { "epoch": 3.8880292586861724, "grad_norm": 3.765625, "learning_rate": 1.8895796050557134e-08, "loss": 1.3398, "step": 13820 }, { "epoch": 3.8894359262906173, "grad_norm": 3.0625, "learning_rate": 1.8422006191766813e-08, "loss": 1.6199, "step": 13825 }, { "epoch": 3.8908425938950626, "grad_norm": 3.296875, "learning_rate": 1.795421814285758e-08, "loss": 1.4014, "step": 13830 }, { "epoch": 3.892249261499508, "grad_norm": 3.171875, "learning_rate": 1.7492432608969375e-08, "loss": 1.7273, "step": 13835 }, { "epoch": 3.8936559291039528, "grad_norm": 4.96875, "learning_rate": 1.7036650286196498e-08, "loss": 1.3763, "step": 13840 }, { "epoch": 3.8950625967083976, "grad_norm": 3.640625, "learning_rate": 1.6586871861581807e-08, "loss": 1.6194, "step": 13845 }, { "epoch": 3.896469264312843, "grad_norm": 3.53125, "learning_rate": 1.6143098013119415e-08, "loss": 1.4738, "step": 13850 }, { "epoch": 3.897875931917288, "grad_norm": 3.46875, "learning_rate": 1.5705329409751556e-08, "loss": 1.6051, "step": 13855 }, { "epoch": 3.899282599521733, "grad_norm": 3.734375, "learning_rate": 1.5273566711369036e-08, "loss": 1.6372, "step": 13860 }, { "epoch": 3.900689267126178, "grad_norm": 3.484375, "learning_rate": 1.4847810568807683e-08, "loss": 1.5585, "step": 13865 }, { "epoch": 3.902095934730623, "grad_norm": 3.953125, "learning_rate": 1.4428061623850573e-08, "loss": 1.4838, "step": 13870 }, { "epoch": 3.903502602335068, "grad_norm": 3.25, "learning_rate": 1.4014320509224909e-08, "loss": 1.6295, "step": 13875 }, { "epoch": 3.9049092699395134, "grad_norm": 3.296875, "learning_rate": 1.3606587848602024e-08, "loss": 1.6314, "step": 13880 }, { "epoch": 3.9063159375439582, "grad_norm": 3.390625, "learning_rate": 1.3204864256596504e-08, "loss": 1.5786, "step": 13885 }, { "epoch": 3.9077226051484035, "grad_norm": 3.375, "learning_rate": 1.2809150338763064e-08, "loss": 1.345, "step": 13890 }, { "epoch": 3.9091292727528484, "grad_norm": 3.25, "learning_rate": 1.241944669160011e-08, "loss": 1.7872, "step": 13895 }, { "epoch": 3.9105359403572937, "grad_norm": 3.046875, "learning_rate": 1.203575390254441e-08, "loss": 1.6342, "step": 13900 }, { "epoch": 3.9119426079617385, "grad_norm": 3.09375, "learning_rate": 1.1658072549971975e-08, "loss": 1.3083, "step": 13905 }, { "epoch": 3.913349275566184, "grad_norm": 3.375, "learning_rate": 1.1286403203198513e-08, "loss": 1.6906, "step": 13910 }, { "epoch": 3.9147559431706287, "grad_norm": 4.71875, "learning_rate": 1.0920746422476313e-08, "loss": 1.3366, "step": 13915 }, { "epoch": 3.916162610775074, "grad_norm": 3.046875, "learning_rate": 1.05611027589938e-08, "loss": 1.4893, "step": 13920 }, { "epoch": 3.917569278379519, "grad_norm": 3.109375, "learning_rate": 1.0207472754876878e-08, "loss": 1.844, "step": 13925 }, { "epoch": 3.918975945983964, "grad_norm": 4.21875, "learning_rate": 9.859856943184919e-09, "loss": 1.7505, "step": 13930 }, { "epoch": 3.920382613588409, "grad_norm": 3.984375, "learning_rate": 9.518255847912548e-09, "loss": 1.6995, "step": 13935 }, { "epoch": 3.9217892811928543, "grad_norm": 3.40625, "learning_rate": 9.182669983986979e-09, "loss": 1.4661, "step": 13940 }, { "epoch": 3.923195948797299, "grad_norm": 3.671875, "learning_rate": 8.853099857269342e-09, "loss": 1.2838, "step": 13945 }, { "epoch": 3.924602616401744, "grad_norm": 3.6875, "learning_rate": 8.529545964551577e-09, "loss": 1.5647, "step": 13950 }, { "epoch": 3.9260092840061893, "grad_norm": 2.65625, "learning_rate": 8.212008793556436e-09, "loss": 1.657, "step": 13955 }, { "epoch": 3.9274159516106346, "grad_norm": 4.0, "learning_rate": 7.900488822939255e-09, "loss": 1.4927, "step": 13960 }, { "epoch": 3.9288226192150795, "grad_norm": 3.046875, "learning_rate": 7.594986522282188e-09, "loss": 1.604, "step": 13965 }, { "epoch": 3.9302292868195243, "grad_norm": 3.765625, "learning_rate": 7.295502352098637e-09, "loss": 1.5973, "step": 13970 }, { "epoch": 3.9316359544239696, "grad_norm": 3.53125, "learning_rate": 7.002036763829267e-09, "loss": 1.6541, "step": 13975 }, { "epoch": 3.933042622028415, "grad_norm": 3.15625, "learning_rate": 6.7145901998424404e-09, "loss": 1.6558, "step": 13980 }, { "epoch": 3.93444928963286, "grad_norm": 2.90625, "learning_rate": 6.43316309343378e-09, "loss": 1.7133, "step": 13985 }, { "epoch": 3.9358559572373046, "grad_norm": 3.921875, "learning_rate": 6.157755868824832e-09, "loss": 1.5935, "step": 13990 }, { "epoch": 3.93726262484175, "grad_norm": 2.515625, "learning_rate": 5.888368941163513e-09, "loss": 1.4415, "step": 13995 }, { "epoch": 3.9386692924461952, "grad_norm": 3.234375, "learning_rate": 5.625002716521887e-09, "loss": 1.6804, "step": 14000 }, { "epoch": 3.94007596005064, "grad_norm": 3.8125, "learning_rate": 5.3676575918966125e-09, "loss": 1.4467, "step": 14005 }, { "epoch": 3.941482627655085, "grad_norm": 2.71875, "learning_rate": 5.1163339552084964e-09, "loss": 1.4877, "step": 14010 }, { "epoch": 3.9428892952595302, "grad_norm": 3.75, "learning_rate": 4.871032185302048e-09, "loss": 1.4424, "step": 14015 }, { "epoch": 3.944295962863975, "grad_norm": 3.625, "learning_rate": 4.631752651943266e-09, "loss": 1.6061, "step": 14020 }, { "epoch": 3.9457026304684204, "grad_norm": 2.515625, "learning_rate": 4.39849571582096e-09, "loss": 1.4244, "step": 14025 }, { "epoch": 3.9471092980728653, "grad_norm": 4.15625, "learning_rate": 4.171261728545428e-09, "loss": 1.4968, "step": 14030 }, { "epoch": 3.9485159656773106, "grad_norm": 2.375, "learning_rate": 3.950051032648449e-09, "loss": 1.5433, "step": 14035 }, { "epoch": 3.9499226332817554, "grad_norm": 4.4375, "learning_rate": 3.734863961581069e-09, "loss": 1.6523, "step": 14040 }, { "epoch": 3.9513293008862007, "grad_norm": 4.6875, "learning_rate": 3.525700839715817e-09, "loss": 1.3554, "step": 14045 }, { "epoch": 3.9527359684906456, "grad_norm": 3.65625, "learning_rate": 3.322561982343597e-09, "loss": 1.5458, "step": 14050 }, { "epoch": 3.954142636095091, "grad_norm": 2.625, "learning_rate": 3.1254476956750207e-09, "loss": 1.5498, "step": 14055 }, { "epoch": 3.9555493036995357, "grad_norm": 3.796875, "learning_rate": 2.9343582768395215e-09, "loss": 1.5235, "step": 14060 }, { "epoch": 3.956955971303981, "grad_norm": 2.703125, "learning_rate": 2.7492940138840183e-09, "loss": 1.4832, "step": 14065 }, { "epoch": 3.958362638908426, "grad_norm": 2.609375, "learning_rate": 2.5702551857733623e-09, "loss": 1.6262, "step": 14070 }, { "epoch": 3.9597693065128707, "grad_norm": 3.203125, "learning_rate": 2.3972420623898927e-09, "loss": 1.6804, "step": 14075 }, { "epoch": 3.961175974117316, "grad_norm": 3.0625, "learning_rate": 2.230254904532547e-09, "loss": 1.3863, "step": 14080 }, { "epoch": 3.9625826417217613, "grad_norm": 5.03125, "learning_rate": 2.069293963916419e-09, "loss": 1.4973, "step": 14085 }, { "epoch": 3.963989309326206, "grad_norm": 3.1875, "learning_rate": 1.9143594831740882e-09, "loss": 1.6832, "step": 14090 }, { "epoch": 3.965395976930651, "grad_norm": 3.640625, "learning_rate": 1.7654516958525156e-09, "loss": 1.3712, "step": 14095 }, { "epoch": 3.9668026445350963, "grad_norm": 3.5625, "learning_rate": 1.6225708264148153e-09, "loss": 1.4175, "step": 14100 }, { "epoch": 3.9682093121395416, "grad_norm": 4.125, "learning_rate": 1.4857170902384807e-09, "loss": 1.5806, "step": 14105 }, { "epoch": 3.9696159797439865, "grad_norm": 2.875, "learning_rate": 1.354890693616273e-09, "loss": 1.7819, "step": 14110 }, { "epoch": 3.9710226473484314, "grad_norm": 5.59375, "learning_rate": 1.2300918337553312e-09, "loss": 1.413, "step": 14115 }, { "epoch": 3.9724293149528767, "grad_norm": 2.875, "learning_rate": 1.1113206987767298e-09, "loss": 1.66, "step": 14120 }, { "epoch": 3.973835982557322, "grad_norm": 2.859375, "learning_rate": 9.985774677150339e-10, "loss": 1.5321, "step": 14125 }, { "epoch": 3.975242650161767, "grad_norm": 3.0625, "learning_rate": 8.91862310519631e-10, "loss": 1.4343, "step": 14130 }, { "epoch": 3.9766493177662117, "grad_norm": 3.0, "learning_rate": 7.911753880516236e-10, "loss": 1.6013, "step": 14135 }, { "epoch": 3.978055985370657, "grad_norm": 3.4375, "learning_rate": 6.965168520864928e-10, "loss": 1.517, "step": 14140 }, { "epoch": 3.979462652975102, "grad_norm": 3.6875, "learning_rate": 6.07886845311878e-10, "loss": 1.6839, "step": 14145 }, { "epoch": 3.980869320579547, "grad_norm": 3.625, "learning_rate": 5.252855013280211e-10, "loss": 1.5155, "step": 14150 }, { "epoch": 3.982275988183992, "grad_norm": 2.609375, "learning_rate": 4.487129446477667e-10, "loss": 1.6638, "step": 14155 }, { "epoch": 3.9836826557884373, "grad_norm": 2.953125, "learning_rate": 3.7816929069656165e-10, "loss": 1.6047, "step": 14160 }, { "epoch": 3.985089323392882, "grad_norm": 3.25, "learning_rate": 3.1365464581112334e-10, "loss": 1.5561, "step": 14165 }, { "epoch": 3.9864959909973274, "grad_norm": 3.53125, "learning_rate": 2.5516910724077133e-10, "loss": 1.7709, "step": 14170 }, { "epoch": 3.9879026586017723, "grad_norm": 3.15625, "learning_rate": 2.0271276314565155e-10, "loss": 1.612, "step": 14175 }, { "epoch": 3.9893093262062176, "grad_norm": 3.203125, "learning_rate": 1.5628569259940049e-10, "loss": 1.4502, "step": 14180 }, { "epoch": 3.9907159938106624, "grad_norm": 3.109375, "learning_rate": 1.1588796558470449e-10, "loss": 1.6444, "step": 14185 }, { "epoch": 3.9921226614151077, "grad_norm": 3.296875, "learning_rate": 8.15196429977405e-11, "loss": 1.2351, "step": 14190 }, { "epoch": 3.9935293290195526, "grad_norm": 4.125, "learning_rate": 5.3180776644623505e-11, "loss": 1.6336, "step": 14195 }, { "epoch": 3.994935996623998, "grad_norm": 3.421875, "learning_rate": 3.087140924318277e-11, "loss": 1.638, "step": 14200 }, { "epoch": 3.9963426642284428, "grad_norm": 4.0625, "learning_rate": 1.4591574422961883e-11, "loss": 1.7758, "step": 14205 }, { "epoch": 3.997749331832888, "grad_norm": 3.484375, "learning_rate": 4.3412967238865004e-12, "loss": 1.8328, "step": 14210 }, { "epoch": 3.999155999437333, "grad_norm": 3.75, "learning_rate": 1.2059159670840813e-13, "loss": 1.8058, "step": 14215 }, { "epoch": 3.9994373329582222, "eval_loss": 1.576697826385498, "eval_runtime": 330.563, "eval_samples_per_second": 9.553, "eval_steps_per_second": 4.777, "step": 14216 }, { "epoch": 3.9994373329582222, "step": 14216, "total_flos": 1.684239053169361e+18, "train_loss": 1.6145593146796164, "train_runtime": 58184.933, "train_samples_per_second": 1.955, "train_steps_per_second": 0.244 } ], "logging_steps": 5, "max_steps": 14216, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.684239053169361e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }