{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 37420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002672367717797969, "grad_norm": 1.0593788623809814, "learning_rate": 0.0001999999647578693, "loss": 2.8682, "step": 10 }, { "epoch": 0.005344735435595938, "grad_norm": 1.3729766607284546, "learning_rate": 0.00019999985903150193, "loss": 2.4555, "step": 20 }, { "epoch": 0.008017103153393906, "grad_norm": 0.9050101041793823, "learning_rate": 0.00019999968282097247, "loss": 2.2431, "step": 30 }, { "epoch": 0.010689470871191877, "grad_norm": 0.8476489186286926, "learning_rate": 0.0001999994361264051, "loss": 2.2865, "step": 40 }, { "epoch": 0.013361838588989846, "grad_norm": 0.8859209418296814, "learning_rate": 0.00019999911894797372, "loss": 2.2338, "step": 50 }, { "epoch": 0.016034206306787813, "grad_norm": 0.8001280426979065, "learning_rate": 0.00019999873128590187, "loss": 2.2895, "step": 60 }, { "epoch": 0.01870657402458578, "grad_norm": 0.8251445293426514, "learning_rate": 0.00019999827314046284, "loss": 2.2898, "step": 70 }, { "epoch": 0.021378941742383754, "grad_norm": 0.7365292310714722, "learning_rate": 0.00019999774451197945, "loss": 2.2032, "step": 80 }, { "epoch": 0.024051309460181722, "grad_norm": 0.8399436473846436, "learning_rate": 0.0001999971454008244, "loss": 2.1982, "step": 90 }, { "epoch": 0.02672367717797969, "grad_norm": 0.7834980487823486, "learning_rate": 0.00019999647580741989, "loss": 2.2182, "step": 100 }, { "epoch": 0.02939604489577766, "grad_norm": 0.7640682458877563, "learning_rate": 0.00019999573573223795, "loss": 2.2249, "step": 110 }, { "epoch": 0.032068412613575625, "grad_norm": 0.8014008402824402, "learning_rate": 0.0001999949251758002, "loss": 2.2053, "step": 120 }, { "epoch": 0.034740780331373594, "grad_norm": 0.7757932543754578, "learning_rate": 0.0001999940441386779, "loss": 2.2097, "step": 130 }, { "epoch": 0.03741314804917156, "grad_norm": 0.6968387961387634, "learning_rate": 0.0001999930926214921, "loss": 2.2222, "step": 140 }, { "epoch": 0.04008551576696953, "grad_norm": 0.7413656711578369, "learning_rate": 0.00019999207062491346, "loss": 2.2071, "step": 150 }, { "epoch": 0.04275788348476751, "grad_norm": 0.9104848504066467, "learning_rate": 0.0001999909781496623, "loss": 2.2083, "step": 160 }, { "epoch": 0.045430251202565476, "grad_norm": 0.7201218605041504, "learning_rate": 0.00019998981519650868, "loss": 2.1978, "step": 170 }, { "epoch": 0.048102618920363445, "grad_norm": 0.8535075783729553, "learning_rate": 0.00019998858176627227, "loss": 2.1774, "step": 180 }, { "epoch": 0.050774986638161414, "grad_norm": 0.7436001896858215, "learning_rate": 0.00019998727785982247, "loss": 2.0783, "step": 190 }, { "epoch": 0.05344735435595938, "grad_norm": 0.7691600918769836, "learning_rate": 0.00019998590347807826, "loss": 2.2117, "step": 200 }, { "epoch": 0.05611972207375735, "grad_norm": 0.7730023860931396, "learning_rate": 0.00019998445862200845, "loss": 2.1707, "step": 210 }, { "epoch": 0.05879208979155532, "grad_norm": 0.7378381490707397, "learning_rate": 0.00019998294329263142, "loss": 2.1539, "step": 220 }, { "epoch": 0.06146445750935329, "grad_norm": 0.7207351922988892, "learning_rate": 0.0001999813574910152, "loss": 2.1684, "step": 230 }, { "epoch": 0.06413682522715125, "grad_norm": 0.7563502788543701, "learning_rate": 0.00019997970121827754, "loss": 2.21, "step": 240 }, { "epoch": 0.06680919294494922, "grad_norm": 0.7375833988189697, "learning_rate": 0.0001999779744755859, "loss": 2.1732, "step": 250 }, { "epoch": 0.06948156066274719, "grad_norm": 0.8896276950836182, "learning_rate": 0.0001999761772641573, "loss": 2.1718, "step": 260 }, { "epoch": 0.07215392838054516, "grad_norm": 0.7230550050735474, "learning_rate": 0.0001999743095852585, "loss": 2.1987, "step": 270 }, { "epoch": 0.07482629609834313, "grad_norm": 0.7491215467453003, "learning_rate": 0.00019997237144020596, "loss": 2.1536, "step": 280 }, { "epoch": 0.0774986638161411, "grad_norm": 0.8419792056083679, "learning_rate": 0.00019997036283036574, "loss": 2.1556, "step": 290 }, { "epoch": 0.08017103153393906, "grad_norm": 0.751606822013855, "learning_rate": 0.00019996828375715356, "loss": 2.1717, "step": 300 }, { "epoch": 0.08284339925173703, "grad_norm": 0.8259275555610657, "learning_rate": 0.00019996613422203492, "loss": 2.1644, "step": 310 }, { "epoch": 0.08551576696953501, "grad_norm": 0.8105437755584717, "learning_rate": 0.00019996391422652488, "loss": 2.0957, "step": 320 }, { "epoch": 0.08818813468733298, "grad_norm": 0.7292518615722656, "learning_rate": 0.0001999616237721881, "loss": 2.1461, "step": 330 }, { "epoch": 0.09086050240513095, "grad_norm": 0.7527144551277161, "learning_rate": 0.0001999592628606391, "loss": 2.1472, "step": 340 }, { "epoch": 0.09353287012292892, "grad_norm": 0.7687304615974426, "learning_rate": 0.00019995683149354193, "loss": 2.224, "step": 350 }, { "epoch": 0.09620523784072689, "grad_norm": 0.7979860305786133, "learning_rate": 0.00019995432967261025, "loss": 2.1426, "step": 360 }, { "epoch": 0.09887760555852486, "grad_norm": 0.7962672114372253, "learning_rate": 0.00019995175739960752, "loss": 2.1492, "step": 370 }, { "epoch": 0.10154997327632283, "grad_norm": 0.7849389314651489, "learning_rate": 0.00019994911467634675, "loss": 2.0579, "step": 380 }, { "epoch": 0.1042223409941208, "grad_norm": 0.8130772113800049, "learning_rate": 0.0001999464015046907, "loss": 2.1402, "step": 390 }, { "epoch": 0.10689470871191876, "grad_norm": 0.7432055473327637, "learning_rate": 0.00019994361788655164, "loss": 2.1381, "step": 400 }, { "epoch": 0.10956707642971673, "grad_norm": 0.7954995632171631, "learning_rate": 0.00019994076382389166, "loss": 2.1832, "step": 410 }, { "epoch": 0.1122394441475147, "grad_norm": 0.8514792323112488, "learning_rate": 0.0001999378393187224, "loss": 2.2132, "step": 420 }, { "epoch": 0.11491181186531267, "grad_norm": 0.9865955114364624, "learning_rate": 0.00019993484437310518, "loss": 2.1417, "step": 430 }, { "epoch": 0.11758417958311064, "grad_norm": 0.7414752244949341, "learning_rate": 0.00019993177898915094, "loss": 2.1338, "step": 440 }, { "epoch": 0.12025654730090861, "grad_norm": 0.6959917545318604, "learning_rate": 0.00019992864316902034, "loss": 2.1111, "step": 450 }, { "epoch": 0.12292891501870658, "grad_norm": 0.7759518623352051, "learning_rate": 0.0001999254369149236, "loss": 2.0755, "step": 460 }, { "epoch": 0.12560128273650453, "grad_norm": 0.8887970447540283, "learning_rate": 0.0001999221602291206, "loss": 2.1154, "step": 470 }, { "epoch": 0.1282736504543025, "grad_norm": 0.8119891285896301, "learning_rate": 0.00019991881311392097, "loss": 2.1146, "step": 480 }, { "epoch": 0.13094601817210047, "grad_norm": 0.8034600615501404, "learning_rate": 0.0001999153955716838, "loss": 2.138, "step": 490 }, { "epoch": 0.13361838588989844, "grad_norm": 0.8797882795333862, "learning_rate": 0.000199911907604818, "loss": 2.15, "step": 500 }, { "epoch": 0.1362907536076964, "grad_norm": 0.8310526609420776, "learning_rate": 0.00019990834921578205, "loss": 2.0907, "step": 510 }, { "epoch": 0.13896312132549438, "grad_norm": 0.8153840899467468, "learning_rate": 0.000199904720407084, "loss": 2.1459, "step": 520 }, { "epoch": 0.14163548904329235, "grad_norm": 0.7525952458381653, "learning_rate": 0.00019990102118128155, "loss": 2.2412, "step": 530 }, { "epoch": 0.14430785676109031, "grad_norm": 0.8331490159034729, "learning_rate": 0.00019989725154098218, "loss": 2.1192, "step": 540 }, { "epoch": 0.14698022447888828, "grad_norm": 0.780968189239502, "learning_rate": 0.00019989341148884282, "loss": 2.0778, "step": 550 }, { "epoch": 0.14965259219668625, "grad_norm": 0.8250514268875122, "learning_rate": 0.00019988950102757012, "loss": 2.179, "step": 560 }, { "epoch": 0.15232495991448422, "grad_norm": 0.8055257201194763, "learning_rate": 0.00019988552015992036, "loss": 2.0793, "step": 570 }, { "epoch": 0.1549973276322822, "grad_norm": 0.9461053013801575, "learning_rate": 0.00019988146888869939, "loss": 2.0553, "step": 580 }, { "epoch": 0.15766969535008016, "grad_norm": 0.9230166077613831, "learning_rate": 0.00019987734721676272, "loss": 2.1883, "step": 590 }, { "epoch": 0.16034206306787813, "grad_norm": 0.8047350645065308, "learning_rate": 0.00019987315514701553, "loss": 2.1516, "step": 600 }, { "epoch": 0.1630144307856761, "grad_norm": 0.8617376685142517, "learning_rate": 0.00019986889268241252, "loss": 2.0262, "step": 610 }, { "epoch": 0.16568679850347406, "grad_norm": 0.8188526630401611, "learning_rate": 0.00019986455982595803, "loss": 2.106, "step": 620 }, { "epoch": 0.16835916622127206, "grad_norm": 0.7814268469810486, "learning_rate": 0.0001998601565807061, "loss": 2.0266, "step": 630 }, { "epoch": 0.17103153393907003, "grad_norm": 0.8137741088867188, "learning_rate": 0.00019985568294976036, "loss": 2.0728, "step": 640 }, { "epoch": 0.173703901656868, "grad_norm": 0.74834805727005, "learning_rate": 0.0001998511389362739, "loss": 2.1, "step": 650 }, { "epoch": 0.17637626937466597, "grad_norm": 0.841573178768158, "learning_rate": 0.00019984652454344966, "loss": 2.1448, "step": 660 }, { "epoch": 0.17904863709246394, "grad_norm": 0.8152615427970886, "learning_rate": 0.00019984183977453995, "loss": 2.1404, "step": 670 }, { "epoch": 0.1817210048102619, "grad_norm": 0.8331522941589355, "learning_rate": 0.00019983708463284686, "loss": 2.054, "step": 680 }, { "epoch": 0.18439337252805987, "grad_norm": 0.9017409086227417, "learning_rate": 0.000199832259121722, "loss": 2.1148, "step": 690 }, { "epoch": 0.18706574024585784, "grad_norm": 0.8750881552696228, "learning_rate": 0.00019982736324456658, "loss": 2.0815, "step": 700 }, { "epoch": 0.1897381079636558, "grad_norm": 0.8703632950782776, "learning_rate": 0.00019982239700483146, "loss": 2.0799, "step": 710 }, { "epoch": 0.19241047568145378, "grad_norm": 0.8342357873916626, "learning_rate": 0.00019981736040601704, "loss": 2.0941, "step": 720 }, { "epoch": 0.19508284339925175, "grad_norm": 0.8456534147262573, "learning_rate": 0.0001998122534516733, "loss": 2.091, "step": 730 }, { "epoch": 0.19775521111704972, "grad_norm": 0.7912267446517944, "learning_rate": 0.0001998070761453999, "loss": 2.1353, "step": 740 }, { "epoch": 0.20042757883484769, "grad_norm": 0.7755369544029236, "learning_rate": 0.00019980182849084594, "loss": 2.1272, "step": 750 }, { "epoch": 0.20309994655264565, "grad_norm": 0.7691362500190735, "learning_rate": 0.00019979651049171026, "loss": 2.1243, "step": 760 }, { "epoch": 0.20577231427044362, "grad_norm": 0.8000863194465637, "learning_rate": 0.0001997911221517412, "loss": 2.0803, "step": 770 }, { "epoch": 0.2084446819882416, "grad_norm": 0.7349149584770203, "learning_rate": 0.0001997856634747367, "loss": 2.1411, "step": 780 }, { "epoch": 0.21111704970603956, "grad_norm": 0.865577220916748, "learning_rate": 0.00019978013446454425, "loss": 2.16, "step": 790 }, { "epoch": 0.21378941742383753, "grad_norm": 0.7996314167976379, "learning_rate": 0.00019977453512506094, "loss": 2.0745, "step": 800 }, { "epoch": 0.2164617851416355, "grad_norm": 0.8223113417625427, "learning_rate": 0.0001997688654602334, "loss": 2.1165, "step": 810 }, { "epoch": 0.21913415285943347, "grad_norm": 0.7781673669815063, "learning_rate": 0.00019976312547405784, "loss": 2.1064, "step": 820 }, { "epoch": 0.22180652057723144, "grad_norm": 0.9179145097732544, "learning_rate": 0.00019975731517058012, "loss": 2.1021, "step": 830 }, { "epoch": 0.2244788882950294, "grad_norm": 0.8308878540992737, "learning_rate": 0.00019975143455389555, "loss": 2.0775, "step": 840 }, { "epoch": 0.22715125601282737, "grad_norm": 0.7753422260284424, "learning_rate": 0.000199745483628149, "loss": 2.0697, "step": 850 }, { "epoch": 0.22982362373062534, "grad_norm": 0.8096175193786621, "learning_rate": 0.000199739462397535, "loss": 2.014, "step": 860 }, { "epoch": 0.2324959914484233, "grad_norm": 0.8164053559303284, "learning_rate": 0.00019973337086629753, "loss": 2.1292, "step": 870 }, { "epoch": 0.23516835916622128, "grad_norm": 0.7342932820320129, "learning_rate": 0.00019972720903873017, "loss": 2.0944, "step": 880 }, { "epoch": 0.23784072688401925, "grad_norm": 0.8686122298240662, "learning_rate": 0.00019972097691917603, "loss": 2.1152, "step": 890 }, { "epoch": 0.24051309460181722, "grad_norm": 0.8342360258102417, "learning_rate": 0.0001997146745120278, "loss": 2.0581, "step": 900 }, { "epoch": 0.2431854623196152, "grad_norm": 0.7059487104415894, "learning_rate": 0.00019970830182172768, "loss": 2.0178, "step": 910 }, { "epoch": 0.24585783003741316, "grad_norm": 0.7941276431083679, "learning_rate": 0.0001997018588527674, "loss": 2.0689, "step": 920 }, { "epoch": 0.24853019775521112, "grad_norm": 0.7752018570899963, "learning_rate": 0.00019969534560968823, "loss": 2.0976, "step": 930 }, { "epoch": 0.25120256547300907, "grad_norm": 0.7754221558570862, "learning_rate": 0.000199688762097081, "loss": 2.0676, "step": 940 }, { "epoch": 0.25387493319080706, "grad_norm": 0.7400916814804077, "learning_rate": 0.00019968210831958604, "loss": 2.0806, "step": 950 }, { "epoch": 0.256547300908605, "grad_norm": 0.8396859765052795, "learning_rate": 0.00019967538428189321, "loss": 2.0666, "step": 960 }, { "epoch": 0.259219668626403, "grad_norm": 0.821546196937561, "learning_rate": 0.00019966858998874193, "loss": 2.0362, "step": 970 }, { "epoch": 0.26189203634420094, "grad_norm": 0.7961469888687134, "learning_rate": 0.00019966172544492108, "loss": 2.0702, "step": 980 }, { "epoch": 0.26456440406199894, "grad_norm": 0.8565064072608948, "learning_rate": 0.00019965479065526908, "loss": 2.0715, "step": 990 }, { "epoch": 0.2672367717797969, "grad_norm": 0.9843941926956177, "learning_rate": 0.00019964778562467386, "loss": 2.1114, "step": 1000 }, { "epoch": 0.2699091394975949, "grad_norm": 0.8522785305976868, "learning_rate": 0.0001996407103580729, "loss": 2.102, "step": 1010 }, { "epoch": 0.2725815072153928, "grad_norm": 0.8632455468177795, "learning_rate": 0.00019963356486045312, "loss": 2.075, "step": 1020 }, { "epoch": 0.2752538749331908, "grad_norm": 0.8559175729751587, "learning_rate": 0.000199626349136851, "loss": 2.0732, "step": 1030 }, { "epoch": 0.27792624265098875, "grad_norm": 0.8057096600532532, "learning_rate": 0.00019961906319235242, "loss": 2.0666, "step": 1040 }, { "epoch": 0.28059861036878675, "grad_norm": 0.766559362411499, "learning_rate": 0.0001996117070320929, "loss": 2.0428, "step": 1050 }, { "epoch": 0.2832709780865847, "grad_norm": 0.8320493102073669, "learning_rate": 0.00019960428066125735, "loss": 2.1302, "step": 1060 }, { "epoch": 0.2859433458043827, "grad_norm": 0.8726279735565186, "learning_rate": 0.0001995967840850802, "loss": 2.1308, "step": 1070 }, { "epoch": 0.28861571352218063, "grad_norm": 0.8588752150535583, "learning_rate": 0.0001995892173088453, "loss": 2.0735, "step": 1080 }, { "epoch": 0.2912880812399786, "grad_norm": 0.8037392497062683, "learning_rate": 0.00019958158033788612, "loss": 2.0189, "step": 1090 }, { "epoch": 0.29396044895777657, "grad_norm": 0.8673548698425293, "learning_rate": 0.0001995738731775855, "loss": 2.0375, "step": 1100 }, { "epoch": 0.29663281667557456, "grad_norm": 0.7821346521377563, "learning_rate": 0.00019956609583337572, "loss": 2.074, "step": 1110 }, { "epoch": 0.2993051843933725, "grad_norm": 0.908275306224823, "learning_rate": 0.00019955824831073863, "loss": 2.1195, "step": 1120 }, { "epoch": 0.3019775521111705, "grad_norm": 1.0341707468032837, "learning_rate": 0.0001995503306152055, "loss": 2.0508, "step": 1130 }, { "epoch": 0.30464991982896844, "grad_norm": 0.8817709684371948, "learning_rate": 0.00019954234275235705, "loss": 2.0335, "step": 1140 }, { "epoch": 0.30732228754676644, "grad_norm": 0.8848737478256226, "learning_rate": 0.00019953428472782348, "loss": 2.0893, "step": 1150 }, { "epoch": 0.3099946552645644, "grad_norm": 0.834466814994812, "learning_rate": 0.0001995261565472844, "loss": 2.0991, "step": 1160 }, { "epoch": 0.3126670229823624, "grad_norm": 0.8102088570594788, "learning_rate": 0.00019951795821646892, "loss": 2.0823, "step": 1170 }, { "epoch": 0.3153393907001603, "grad_norm": 0.8358621001243591, "learning_rate": 0.0001995096897411556, "loss": 2.0049, "step": 1180 }, { "epoch": 0.3180117584179583, "grad_norm": 0.8446478843688965, "learning_rate": 0.00019950135112717235, "loss": 2.1207, "step": 1190 }, { "epoch": 0.32068412613575625, "grad_norm": 0.8990554809570312, "learning_rate": 0.0001994929423803966, "loss": 2.043, "step": 1200 }, { "epoch": 0.32335649385355425, "grad_norm": 0.8363642692565918, "learning_rate": 0.0001994844635067552, "loss": 2.0611, "step": 1210 }, { "epoch": 0.3260288615713522, "grad_norm": 0.7991921901702881, "learning_rate": 0.00019947591451222448, "loss": 2.0634, "step": 1220 }, { "epoch": 0.3287012292891502, "grad_norm": 0.8177972435951233, "learning_rate": 0.00019946729540283, "loss": 2.0359, "step": 1230 }, { "epoch": 0.33137359700694813, "grad_norm": 0.8667873740196228, "learning_rate": 0.00019945860618464703, "loss": 1.9758, "step": 1240 }, { "epoch": 0.3340459647247461, "grad_norm": 0.8804141283035278, "learning_rate": 0.00019944984686380001, "loss": 2.0795, "step": 1250 }, { "epoch": 0.3367183324425441, "grad_norm": 0.8883786797523499, "learning_rate": 0.00019944101744646288, "loss": 2.0324, "step": 1260 }, { "epoch": 0.33939070016034206, "grad_norm": 0.8005781173706055, "learning_rate": 0.00019943211793885902, "loss": 2.0907, "step": 1270 }, { "epoch": 0.34206306787814006, "grad_norm": 0.7767745852470398, "learning_rate": 0.00019942314834726118, "loss": 2.0345, "step": 1280 }, { "epoch": 0.344735435595938, "grad_norm": 0.8179928064346313, "learning_rate": 0.00019941410867799152, "loss": 2.0417, "step": 1290 }, { "epoch": 0.347407803313736, "grad_norm": 0.782202959060669, "learning_rate": 0.00019940499893742155, "loss": 2.0555, "step": 1300 }, { "epoch": 0.35008017103153394, "grad_norm": 0.7710146307945251, "learning_rate": 0.0001993958191319722, "loss": 2.0209, "step": 1310 }, { "epoch": 0.35275253874933193, "grad_norm": 0.8291559815406799, "learning_rate": 0.00019938656926811384, "loss": 2.143, "step": 1320 }, { "epoch": 0.3554249064671299, "grad_norm": 0.8180106282234192, "learning_rate": 0.0001993772493523661, "loss": 2.0363, "step": 1330 }, { "epoch": 0.35809727418492787, "grad_norm": 0.8292155265808105, "learning_rate": 0.0001993678593912981, "loss": 2.0649, "step": 1340 }, { "epoch": 0.3607696419027258, "grad_norm": 0.8484436869621277, "learning_rate": 0.00019935839939152832, "loss": 2.0399, "step": 1350 }, { "epoch": 0.3634420096205238, "grad_norm": 0.8078696727752686, "learning_rate": 0.0001993488693597245, "loss": 2.0051, "step": 1360 }, { "epoch": 0.36611437733832175, "grad_norm": 0.8645321726799011, "learning_rate": 0.00019933926930260385, "loss": 2.1165, "step": 1370 }, { "epoch": 0.36878674505611975, "grad_norm": 0.8084302544593811, "learning_rate": 0.00019932959922693287, "loss": 2.0621, "step": 1380 }, { "epoch": 0.3714591127739177, "grad_norm": 0.8464387655258179, "learning_rate": 0.00019931985913952747, "loss": 2.0896, "step": 1390 }, { "epoch": 0.3741314804917157, "grad_norm": 1.1601299047470093, "learning_rate": 0.00019931004904725285, "loss": 2.0684, "step": 1400 }, { "epoch": 0.3768038482095136, "grad_norm": 0.8167917728424072, "learning_rate": 0.0001993001689570236, "loss": 2.0864, "step": 1410 }, { "epoch": 0.3794762159273116, "grad_norm": 0.8336923718452454, "learning_rate": 0.00019929021887580368, "loss": 2.1179, "step": 1420 }, { "epoch": 0.38214858364510956, "grad_norm": 0.8231195211410522, "learning_rate": 0.00019928019881060625, "loss": 2.0374, "step": 1430 }, { "epoch": 0.38482095136290756, "grad_norm": 0.9005337953567505, "learning_rate": 0.00019927010876849387, "loss": 2.0469, "step": 1440 }, { "epoch": 0.3874933190807055, "grad_norm": 0.7575165629386902, "learning_rate": 0.0001992599487565785, "loss": 2.0316, "step": 1450 }, { "epoch": 0.3901656867985035, "grad_norm": 0.84959477186203, "learning_rate": 0.00019924971878202135, "loss": 2.064, "step": 1460 }, { "epoch": 0.39283805451630144, "grad_norm": 0.9098086357116699, "learning_rate": 0.00019923941885203287, "loss": 2.1038, "step": 1470 }, { "epoch": 0.39551042223409943, "grad_norm": 0.8135308623313904, "learning_rate": 0.00019922904897387293, "loss": 2.0406, "step": 1480 }, { "epoch": 0.3981827899518974, "grad_norm": 0.822067141532898, "learning_rate": 0.0001992186091548507, "loss": 2.0401, "step": 1490 }, { "epoch": 0.40085515766969537, "grad_norm": 0.8210283517837524, "learning_rate": 0.00019920809940232454, "loss": 2.0821, "step": 1500 }, { "epoch": 0.4035275253874933, "grad_norm": 1.090965747833252, "learning_rate": 0.00019919751972370222, "loss": 2.0746, "step": 1510 }, { "epoch": 0.4061998931052913, "grad_norm": 0.808088481426239, "learning_rate": 0.00019918687012644072, "loss": 2.0348, "step": 1520 }, { "epoch": 0.40887226082308925, "grad_norm": 0.8605575561523438, "learning_rate": 0.00019917615061804637, "loss": 2.0399, "step": 1530 }, { "epoch": 0.41154462854088725, "grad_norm": 0.8858651518821716, "learning_rate": 0.0001991653612060747, "loss": 2.0639, "step": 1540 }, { "epoch": 0.4142169962586852, "grad_norm": 0.847783625125885, "learning_rate": 0.00019915450189813053, "loss": 2.0702, "step": 1550 }, { "epoch": 0.4168893639764832, "grad_norm": 0.8495000004768372, "learning_rate": 0.000199143572701868, "loss": 2.0471, "step": 1560 }, { "epoch": 0.4195617316942811, "grad_norm": 0.8186051845550537, "learning_rate": 0.00019913257362499047, "loss": 2.0661, "step": 1570 }, { "epoch": 0.4222340994120791, "grad_norm": 0.8053857684135437, "learning_rate": 0.00019912150467525056, "loss": 2.0013, "step": 1580 }, { "epoch": 0.42490646712987706, "grad_norm": 0.8610458374023438, "learning_rate": 0.0001991103658604501, "loss": 2.0594, "step": 1590 }, { "epoch": 0.42757883484767506, "grad_norm": 0.9696066975593567, "learning_rate": 0.00019909915718844024, "loss": 2.0695, "step": 1600 }, { "epoch": 0.430251202565473, "grad_norm": 0.7572137713432312, "learning_rate": 0.00019908787866712132, "loss": 2.0141, "step": 1610 }, { "epoch": 0.432923570283271, "grad_norm": 0.809211254119873, "learning_rate": 0.00019907653030444293, "loss": 2.051, "step": 1620 }, { "epoch": 0.43559593800106894, "grad_norm": 0.8706380724906921, "learning_rate": 0.00019906511210840386, "loss": 2.076, "step": 1630 }, { "epoch": 0.43826830571886694, "grad_norm": 0.8855727910995483, "learning_rate": 0.00019905362408705214, "loss": 2.0107, "step": 1640 }, { "epoch": 0.4409406734366649, "grad_norm": 0.82815021276474, "learning_rate": 0.00019904206624848507, "loss": 1.9842, "step": 1650 }, { "epoch": 0.4436130411544629, "grad_norm": 0.8440461754798889, "learning_rate": 0.00019903043860084905, "loss": 2.0796, "step": 1660 }, { "epoch": 0.4462854088722608, "grad_norm": 0.963351309299469, "learning_rate": 0.00019901874115233975, "loss": 2.0181, "step": 1670 }, { "epoch": 0.4489577765900588, "grad_norm": 0.8633173704147339, "learning_rate": 0.000199006973911202, "loss": 2.0045, "step": 1680 }, { "epoch": 0.45163014430785675, "grad_norm": 0.8765243291854858, "learning_rate": 0.00019899513688572998, "loss": 2.1057, "step": 1690 }, { "epoch": 0.45430251202565475, "grad_norm": 0.8254087567329407, "learning_rate": 0.00019898323008426679, "loss": 2.0332, "step": 1700 }, { "epoch": 0.4569748797434527, "grad_norm": 0.8703109622001648, "learning_rate": 0.00019897125351520488, "loss": 2.0668, "step": 1710 }, { "epoch": 0.4596472474612507, "grad_norm": 0.9613226652145386, "learning_rate": 0.0001989592071869859, "loss": 2.0436, "step": 1720 }, { "epoch": 0.4623196151790486, "grad_norm": 0.844394862651825, "learning_rate": 0.00019894709110810057, "loss": 2.0382, "step": 1730 }, { "epoch": 0.4649919828968466, "grad_norm": 0.8338039517402649, "learning_rate": 0.00019893490528708884, "loss": 2.0148, "step": 1740 }, { "epoch": 0.46766435061464456, "grad_norm": 0.9704885482788086, "learning_rate": 0.0001989226497325398, "loss": 2.0642, "step": 1750 }, { "epoch": 0.47033671833244256, "grad_norm": 0.8037359118461609, "learning_rate": 0.00019891032445309165, "loss": 2.0764, "step": 1760 }, { "epoch": 0.4730090860502405, "grad_norm": 0.8697875142097473, "learning_rate": 0.00019889792945743183, "loss": 2.0119, "step": 1770 }, { "epoch": 0.4756814537680385, "grad_norm": 0.9716849327087402, "learning_rate": 0.0001988854647542968, "loss": 2.0506, "step": 1780 }, { "epoch": 0.47835382148583644, "grad_norm": 0.8439099788665771, "learning_rate": 0.00019887293035247226, "loss": 1.9582, "step": 1790 }, { "epoch": 0.48102618920363444, "grad_norm": 0.9845560193061829, "learning_rate": 0.00019886032626079296, "loss": 1.9794, "step": 1800 }, { "epoch": 0.4836985569214324, "grad_norm": 0.8697616457939148, "learning_rate": 0.00019884765248814282, "loss": 2.0714, "step": 1810 }, { "epoch": 0.4863709246392304, "grad_norm": 0.8646136522293091, "learning_rate": 0.00019883490904345485, "loss": 1.9833, "step": 1820 }, { "epoch": 0.4890432923570283, "grad_norm": 0.7895427942276001, "learning_rate": 0.00019882209593571117, "loss": 1.9911, "step": 1830 }, { "epoch": 0.4917156600748263, "grad_norm": 1.0419542789459229, "learning_rate": 0.000198809213173943, "loss": 2.0412, "step": 1840 }, { "epoch": 0.49438802779262425, "grad_norm": 0.8819153308868408, "learning_rate": 0.0001987962607672307, "loss": 2.0194, "step": 1850 }, { "epoch": 0.49706039551042225, "grad_norm": 0.8423511981964111, "learning_rate": 0.00019878323872470364, "loss": 2.0493, "step": 1860 }, { "epoch": 0.4997327632282202, "grad_norm": 0.9035742878913879, "learning_rate": 0.00019877014705554027, "loss": 2.0242, "step": 1870 }, { "epoch": 0.5024051309460181, "grad_norm": 0.7854070663452148, "learning_rate": 0.0001987569857689682, "loss": 2.016, "step": 1880 }, { "epoch": 0.5050774986638161, "grad_norm": 1.0013773441314697, "learning_rate": 0.00019874375487426412, "loss": 2.0343, "step": 1890 }, { "epoch": 0.5077498663816141, "grad_norm": 0.8524296283721924, "learning_rate": 0.00019873045438075366, "loss": 2.0316, "step": 1900 }, { "epoch": 0.5104222340994121, "grad_norm": 0.8658786416053772, "learning_rate": 0.00019871708429781157, "loss": 2.0874, "step": 1910 }, { "epoch": 0.51309460181721, "grad_norm": 0.8632771968841553, "learning_rate": 0.00019870364463486168, "loss": 1.9716, "step": 1920 }, { "epoch": 0.515766969535008, "grad_norm": 0.8511568307876587, "learning_rate": 0.00019869013540137683, "loss": 1.9946, "step": 1930 }, { "epoch": 0.518439337252806, "grad_norm": 0.8525965809822083, "learning_rate": 0.00019867655660687895, "loss": 1.9906, "step": 1940 }, { "epoch": 0.521111704970604, "grad_norm": 0.8269341588020325, "learning_rate": 0.00019866290826093885, "loss": 2.0097, "step": 1950 }, { "epoch": 0.5237840726884019, "grad_norm": 0.8655617833137512, "learning_rate": 0.00019864919037317652, "loss": 2.1078, "step": 1960 }, { "epoch": 0.5264564404061999, "grad_norm": 0.8016558289527893, "learning_rate": 0.00019863540295326094, "loss": 1.9657, "step": 1970 }, { "epoch": 0.5291288081239979, "grad_norm": 0.8438629508018494, "learning_rate": 0.00019862154601091005, "loss": 2.0081, "step": 1980 }, { "epoch": 0.5318011758417959, "grad_norm": 0.8288686275482178, "learning_rate": 0.0001986076195558908, "loss": 2.0081, "step": 1990 }, { "epoch": 0.5344735435595938, "grad_norm": 0.9016776084899902, "learning_rate": 0.00019859362359801917, "loss": 2.0128, "step": 2000 }, { "epoch": 0.5371459112773918, "grad_norm": 0.9300397634506226, "learning_rate": 0.0001985795581471601, "loss": 2.0566, "step": 2010 }, { "epoch": 0.5398182789951897, "grad_norm": 1.0317420959472656, "learning_rate": 0.0001985654232132275, "loss": 1.9624, "step": 2020 }, { "epoch": 0.5424906467129877, "grad_norm": 1.061338186264038, "learning_rate": 0.00019855121880618432, "loss": 2.1136, "step": 2030 }, { "epoch": 0.5451630144307856, "grad_norm": 0.9200817346572876, "learning_rate": 0.00019853694493604236, "loss": 2.0427, "step": 2040 }, { "epoch": 0.5478353821485836, "grad_norm": 0.925785481929779, "learning_rate": 0.00019852260161286252, "loss": 2.0383, "step": 2050 }, { "epoch": 0.5505077498663816, "grad_norm": 0.8511183857917786, "learning_rate": 0.00019850818884675452, "loss": 2.0463, "step": 2060 }, { "epoch": 0.5531801175841796, "grad_norm": 0.9213443398475647, "learning_rate": 0.00019849370664787717, "loss": 1.9861, "step": 2070 }, { "epoch": 0.5558524853019775, "grad_norm": 0.9268140196800232, "learning_rate": 0.00019847915502643808, "loss": 2.0883, "step": 2080 }, { "epoch": 0.5585248530197755, "grad_norm": 0.9962113499641418, "learning_rate": 0.00019846453399269387, "loss": 2.0392, "step": 2090 }, { "epoch": 0.5611972207375735, "grad_norm": 0.9202669262886047, "learning_rate": 0.00019844984355695008, "loss": 2.0181, "step": 2100 }, { "epoch": 0.5638695884553715, "grad_norm": 0.8881505727767944, "learning_rate": 0.00019843508372956116, "loss": 1.9855, "step": 2110 }, { "epoch": 0.5665419561731694, "grad_norm": 0.8822938799858093, "learning_rate": 0.00019842025452093044, "loss": 2.0401, "step": 2120 }, { "epoch": 0.5692143238909674, "grad_norm": 0.8415065407752991, "learning_rate": 0.00019840535594151023, "loss": 2.0081, "step": 2130 }, { "epoch": 0.5718866916087654, "grad_norm": 0.8376311659812927, "learning_rate": 0.0001983903880018016, "loss": 2.0858, "step": 2140 }, { "epoch": 0.5745590593265634, "grad_norm": 0.889597475528717, "learning_rate": 0.00019837535071235462, "loss": 2.0322, "step": 2150 }, { "epoch": 0.5772314270443613, "grad_norm": 0.8720742464065552, "learning_rate": 0.00019836024408376824, "loss": 1.9494, "step": 2160 }, { "epoch": 0.5799037947621593, "grad_norm": 0.8727856278419495, "learning_rate": 0.00019834506812669027, "loss": 2.073, "step": 2170 }, { "epoch": 0.5825761624799572, "grad_norm": 0.8532377481460571, "learning_rate": 0.00019832982285181733, "loss": 1.9606, "step": 2180 }, { "epoch": 0.5852485301977552, "grad_norm": 0.8545679450035095, "learning_rate": 0.00019831450826989495, "loss": 1.9868, "step": 2190 }, { "epoch": 0.5879208979155531, "grad_norm": 0.9709376096725464, "learning_rate": 0.00019829912439171753, "loss": 2.0459, "step": 2200 }, { "epoch": 0.5905932656333511, "grad_norm": 0.9187166094779968, "learning_rate": 0.00019828367122812823, "loss": 2.0411, "step": 2210 }, { "epoch": 0.5932656333511491, "grad_norm": 0.8447383642196655, "learning_rate": 0.0001982681487900191, "loss": 2.0448, "step": 2220 }, { "epoch": 0.5959380010689471, "grad_norm": 0.8754927515983582, "learning_rate": 0.00019825255708833108, "loss": 1.987, "step": 2230 }, { "epoch": 0.598610368786745, "grad_norm": 0.8265763521194458, "learning_rate": 0.00019823689613405383, "loss": 1.9984, "step": 2240 }, { "epoch": 0.601282736504543, "grad_norm": 0.948645830154419, "learning_rate": 0.00019822116593822586, "loss": 2.0691, "step": 2250 }, { "epoch": 0.603955104222341, "grad_norm": 0.8917912244796753, "learning_rate": 0.00019820536651193445, "loss": 2.0276, "step": 2260 }, { "epoch": 0.606627471940139, "grad_norm": 0.8749749064445496, "learning_rate": 0.00019818949786631573, "loss": 1.991, "step": 2270 }, { "epoch": 0.6092998396579369, "grad_norm": 0.8689747452735901, "learning_rate": 0.00019817356001255461, "loss": 1.979, "step": 2280 }, { "epoch": 0.6119722073757349, "grad_norm": 0.8354358673095703, "learning_rate": 0.0001981575529618848, "loss": 2.0286, "step": 2290 }, { "epoch": 0.6146445750935329, "grad_norm": 0.8581618070602417, "learning_rate": 0.0001981414767255887, "loss": 2.0085, "step": 2300 }, { "epoch": 0.6173169428113309, "grad_norm": 0.8814964890480042, "learning_rate": 0.0001981253313149975, "loss": 1.9892, "step": 2310 }, { "epoch": 0.6199893105291288, "grad_norm": 0.8790181875228882, "learning_rate": 0.00019810911674149125, "loss": 1.9553, "step": 2320 }, { "epoch": 0.6226616782469268, "grad_norm": 0.8602410554885864, "learning_rate": 0.00019809283301649866, "loss": 1.9178, "step": 2330 }, { "epoch": 0.6253340459647247, "grad_norm": 0.8457080125808716, "learning_rate": 0.0001980764801514971, "loss": 2.0387, "step": 2340 }, { "epoch": 0.6280064136825227, "grad_norm": 0.868516206741333, "learning_rate": 0.0001980600581580129, "loss": 2.0005, "step": 2350 }, { "epoch": 0.6306787814003206, "grad_norm": 0.9111327528953552, "learning_rate": 0.00019804356704762087, "loss": 2.0489, "step": 2360 }, { "epoch": 0.6333511491181186, "grad_norm": 0.8195019364356995, "learning_rate": 0.00019802700683194474, "loss": 1.9894, "step": 2370 }, { "epoch": 0.6360235168359166, "grad_norm": 0.914009153842926, "learning_rate": 0.00019801037752265675, "loss": 1.9797, "step": 2380 }, { "epoch": 0.6386958845537146, "grad_norm": 0.8143549561500549, "learning_rate": 0.00019799367913147806, "loss": 1.9689, "step": 2390 }, { "epoch": 0.6413682522715125, "grad_norm": 0.9965906143188477, "learning_rate": 0.00019797691167017833, "loss": 1.9581, "step": 2400 }, { "epoch": 0.6440406199893105, "grad_norm": 0.8981281518936157, "learning_rate": 0.000197960075150576, "loss": 2.0586, "step": 2410 }, { "epoch": 0.6467129877071085, "grad_norm": 0.911881685256958, "learning_rate": 0.00019794316958453818, "loss": 2.0806, "step": 2420 }, { "epoch": 0.6493853554249065, "grad_norm": 0.9170183539390564, "learning_rate": 0.00019792619498398064, "loss": 2.0259, "step": 2430 }, { "epoch": 0.6520577231427044, "grad_norm": 0.8699791431427002, "learning_rate": 0.00019790915136086776, "loss": 1.9954, "step": 2440 }, { "epoch": 0.6547300908605024, "grad_norm": 0.8370604515075684, "learning_rate": 0.00019789203872721265, "loss": 1.9813, "step": 2450 }, { "epoch": 0.6574024585783004, "grad_norm": 0.9251917600631714, "learning_rate": 0.000197874857095077, "loss": 1.9787, "step": 2460 }, { "epoch": 0.6600748262960984, "grad_norm": 0.8766528367996216, "learning_rate": 0.00019785760647657118, "loss": 2.037, "step": 2470 }, { "epoch": 0.6627471940138963, "grad_norm": 0.9043455719947815, "learning_rate": 0.00019784028688385415, "loss": 1.981, "step": 2480 }, { "epoch": 0.6654195617316943, "grad_norm": 0.9129969477653503, "learning_rate": 0.00019782289832913347, "loss": 1.9867, "step": 2490 }, { "epoch": 0.6680919294494923, "grad_norm": 0.9942403435707092, "learning_rate": 0.0001978054408246654, "loss": 2.057, "step": 2500 }, { "epoch": 0.6707642971672902, "grad_norm": 0.8948236107826233, "learning_rate": 0.00019778791438275465, "loss": 2.0288, "step": 2510 }, { "epoch": 0.6734366648850882, "grad_norm": 0.8486409187316895, "learning_rate": 0.0001977703190157547, "loss": 1.9786, "step": 2520 }, { "epoch": 0.6761090326028861, "grad_norm": 0.8692457675933838, "learning_rate": 0.00019775265473606743, "loss": 1.995, "step": 2530 }, { "epoch": 0.6787814003206841, "grad_norm": 0.8827071785926819, "learning_rate": 0.00019773492155614338, "loss": 2.0023, "step": 2540 }, { "epoch": 0.6814537680384821, "grad_norm": 0.9078000783920288, "learning_rate": 0.00019771711948848172, "loss": 1.9479, "step": 2550 }, { "epoch": 0.6841261357562801, "grad_norm": 0.8222280144691467, "learning_rate": 0.00019769924854563006, "loss": 1.9448, "step": 2560 }, { "epoch": 0.686798503474078, "grad_norm": 0.8683822154998779, "learning_rate": 0.00019768130874018463, "loss": 2.0054, "step": 2570 }, { "epoch": 0.689470871191876, "grad_norm": 0.9791079163551331, "learning_rate": 0.0001976633000847901, "loss": 2.0422, "step": 2580 }, { "epoch": 0.692143238909674, "grad_norm": 0.860660970211029, "learning_rate": 0.0001976452225921398, "loss": 2.0139, "step": 2590 }, { "epoch": 0.694815606627472, "grad_norm": 1.0318732261657715, "learning_rate": 0.0001976270762749755, "loss": 2.0283, "step": 2600 }, { "epoch": 0.6974879743452699, "grad_norm": 0.9548642039299011, "learning_rate": 0.00019760886114608749, "loss": 2.0183, "step": 2610 }, { "epoch": 0.7001603420630679, "grad_norm": 0.885148823261261, "learning_rate": 0.0001975905772183146, "loss": 2.006, "step": 2620 }, { "epoch": 0.7028327097808659, "grad_norm": 0.9038057327270508, "learning_rate": 0.00019757222450454408, "loss": 1.999, "step": 2630 }, { "epoch": 0.7055050774986639, "grad_norm": 0.889042317867279, "learning_rate": 0.00019755380301771176, "loss": 1.9462, "step": 2640 }, { "epoch": 0.7081774452164618, "grad_norm": 0.8829699754714966, "learning_rate": 0.0001975353127708018, "loss": 1.9913, "step": 2650 }, { "epoch": 0.7108498129342598, "grad_norm": 0.8977982401847839, "learning_rate": 0.00019751675377684702, "loss": 2.0038, "step": 2660 }, { "epoch": 0.7135221806520577, "grad_norm": 0.9305534958839417, "learning_rate": 0.0001974981260489285, "loss": 2.0484, "step": 2670 }, { "epoch": 0.7161945483698557, "grad_norm": 0.9134384989738464, "learning_rate": 0.00019747942960017588, "loss": 2.028, "step": 2680 }, { "epoch": 0.7188669160876536, "grad_norm": 0.9414751529693604, "learning_rate": 0.00019746066444376728, "loss": 2.0499, "step": 2690 }, { "epoch": 0.7215392838054516, "grad_norm": 0.9050728678703308, "learning_rate": 0.0001974418305929291, "loss": 2.0301, "step": 2700 }, { "epoch": 0.7242116515232496, "grad_norm": 0.8627575635910034, "learning_rate": 0.00019742292806093628, "loss": 1.9909, "step": 2710 }, { "epoch": 0.7268840192410476, "grad_norm": 0.9992572069168091, "learning_rate": 0.0001974039568611121, "loss": 2.0404, "step": 2720 }, { "epoch": 0.7295563869588455, "grad_norm": 0.8972439765930176, "learning_rate": 0.00019738491700682832, "loss": 1.9952, "step": 2730 }, { "epoch": 0.7322287546766435, "grad_norm": 0.8587448596954346, "learning_rate": 0.00019736580851150497, "loss": 1.9673, "step": 2740 }, { "epoch": 0.7349011223944415, "grad_norm": 0.883135199546814, "learning_rate": 0.00019734663138861057, "loss": 2.0555, "step": 2750 }, { "epoch": 0.7375734901122395, "grad_norm": 0.8620017170906067, "learning_rate": 0.00019732738565166202, "loss": 1.9871, "step": 2760 }, { "epoch": 0.7402458578300374, "grad_norm": 0.933148205280304, "learning_rate": 0.00019730807131422448, "loss": 1.9907, "step": 2770 }, { "epoch": 0.7429182255478354, "grad_norm": 0.9321724772453308, "learning_rate": 0.00019728868838991153, "loss": 1.9875, "step": 2780 }, { "epoch": 0.7455905932656334, "grad_norm": 1.0499948263168335, "learning_rate": 0.00019726923689238505, "loss": 2.0677, "step": 2790 }, { "epoch": 0.7482629609834314, "grad_norm": 0.9769105911254883, "learning_rate": 0.00019724971683535535, "loss": 2.0078, "step": 2800 }, { "epoch": 0.7509353287012293, "grad_norm": 0.9284023642539978, "learning_rate": 0.00019723012823258095, "loss": 1.9492, "step": 2810 }, { "epoch": 0.7536076964190273, "grad_norm": 0.8626641035079956, "learning_rate": 0.00019721047109786874, "loss": 1.9734, "step": 2820 }, { "epoch": 0.7562800641368252, "grad_norm": 0.8522788286209106, "learning_rate": 0.00019719074544507391, "loss": 1.9692, "step": 2830 }, { "epoch": 0.7589524318546232, "grad_norm": 0.8608167767524719, "learning_rate": 0.00019717095128809998, "loss": 1.9476, "step": 2840 }, { "epoch": 0.7616247995724211, "grad_norm": 0.9356365203857422, "learning_rate": 0.00019715108864089861, "loss": 1.9983, "step": 2850 }, { "epoch": 0.7642971672902191, "grad_norm": 0.8763227462768555, "learning_rate": 0.00019713115751746995, "loss": 1.9729, "step": 2860 }, { "epoch": 0.7669695350080171, "grad_norm": 0.9147051572799683, "learning_rate": 0.00019711115793186225, "loss": 2.009, "step": 2870 }, { "epoch": 0.7696419027258151, "grad_norm": 0.8648636937141418, "learning_rate": 0.00019709108989817212, "loss": 2.0437, "step": 2880 }, { "epoch": 0.772314270443613, "grad_norm": 1.0553714036941528, "learning_rate": 0.0001970709534305443, "loss": 2.002, "step": 2890 }, { "epoch": 0.774986638161411, "grad_norm": 0.9300885796546936, "learning_rate": 0.00019705074854317186, "loss": 1.9963, "step": 2900 }, { "epoch": 0.777659005879209, "grad_norm": 0.9071952700614929, "learning_rate": 0.00019703047525029604, "loss": 1.9934, "step": 2910 }, { "epoch": 0.780331373597007, "grad_norm": 0.9207800626754761, "learning_rate": 0.00019701013356620637, "loss": 2.0097, "step": 2920 }, { "epoch": 0.7830037413148049, "grad_norm": 0.9175180196762085, "learning_rate": 0.0001969897235052405, "loss": 2.0086, "step": 2930 }, { "epoch": 0.7856761090326029, "grad_norm": 0.8758548498153687, "learning_rate": 0.00019696924508178434, "loss": 2.0164, "step": 2940 }, { "epoch": 0.7883484767504009, "grad_norm": 0.979289174079895, "learning_rate": 0.00019694869831027192, "loss": 2.0434, "step": 2950 }, { "epoch": 0.7910208444681989, "grad_norm": 0.9176928400993347, "learning_rate": 0.0001969280832051855, "loss": 2.0593, "step": 2960 }, { "epoch": 0.7936932121859968, "grad_norm": 0.857001781463623, "learning_rate": 0.00019690739978105547, "loss": 1.9793, "step": 2970 }, { "epoch": 0.7963655799037948, "grad_norm": 0.9963621497154236, "learning_rate": 0.00019688664805246042, "loss": 2.029, "step": 2980 }, { "epoch": 0.7990379476215927, "grad_norm": 0.9742750525474548, "learning_rate": 0.000196865828034027, "loss": 2.0583, "step": 2990 }, { "epoch": 0.8017103153393907, "grad_norm": 0.9546931982040405, "learning_rate": 0.0001968449397404301, "loss": 2.0153, "step": 3000 }, { "epoch": 0.8043826830571886, "grad_norm": 0.9610984921455383, "learning_rate": 0.0001968239831863927, "loss": 1.9752, "step": 3010 }, { "epoch": 0.8070550507749866, "grad_norm": 0.8832826614379883, "learning_rate": 0.0001968029583866858, "loss": 1.9657, "step": 3020 }, { "epoch": 0.8097274184927846, "grad_norm": 0.8802211284637451, "learning_rate": 0.0001967818653561286, "loss": 2.0418, "step": 3030 }, { "epoch": 0.8123997862105826, "grad_norm": 0.9195637106895447, "learning_rate": 0.00019676070410958838, "loss": 1.9739, "step": 3040 }, { "epoch": 0.8150721539283805, "grad_norm": 1.000476598739624, "learning_rate": 0.00019673947466198048, "loss": 2.0173, "step": 3050 }, { "epoch": 0.8177445216461785, "grad_norm": 0.9482691287994385, "learning_rate": 0.00019671817702826832, "loss": 2.0132, "step": 3060 }, { "epoch": 0.8204168893639765, "grad_norm": 0.8577678799629211, "learning_rate": 0.00019669681122346338, "loss": 1.9925, "step": 3070 }, { "epoch": 0.8230892570817745, "grad_norm": 0.849473237991333, "learning_rate": 0.0001966753772626252, "loss": 2.0145, "step": 3080 }, { "epoch": 0.8257616247995724, "grad_norm": 0.8931000232696533, "learning_rate": 0.00019665387516086132, "loss": 2.0173, "step": 3090 }, { "epoch": 0.8284339925173704, "grad_norm": 0.8839173913002014, "learning_rate": 0.00019663230493332736, "loss": 1.9789, "step": 3100 }, { "epoch": 0.8311063602351684, "grad_norm": 0.8746767044067383, "learning_rate": 0.00019661066659522694, "loss": 1.9989, "step": 3110 }, { "epoch": 0.8337787279529664, "grad_norm": 0.9150108098983765, "learning_rate": 0.00019658896016181167, "loss": 1.9957, "step": 3120 }, { "epoch": 0.8364510956707643, "grad_norm": 0.929069459438324, "learning_rate": 0.0001965671856483812, "loss": 1.9754, "step": 3130 }, { "epoch": 0.8391234633885623, "grad_norm": 0.9361787438392639, "learning_rate": 0.00019654534307028307, "loss": 1.9889, "step": 3140 }, { "epoch": 0.8417958311063602, "grad_norm": 0.8942627906799316, "learning_rate": 0.0001965234324429129, "loss": 1.992, "step": 3150 }, { "epoch": 0.8444681988241582, "grad_norm": 0.9483137130737305, "learning_rate": 0.00019650145378171427, "loss": 1.9743, "step": 3160 }, { "epoch": 0.8471405665419561, "grad_norm": 0.9091803431510925, "learning_rate": 0.00019647940710217863, "loss": 1.9508, "step": 3170 }, { "epoch": 0.8498129342597541, "grad_norm": 0.961330771446228, "learning_rate": 0.00019645729241984542, "loss": 2.0216, "step": 3180 }, { "epoch": 0.8524853019775521, "grad_norm": 0.9919024109840393, "learning_rate": 0.000196435109750302, "loss": 1.9786, "step": 3190 }, { "epoch": 0.8551576696953501, "grad_norm": 0.8831027746200562, "learning_rate": 0.0001964128591091837, "loss": 1.975, "step": 3200 }, { "epoch": 0.857830037413148, "grad_norm": 0.8697969317436218, "learning_rate": 0.0001963905405121737, "loss": 1.9896, "step": 3210 }, { "epoch": 0.860502405130946, "grad_norm": 1.0299196243286133, "learning_rate": 0.00019636815397500312, "loss": 2.0273, "step": 3220 }, { "epoch": 0.863174772848744, "grad_norm": 0.9206592440605164, "learning_rate": 0.00019634569951345088, "loss": 1.9528, "step": 3230 }, { "epoch": 0.865847140566542, "grad_norm": 0.9223202466964722, "learning_rate": 0.00019632317714334395, "loss": 2.0344, "step": 3240 }, { "epoch": 0.8685195082843399, "grad_norm": 1.019234299659729, "learning_rate": 0.00019630058688055693, "loss": 1.9809, "step": 3250 }, { "epoch": 0.8711918760021379, "grad_norm": 0.8873318433761597, "learning_rate": 0.0001962779287410125, "loss": 1.959, "step": 3260 }, { "epoch": 0.8738642437199359, "grad_norm": 0.9181541800498962, "learning_rate": 0.00019625520274068104, "loss": 1.9607, "step": 3270 }, { "epoch": 0.8765366114377339, "grad_norm": 0.9755869507789612, "learning_rate": 0.0001962324088955808, "loss": 2.0121, "step": 3280 }, { "epoch": 0.8792089791555318, "grad_norm": 0.8667630553245544, "learning_rate": 0.00019620954722177785, "loss": 2.0633, "step": 3290 }, { "epoch": 0.8818813468733298, "grad_norm": 0.9352260231971741, "learning_rate": 0.00019618661773538606, "loss": 1.9954, "step": 3300 }, { "epoch": 0.8845537145911277, "grad_norm": 1.1069834232330322, "learning_rate": 0.0001961636204525672, "loss": 1.9668, "step": 3310 }, { "epoch": 0.8872260823089257, "grad_norm": 0.8498321771621704, "learning_rate": 0.00019614055538953062, "loss": 2.0374, "step": 3320 }, { "epoch": 0.8898984500267236, "grad_norm": 0.9243105053901672, "learning_rate": 0.0001961174225625336, "loss": 1.9106, "step": 3330 }, { "epoch": 0.8925708177445216, "grad_norm": 0.9041611552238464, "learning_rate": 0.00019609422198788119, "loss": 1.9412, "step": 3340 }, { "epoch": 0.8952431854623196, "grad_norm": 0.9980326294898987, "learning_rate": 0.00019607095368192609, "loss": 1.9816, "step": 3350 }, { "epoch": 0.8979155531801176, "grad_norm": 0.8987212777137756, "learning_rate": 0.00019604761766106878, "loss": 2.0317, "step": 3360 }, { "epoch": 0.9005879208979155, "grad_norm": 0.9415543675422668, "learning_rate": 0.00019602421394175753, "loss": 2.0433, "step": 3370 }, { "epoch": 0.9032602886157135, "grad_norm": 0.8867407441139221, "learning_rate": 0.00019600074254048826, "loss": 1.9821, "step": 3380 }, { "epoch": 0.9059326563335115, "grad_norm": 1.0224419832229614, "learning_rate": 0.0001959772034738046, "loss": 2.0231, "step": 3390 }, { "epoch": 0.9086050240513095, "grad_norm": 0.9306578040122986, "learning_rate": 0.0001959535967582979, "loss": 2.0645, "step": 3400 }, { "epoch": 0.9112773917691074, "grad_norm": 0.9539933204650879, "learning_rate": 0.00019592992241060717, "loss": 2.023, "step": 3410 }, { "epoch": 0.9139497594869054, "grad_norm": 1.0526412725448608, "learning_rate": 0.0001959061804474191, "loss": 2.0413, "step": 3420 }, { "epoch": 0.9166221272047034, "grad_norm": 0.9722763299942017, "learning_rate": 0.00019588237088546807, "loss": 2.0235, "step": 3430 }, { "epoch": 0.9192944949225014, "grad_norm": 0.9531739354133606, "learning_rate": 0.00019585849374153603, "loss": 1.9189, "step": 3440 }, { "epoch": 0.9219668626402993, "grad_norm": 0.8986770510673523, "learning_rate": 0.00019583454903245265, "loss": 2.0764, "step": 3450 }, { "epoch": 0.9246392303580973, "grad_norm": 0.8783401846885681, "learning_rate": 0.0001958105367750951, "loss": 1.9981, "step": 3460 }, { "epoch": 0.9273115980758952, "grad_norm": 0.8921948075294495, "learning_rate": 0.00019578645698638836, "loss": 1.9624, "step": 3470 }, { "epoch": 0.9299839657936932, "grad_norm": 0.9205301403999329, "learning_rate": 0.0001957623096833048, "loss": 1.954, "step": 3480 }, { "epoch": 0.9326563335114911, "grad_norm": 0.9025654196739197, "learning_rate": 0.00019573809488286452, "loss": 2.0084, "step": 3490 }, { "epoch": 0.9353287012292891, "grad_norm": 0.9316665530204773, "learning_rate": 0.00019571381260213508, "loss": 1.9663, "step": 3500 }, { "epoch": 0.9380010689470871, "grad_norm": 1.025264024734497, "learning_rate": 0.00019568946285823175, "loss": 2.001, "step": 3510 }, { "epoch": 0.9406734366648851, "grad_norm": 0.8893492817878723, "learning_rate": 0.0001956650456683172, "loss": 1.9919, "step": 3520 }, { "epoch": 0.943345804382683, "grad_norm": 0.9078524708747864, "learning_rate": 0.00019564056104960176, "loss": 2.0113, "step": 3530 }, { "epoch": 0.946018172100481, "grad_norm": 0.9093054533004761, "learning_rate": 0.0001956160090193432, "loss": 2.0191, "step": 3540 }, { "epoch": 0.948690539818279, "grad_norm": 0.9037675857543945, "learning_rate": 0.0001955913895948468, "loss": 1.9785, "step": 3550 }, { "epoch": 0.951362907536077, "grad_norm": 0.914854884147644, "learning_rate": 0.00019556670279346548, "loss": 2.0078, "step": 3560 }, { "epoch": 0.9540352752538749, "grad_norm": 0.9203145503997803, "learning_rate": 0.00019554194863259948, "loss": 1.9819, "step": 3570 }, { "epoch": 0.9567076429716729, "grad_norm": 0.9281837344169617, "learning_rate": 0.00019551712712969656, "loss": 1.981, "step": 3580 }, { "epoch": 0.9593800106894709, "grad_norm": 0.9327614903450012, "learning_rate": 0.00019549223830225203, "loss": 1.9994, "step": 3590 }, { "epoch": 0.9620523784072689, "grad_norm": 0.9100453853607178, "learning_rate": 0.0001954672821678086, "loss": 1.9513, "step": 3600 }, { "epoch": 0.9647247461250668, "grad_norm": 0.8855226635932922, "learning_rate": 0.0001954422587439564, "loss": 1.9675, "step": 3610 }, { "epoch": 0.9673971138428648, "grad_norm": 0.91499924659729, "learning_rate": 0.00019541716804833296, "loss": 1.9912, "step": 3620 }, { "epoch": 0.9700694815606627, "grad_norm": 0.9307335615158081, "learning_rate": 0.00019539201009862332, "loss": 1.9813, "step": 3630 }, { "epoch": 0.9727418492784607, "grad_norm": 1.0112236738204956, "learning_rate": 0.00019536678491255987, "loss": 1.9543, "step": 3640 }, { "epoch": 0.9754142169962586, "grad_norm": 0.939149022102356, "learning_rate": 0.00019534149250792238, "loss": 1.9789, "step": 3650 }, { "epoch": 0.9780865847140566, "grad_norm": 0.8903077840805054, "learning_rate": 0.00019531613290253805, "loss": 2.0364, "step": 3660 }, { "epoch": 0.9807589524318546, "grad_norm": 0.8920482397079468, "learning_rate": 0.00019529070611428137, "loss": 1.9463, "step": 3670 }, { "epoch": 0.9834313201496526, "grad_norm": 0.947891354560852, "learning_rate": 0.00019526521216107427, "loss": 1.9562, "step": 3680 }, { "epoch": 0.9861036878674505, "grad_norm": 0.9681838154792786, "learning_rate": 0.00019523965106088594, "loss": 2.0359, "step": 3690 }, { "epoch": 0.9887760555852485, "grad_norm": 0.9280781149864197, "learning_rate": 0.0001952140228317329, "loss": 1.9686, "step": 3700 }, { "epoch": 0.9914484233030465, "grad_norm": 0.9600924253463745, "learning_rate": 0.00019518832749167912, "loss": 1.9946, "step": 3710 }, { "epoch": 0.9941207910208445, "grad_norm": 0.8943674564361572, "learning_rate": 0.00019516256505883566, "loss": 1.9827, "step": 3720 }, { "epoch": 0.9967931587386424, "grad_norm": 0.9458594918251038, "learning_rate": 0.00019513673555136105, "loss": 1.9716, "step": 3730 }, { "epoch": 0.9994655264564404, "grad_norm": 0.9723443984985352, "learning_rate": 0.00019511083898746101, "loss": 1.9845, "step": 3740 }, { "epoch": 1.0021378941742385, "grad_norm": 0.9325452446937561, "learning_rate": 0.00019508487538538852, "loss": 1.8939, "step": 3750 }, { "epoch": 1.0048102618920363, "grad_norm": 0.9777805805206299, "learning_rate": 0.00019505884476344384, "loss": 1.8208, "step": 3760 }, { "epoch": 1.0074826296098343, "grad_norm": 1.0973187685012817, "learning_rate": 0.0001950327471399745, "loss": 1.8432, "step": 3770 }, { "epoch": 1.0101549973276323, "grad_norm": 1.0199981927871704, "learning_rate": 0.00019500658253337517, "loss": 1.8238, "step": 3780 }, { "epoch": 1.0128273650454303, "grad_norm": 1.0170180797576904, "learning_rate": 0.00019498035096208782, "loss": 1.8515, "step": 3790 }, { "epoch": 1.0154997327632282, "grad_norm": 0.9383100867271423, "learning_rate": 0.00019495405244460154, "loss": 1.8544, "step": 3800 }, { "epoch": 1.0181721004810262, "grad_norm": 0.9334129095077515, "learning_rate": 0.0001949276869994527, "loss": 1.8378, "step": 3810 }, { "epoch": 1.0208444681988242, "grad_norm": 1.0014691352844238, "learning_rate": 0.0001949012546452247, "loss": 1.778, "step": 3820 }, { "epoch": 1.0235168359166222, "grad_norm": 1.0079625844955444, "learning_rate": 0.00019487475540054828, "loss": 1.8765, "step": 3830 }, { "epoch": 1.02618920363442, "grad_norm": 0.9731796383857727, "learning_rate": 0.0001948481892841012, "loss": 1.8032, "step": 3840 }, { "epoch": 1.028861571352218, "grad_norm": 0.9402268528938293, "learning_rate": 0.00019482155631460836, "loss": 1.8097, "step": 3850 }, { "epoch": 1.031533939070016, "grad_norm": 1.013541340827942, "learning_rate": 0.00019479485651084186, "loss": 1.8851, "step": 3860 }, { "epoch": 1.034206306787814, "grad_norm": 1.0363236665725708, "learning_rate": 0.00019476808989162087, "loss": 1.7967, "step": 3870 }, { "epoch": 1.036878674505612, "grad_norm": 1.1777698993682861, "learning_rate": 0.00019474125647581157, "loss": 1.8259, "step": 3880 }, { "epoch": 1.03955104222341, "grad_norm": 0.9758262038230896, "learning_rate": 0.0001947143562823274, "loss": 1.8336, "step": 3890 }, { "epoch": 1.042223409941208, "grad_norm": 1.039447546005249, "learning_rate": 0.00019468738933012866, "loss": 1.8978, "step": 3900 }, { "epoch": 1.044895777659006, "grad_norm": 1.0558720827102661, "learning_rate": 0.00019466035563822285, "loss": 1.8205, "step": 3910 }, { "epoch": 1.0475681453768038, "grad_norm": 0.9531826972961426, "learning_rate": 0.0001946332552256645, "loss": 1.8302, "step": 3920 }, { "epoch": 1.0502405130946018, "grad_norm": 1.007906198501587, "learning_rate": 0.00019460608811155508, "loss": 1.8116, "step": 3930 }, { "epoch": 1.0529128808123998, "grad_norm": 1.0126383304595947, "learning_rate": 0.00019457885431504318, "loss": 1.8165, "step": 3940 }, { "epoch": 1.0555852485301978, "grad_norm": 1.0046895742416382, "learning_rate": 0.0001945515538553243, "loss": 1.86, "step": 3950 }, { "epoch": 1.0582576162479957, "grad_norm": 1.0600786209106445, "learning_rate": 0.00019452418675164096, "loss": 1.9054, "step": 3960 }, { "epoch": 1.0609299839657937, "grad_norm": 1.1300631761550903, "learning_rate": 0.0001944967530232827, "loss": 1.8817, "step": 3970 }, { "epoch": 1.0636023516835917, "grad_norm": 1.0602167844772339, "learning_rate": 0.00019446925268958598, "loss": 1.8615, "step": 3980 }, { "epoch": 1.0662747194013895, "grad_norm": 0.9714751839637756, "learning_rate": 0.00019444168576993417, "loss": 1.8389, "step": 3990 }, { "epoch": 1.0689470871191875, "grad_norm": 1.0327038764953613, "learning_rate": 0.00019441405228375763, "loss": 1.8058, "step": 4000 }, { "epoch": 1.0716194548369855, "grad_norm": 0.9951843619346619, "learning_rate": 0.00019438635225053362, "loss": 1.8708, "step": 4010 }, { "epoch": 1.0742918225547835, "grad_norm": 1.0652300119400024, "learning_rate": 0.00019435858568978633, "loss": 1.881, "step": 4020 }, { "epoch": 1.0769641902725815, "grad_norm": 1.0392547845840454, "learning_rate": 0.00019433075262108678, "loss": 1.8336, "step": 4030 }, { "epoch": 1.0796365579903795, "grad_norm": 1.0417035818099976, "learning_rate": 0.0001943028530640529, "loss": 1.8565, "step": 4040 }, { "epoch": 1.0823089257081775, "grad_norm": 0.9951125979423523, "learning_rate": 0.00019427488703834952, "loss": 1.8354, "step": 4050 }, { "epoch": 1.0849812934259755, "grad_norm": 1.0386244058609009, "learning_rate": 0.00019424685456368823, "loss": 1.838, "step": 4060 }, { "epoch": 1.0876536611437735, "grad_norm": 1.0159943103790283, "learning_rate": 0.00019421875565982756, "loss": 1.7713, "step": 4070 }, { "epoch": 1.0903260288615713, "grad_norm": 1.042913556098938, "learning_rate": 0.0001941905903465728, "loss": 1.8475, "step": 4080 }, { "epoch": 1.0929983965793693, "grad_norm": 0.9615035653114319, "learning_rate": 0.0001941623586437761, "loss": 1.8218, "step": 4090 }, { "epoch": 1.0956707642971673, "grad_norm": 1.1015328168869019, "learning_rate": 0.0001941340605713363, "loss": 1.8404, "step": 4100 }, { "epoch": 1.0983431320149653, "grad_norm": 1.030737280845642, "learning_rate": 0.0001941056961491991, "loss": 1.7929, "step": 4110 }, { "epoch": 1.1010154997327632, "grad_norm": 1.2163360118865967, "learning_rate": 0.00019407726539735705, "loss": 1.7742, "step": 4120 }, { "epoch": 1.1036878674505612, "grad_norm": 1.073459267616272, "learning_rate": 0.00019404876833584923, "loss": 1.8787, "step": 4130 }, { "epoch": 1.1063602351683592, "grad_norm": 1.069594144821167, "learning_rate": 0.0001940202049847616, "loss": 1.7977, "step": 4140 }, { "epoch": 1.1090326028861572, "grad_norm": 0.9366630911827087, "learning_rate": 0.00019399157536422693, "loss": 1.8075, "step": 4150 }, { "epoch": 1.111704970603955, "grad_norm": 1.013414978981018, "learning_rate": 0.00019396287949442447, "loss": 1.8798, "step": 4160 }, { "epoch": 1.114377338321753, "grad_norm": 0.9827794432640076, "learning_rate": 0.0001939341173955804, "loss": 1.8713, "step": 4170 }, { "epoch": 1.117049706039551, "grad_norm": 1.013396143913269, "learning_rate": 0.00019390528908796737, "loss": 1.8524, "step": 4180 }, { "epoch": 1.119722073757349, "grad_norm": 0.9573310613632202, "learning_rate": 0.00019387639459190488, "loss": 1.8323, "step": 4190 }, { "epoch": 1.122394441475147, "grad_norm": 1.0018446445465088, "learning_rate": 0.00019384743392775893, "loss": 1.7816, "step": 4200 }, { "epoch": 1.125066809192945, "grad_norm": 1.1065547466278076, "learning_rate": 0.00019381840711594234, "loss": 1.8001, "step": 4210 }, { "epoch": 1.127739176910743, "grad_norm": 1.0700459480285645, "learning_rate": 0.00019378931417691434, "loss": 1.8595, "step": 4220 }, { "epoch": 1.130411544628541, "grad_norm": 1.0059701204299927, "learning_rate": 0.00019376015513118092, "loss": 1.8351, "step": 4230 }, { "epoch": 1.1330839123463388, "grad_norm": 0.9777234792709351, "learning_rate": 0.00019373092999929462, "loss": 1.8352, "step": 4240 }, { "epoch": 1.1357562800641368, "grad_norm": 1.075574517250061, "learning_rate": 0.00019370163880185452, "loss": 1.8722, "step": 4250 }, { "epoch": 1.1384286477819348, "grad_norm": 1.0689491033554077, "learning_rate": 0.00019367228155950633, "loss": 1.8386, "step": 4260 }, { "epoch": 1.1411010154997328, "grad_norm": 1.088172197341919, "learning_rate": 0.00019364285829294228, "loss": 1.8065, "step": 4270 }, { "epoch": 1.1437733832175307, "grad_norm": 1.137303113937378, "learning_rate": 0.0001936133690229012, "loss": 1.9046, "step": 4280 }, { "epoch": 1.1464457509353287, "grad_norm": 1.072802186012268, "learning_rate": 0.0001935838137701683, "loss": 1.8399, "step": 4290 }, { "epoch": 1.1491181186531267, "grad_norm": 1.100389838218689, "learning_rate": 0.0001935541925555754, "loss": 1.8745, "step": 4300 }, { "epoch": 1.1517904863709245, "grad_norm": 1.0402289628982544, "learning_rate": 0.00019352450540000083, "loss": 1.8369, "step": 4310 }, { "epoch": 1.1544628540887225, "grad_norm": 1.0470116138458252, "learning_rate": 0.00019349475232436936, "loss": 1.8598, "step": 4320 }, { "epoch": 1.1571352218065205, "grad_norm": 1.1584430932998657, "learning_rate": 0.00019346493334965217, "loss": 1.8547, "step": 4330 }, { "epoch": 1.1598075895243185, "grad_norm": 1.0952426195144653, "learning_rate": 0.000193435048496867, "loss": 1.8756, "step": 4340 }, { "epoch": 1.1624799572421165, "grad_norm": 1.0330133438110352, "learning_rate": 0.00019340509778707792, "loss": 1.8375, "step": 4350 }, { "epoch": 1.1651523249599145, "grad_norm": 1.1242432594299316, "learning_rate": 0.00019337508124139553, "loss": 1.7711, "step": 4360 }, { "epoch": 1.1678246926777125, "grad_norm": 1.0563093423843384, "learning_rate": 0.00019334499888097674, "loss": 1.8461, "step": 4370 }, { "epoch": 1.1704970603955105, "grad_norm": 1.044492244720459, "learning_rate": 0.00019331485072702484, "loss": 1.9135, "step": 4380 }, { "epoch": 1.1731694281133085, "grad_norm": 0.9759335517883301, "learning_rate": 0.0001932846368007896, "loss": 1.8698, "step": 4390 }, { "epoch": 1.1758417958311065, "grad_norm": 1.0697232484817505, "learning_rate": 0.00019325435712356704, "loss": 1.8769, "step": 4400 }, { "epoch": 1.1785141635489043, "grad_norm": 1.0401313304901123, "learning_rate": 0.00019322401171669958, "loss": 1.9235, "step": 4410 }, { "epoch": 1.1811865312667023, "grad_norm": 1.1742342710494995, "learning_rate": 0.00019319360060157594, "loss": 1.7966, "step": 4420 }, { "epoch": 1.1838588989845003, "grad_norm": 1.048688292503357, "learning_rate": 0.00019316312379963118, "loss": 1.7849, "step": 4430 }, { "epoch": 1.1865312667022982, "grad_norm": 1.037684679031372, "learning_rate": 0.0001931325813323467, "loss": 1.8176, "step": 4440 }, { "epoch": 1.1892036344200962, "grad_norm": 1.0703048706054688, "learning_rate": 0.00019310197322125006, "loss": 1.8587, "step": 4450 }, { "epoch": 1.1918760021378942, "grad_norm": 0.9846740365028381, "learning_rate": 0.0001930712994879152, "loss": 1.8726, "step": 4460 }, { "epoch": 1.1945483698556922, "grad_norm": 1.0052661895751953, "learning_rate": 0.0001930405601539622, "loss": 1.8343, "step": 4470 }, { "epoch": 1.19722073757349, "grad_norm": 1.0184731483459473, "learning_rate": 0.00019300975524105758, "loss": 1.8112, "step": 4480 }, { "epoch": 1.199893105291288, "grad_norm": 1.0682815313339233, "learning_rate": 0.00019297888477091388, "loss": 1.8566, "step": 4490 }, { "epoch": 1.202565473009086, "grad_norm": 0.9575110077857971, "learning_rate": 0.00019294794876528992, "loss": 1.8807, "step": 4500 }, { "epoch": 1.205237840726884, "grad_norm": 1.0943394899368286, "learning_rate": 0.00019291694724599072, "loss": 1.8857, "step": 4510 }, { "epoch": 1.207910208444682, "grad_norm": 1.1048181056976318, "learning_rate": 0.00019288588023486748, "loss": 1.8038, "step": 4520 }, { "epoch": 1.21058257616248, "grad_norm": 1.0477944612503052, "learning_rate": 0.00019285474775381759, "loss": 1.8832, "step": 4530 }, { "epoch": 1.213254943880278, "grad_norm": 1.0154651403427124, "learning_rate": 0.00019282354982478445, "loss": 1.8237, "step": 4540 }, { "epoch": 1.215927311598076, "grad_norm": 1.1119800806045532, "learning_rate": 0.00019279228646975778, "loss": 1.8808, "step": 4550 }, { "epoch": 1.218599679315874, "grad_norm": 1.0663464069366455, "learning_rate": 0.0001927609577107733, "loss": 1.8468, "step": 4560 }, { "epoch": 1.2212720470336718, "grad_norm": 1.007313847541809, "learning_rate": 0.00019272956356991284, "loss": 1.7839, "step": 4570 }, { "epoch": 1.2239444147514698, "grad_norm": 1.0278741121292114, "learning_rate": 0.00019269810406930438, "loss": 1.8177, "step": 4580 }, { "epoch": 1.2266167824692678, "grad_norm": 1.0416043996810913, "learning_rate": 0.00019266657923112184, "loss": 1.8579, "step": 4590 }, { "epoch": 1.2292891501870657, "grad_norm": 1.0626152753829956, "learning_rate": 0.0001926349890775853, "loss": 1.8608, "step": 4600 }, { "epoch": 1.2319615179048637, "grad_norm": 1.106374740600586, "learning_rate": 0.00019260333363096087, "loss": 1.8846, "step": 4610 }, { "epoch": 1.2346338856226617, "grad_norm": 1.1124606132507324, "learning_rate": 0.00019257161291356064, "loss": 1.8111, "step": 4620 }, { "epoch": 1.2373062533404597, "grad_norm": 1.074202299118042, "learning_rate": 0.0001925398269477427, "loss": 1.9275, "step": 4630 }, { "epoch": 1.2399786210582575, "grad_norm": 0.9949113726615906, "learning_rate": 0.0001925079757559112, "loss": 1.8803, "step": 4640 }, { "epoch": 1.2426509887760555, "grad_norm": 1.1150023937225342, "learning_rate": 0.00019247605936051617, "loss": 1.8573, "step": 4650 }, { "epoch": 1.2453233564938535, "grad_norm": 1.0155227184295654, "learning_rate": 0.00019244407778405372, "loss": 1.8604, "step": 4660 }, { "epoch": 1.2479957242116515, "grad_norm": 1.0073447227478027, "learning_rate": 0.0001924120310490657, "loss": 1.8634, "step": 4670 }, { "epoch": 1.2506680919294495, "grad_norm": 1.0564298629760742, "learning_rate": 0.00019237991917814013, "loss": 1.8315, "step": 4680 }, { "epoch": 1.2533404596472475, "grad_norm": 1.0739296674728394, "learning_rate": 0.00019234774219391082, "loss": 1.8478, "step": 4690 }, { "epoch": 1.2560128273650455, "grad_norm": 1.1494308710098267, "learning_rate": 0.0001923155001190574, "loss": 1.8246, "step": 4700 }, { "epoch": 1.2586851950828435, "grad_norm": 1.0917733907699585, "learning_rate": 0.0001922831929763055, "loss": 1.8558, "step": 4710 }, { "epoch": 1.2613575628006415, "grad_norm": 1.0394524335861206, "learning_rate": 0.0001922508207884266, "loss": 1.8838, "step": 4720 }, { "epoch": 1.2640299305184393, "grad_norm": 1.042967438697815, "learning_rate": 0.000192218383578238, "loss": 1.8862, "step": 4730 }, { "epoch": 1.2667022982362373, "grad_norm": 1.2198184728622437, "learning_rate": 0.00019218588136860274, "loss": 1.9148, "step": 4740 }, { "epoch": 1.2693746659540353, "grad_norm": 1.0503358840942383, "learning_rate": 0.00019215331418242988, "loss": 1.7926, "step": 4750 }, { "epoch": 1.2720470336718332, "grad_norm": 1.0391160249710083, "learning_rate": 0.00019212068204267407, "loss": 1.8469, "step": 4760 }, { "epoch": 1.2747194013896312, "grad_norm": 1.2048615217208862, "learning_rate": 0.00019208798497233586, "loss": 1.9291, "step": 4770 }, { "epoch": 1.2773917691074292, "grad_norm": 1.1003245115280151, "learning_rate": 0.00019205522299446158, "loss": 1.8386, "step": 4780 }, { "epoch": 1.280064136825227, "grad_norm": 1.0906586647033691, "learning_rate": 0.0001920223961321432, "loss": 1.8626, "step": 4790 }, { "epoch": 1.282736504543025, "grad_norm": 1.0515018701553345, "learning_rate": 0.00019198950440851853, "loss": 1.8482, "step": 4800 }, { "epoch": 1.285408872260823, "grad_norm": 1.0668543577194214, "learning_rate": 0.00019195654784677106, "loss": 1.8351, "step": 4810 }, { "epoch": 1.288081239978621, "grad_norm": 1.0039379596710205, "learning_rate": 0.00019192352647012995, "loss": 1.8304, "step": 4820 }, { "epoch": 1.290753607696419, "grad_norm": 1.0450323820114136, "learning_rate": 0.00019189044030187017, "loss": 1.8702, "step": 4830 }, { "epoch": 1.293425975414217, "grad_norm": 1.1013343334197998, "learning_rate": 0.0001918572893653121, "loss": 1.8401, "step": 4840 }, { "epoch": 1.296098343132015, "grad_norm": 1.148041009902954, "learning_rate": 0.00019182407368382206, "loss": 1.9516, "step": 4850 }, { "epoch": 1.298770710849813, "grad_norm": 1.0496457815170288, "learning_rate": 0.00019179079328081184, "loss": 1.8738, "step": 4860 }, { "epoch": 1.301443078567611, "grad_norm": 1.0360918045043945, "learning_rate": 0.00019175744817973887, "loss": 1.8597, "step": 4870 }, { "epoch": 1.304115446285409, "grad_norm": 1.1782798767089844, "learning_rate": 0.00019172403840410625, "loss": 1.8483, "step": 4880 }, { "epoch": 1.3067878140032068, "grad_norm": 1.0753065347671509, "learning_rate": 0.00019169056397746255, "loss": 1.8699, "step": 4890 }, { "epoch": 1.3094601817210048, "grad_norm": 1.0449695587158203, "learning_rate": 0.000191657024923402, "loss": 1.8936, "step": 4900 }, { "epoch": 1.3121325494388028, "grad_norm": 1.0549265146255493, "learning_rate": 0.00019162342126556435, "loss": 1.8174, "step": 4910 }, { "epoch": 1.3148049171566007, "grad_norm": 1.0759707689285278, "learning_rate": 0.0001915897530276349, "loss": 1.8916, "step": 4920 }, { "epoch": 1.3174772848743987, "grad_norm": 1.0495866537094116, "learning_rate": 0.00019155602023334444, "loss": 1.829, "step": 4930 }, { "epoch": 1.3201496525921967, "grad_norm": 1.112349271774292, "learning_rate": 0.0001915222229064693, "loss": 1.8413, "step": 4940 }, { "epoch": 1.3228220203099945, "grad_norm": 1.011779546737671, "learning_rate": 0.00019148836107083127, "loss": 1.9259, "step": 4950 }, { "epoch": 1.3254943880277925, "grad_norm": 1.1030875444412231, "learning_rate": 0.0001914544347502976, "loss": 1.8494, "step": 4960 }, { "epoch": 1.3281667557455905, "grad_norm": 1.1434781551361084, "learning_rate": 0.00019142044396878104, "loss": 1.9277, "step": 4970 }, { "epoch": 1.3308391234633885, "grad_norm": 1.0343862771987915, "learning_rate": 0.00019138638875023972, "loss": 1.8284, "step": 4980 }, { "epoch": 1.3335114911811865, "grad_norm": 1.0611770153045654, "learning_rate": 0.00019135226911867718, "loss": 1.8642, "step": 4990 }, { "epoch": 1.3361838588989845, "grad_norm": 0.9910778999328613, "learning_rate": 0.00019131808509814245, "loss": 1.8258, "step": 5000 }, { "epoch": 1.3388562266167825, "grad_norm": 1.0867102146148682, "learning_rate": 0.00019128383671272988, "loss": 1.8897, "step": 5010 }, { "epoch": 1.3415285943345805, "grad_norm": 1.124879002571106, "learning_rate": 0.0001912495239865791, "loss": 1.8994, "step": 5020 }, { "epoch": 1.3442009620523785, "grad_norm": 1.1122723817825317, "learning_rate": 0.0001912151469438753, "loss": 1.9291, "step": 5030 }, { "epoch": 1.3468733297701765, "grad_norm": 1.0620670318603516, "learning_rate": 0.00019118070560884885, "loss": 1.8314, "step": 5040 }, { "epoch": 1.3495456974879745, "grad_norm": 1.0552860498428345, "learning_rate": 0.0001911462000057754, "loss": 1.7935, "step": 5050 }, { "epoch": 1.3522180652057723, "grad_norm": 1.136927843093872, "learning_rate": 0.00019111163015897607, "loss": 1.8598, "step": 5060 }, { "epoch": 1.3548904329235703, "grad_norm": 1.0880241394042969, "learning_rate": 0.0001910769960928171, "loss": 1.8243, "step": 5070 }, { "epoch": 1.3575628006413683, "grad_norm": 1.0525450706481934, "learning_rate": 0.00019104229783171005, "loss": 1.8203, "step": 5080 }, { "epoch": 1.3602351683591662, "grad_norm": 1.1240369081497192, "learning_rate": 0.0001910075354001118, "loss": 1.8641, "step": 5090 }, { "epoch": 1.3629075360769642, "grad_norm": 1.0436363220214844, "learning_rate": 0.0001909727088225243, "loss": 1.8671, "step": 5100 }, { "epoch": 1.3655799037947622, "grad_norm": 1.040597677230835, "learning_rate": 0.00019093781812349486, "loss": 1.8052, "step": 5110 }, { "epoch": 1.36825227151256, "grad_norm": 0.9839260578155518, "learning_rate": 0.00019090286332761592, "loss": 1.8177, "step": 5120 }, { "epoch": 1.370924639230358, "grad_norm": 1.1703975200653076, "learning_rate": 0.00019086784445952515, "loss": 1.8334, "step": 5130 }, { "epoch": 1.373597006948156, "grad_norm": 1.0526046752929688, "learning_rate": 0.0001908327615439053, "loss": 1.7837, "step": 5140 }, { "epoch": 1.376269374665954, "grad_norm": 1.1150929927825928, "learning_rate": 0.00019079761460548427, "loss": 1.8717, "step": 5150 }, { "epoch": 1.378941742383752, "grad_norm": 1.211668848991394, "learning_rate": 0.00019076240366903518, "loss": 1.8667, "step": 5160 }, { "epoch": 1.38161411010155, "grad_norm": 1.0549869537353516, "learning_rate": 0.0001907271287593762, "loss": 1.8025, "step": 5170 }, { "epoch": 1.384286477819348, "grad_norm": 1.2277272939682007, "learning_rate": 0.00019069178990137054, "loss": 1.8574, "step": 5180 }, { "epoch": 1.386958845537146, "grad_norm": 1.0361557006835938, "learning_rate": 0.00019065638711992654, "loss": 1.8309, "step": 5190 }, { "epoch": 1.389631213254944, "grad_norm": 1.081335425376892, "learning_rate": 0.00019062092043999767, "loss": 1.8703, "step": 5200 }, { "epoch": 1.392303580972742, "grad_norm": 1.1670414209365845, "learning_rate": 0.00019058538988658223, "loss": 1.9426, "step": 5210 }, { "epoch": 1.3949759486905398, "grad_norm": 1.1812220811843872, "learning_rate": 0.00019054979548472378, "loss": 1.8306, "step": 5220 }, { "epoch": 1.3976483164083378, "grad_norm": 1.1016520261764526, "learning_rate": 0.0001905141372595107, "loss": 1.8894, "step": 5230 }, { "epoch": 1.4003206841261358, "grad_norm": 1.078292727470398, "learning_rate": 0.00019047841523607648, "loss": 1.822, "step": 5240 }, { "epoch": 1.4029930518439337, "grad_norm": 1.0356316566467285, "learning_rate": 0.00019044262943959949, "loss": 1.8315, "step": 5250 }, { "epoch": 1.4056654195617317, "grad_norm": 1.0294471979141235, "learning_rate": 0.00019040677989530305, "loss": 1.8875, "step": 5260 }, { "epoch": 1.4083377872795297, "grad_norm": 1.043329119682312, "learning_rate": 0.0001903708666284555, "loss": 1.8544, "step": 5270 }, { "epoch": 1.4110101549973275, "grad_norm": 1.1735234260559082, "learning_rate": 0.00019033488966437004, "loss": 1.8749, "step": 5280 }, { "epoch": 1.4136825227151255, "grad_norm": 1.0429078340530396, "learning_rate": 0.00019029884902840478, "loss": 1.8523, "step": 5290 }, { "epoch": 1.4163548904329235, "grad_norm": 1.1267577409744263, "learning_rate": 0.00019026274474596267, "loss": 1.8811, "step": 5300 }, { "epoch": 1.4190272581507215, "grad_norm": 1.157902717590332, "learning_rate": 0.00019022657684249152, "loss": 1.8161, "step": 5310 }, { "epoch": 1.4216996258685195, "grad_norm": 1.0437076091766357, "learning_rate": 0.00019019034534348408, "loss": 1.8793, "step": 5320 }, { "epoch": 1.4243719935863175, "grad_norm": 0.9943877458572388, "learning_rate": 0.00019015405027447777, "loss": 1.8072, "step": 5330 }, { "epoch": 1.4270443613041155, "grad_norm": 1.1403636932373047, "learning_rate": 0.00019011769166105496, "loss": 1.9485, "step": 5340 }, { "epoch": 1.4297167290219135, "grad_norm": 1.0246118307113647, "learning_rate": 0.00019008126952884272, "loss": 1.9087, "step": 5350 }, { "epoch": 1.4323890967397115, "grad_norm": 1.1130748987197876, "learning_rate": 0.00019004478390351296, "loss": 1.8049, "step": 5360 }, { "epoch": 1.4350614644575095, "grad_norm": 1.1769757270812988, "learning_rate": 0.00019000823481078228, "loss": 1.8204, "step": 5370 }, { "epoch": 1.4377338321753073, "grad_norm": 1.2379077672958374, "learning_rate": 0.00018997162227641203, "loss": 1.8298, "step": 5380 }, { "epoch": 1.4404061998931053, "grad_norm": 1.1044251918792725, "learning_rate": 0.0001899349463262083, "loss": 1.9136, "step": 5390 }, { "epoch": 1.4430785676109033, "grad_norm": 1.0281884670257568, "learning_rate": 0.00018989820698602183, "loss": 1.846, "step": 5400 }, { "epoch": 1.4457509353287012, "grad_norm": 1.1197541952133179, "learning_rate": 0.0001898614042817481, "loss": 1.8016, "step": 5410 }, { "epoch": 1.4484233030464992, "grad_norm": 1.0418063402175903, "learning_rate": 0.00018982453823932722, "loss": 1.8255, "step": 5420 }, { "epoch": 1.4510956707642972, "grad_norm": 1.0735664367675781, "learning_rate": 0.00018978760888474397, "loss": 1.8143, "step": 5430 }, { "epoch": 1.453768038482095, "grad_norm": 1.1447256803512573, "learning_rate": 0.00018975061624402768, "loss": 1.8316, "step": 5440 }, { "epoch": 1.456440406199893, "grad_norm": 1.2337983846664429, "learning_rate": 0.0001897135603432524, "loss": 1.8917, "step": 5450 }, { "epoch": 1.459112773917691, "grad_norm": 1.085819959640503, "learning_rate": 0.00018967644120853667, "loss": 1.8752, "step": 5460 }, { "epoch": 1.461785141635489, "grad_norm": 1.1256166696548462, "learning_rate": 0.00018963925886604366, "loss": 1.9224, "step": 5470 }, { "epoch": 1.464457509353287, "grad_norm": 1.051095724105835, "learning_rate": 0.00018960201334198104, "loss": 1.7973, "step": 5480 }, { "epoch": 1.467129877071085, "grad_norm": 1.1790239810943604, "learning_rate": 0.00018956470466260106, "loss": 1.8763, "step": 5490 }, { "epoch": 1.469802244788883, "grad_norm": 1.0808390378952026, "learning_rate": 0.0001895273328542005, "loss": 1.8333, "step": 5500 }, { "epoch": 1.472474612506681, "grad_norm": 1.176282525062561, "learning_rate": 0.0001894898979431205, "loss": 1.8483, "step": 5510 }, { "epoch": 1.475146980224479, "grad_norm": 1.1892223358154297, "learning_rate": 0.0001894523999557469, "loss": 1.8969, "step": 5520 }, { "epoch": 1.477819347942277, "grad_norm": 1.2082583904266357, "learning_rate": 0.00018941483891850983, "loss": 1.9246, "step": 5530 }, { "epoch": 1.4804917156600748, "grad_norm": 1.0708314180374146, "learning_rate": 0.00018937721485788388, "loss": 1.8795, "step": 5540 }, { "epoch": 1.4831640833778728, "grad_norm": 1.1821789741516113, "learning_rate": 0.00018933952780038815, "loss": 1.8206, "step": 5550 }, { "epoch": 1.4858364510956708, "grad_norm": 1.065481424331665, "learning_rate": 0.00018930177777258604, "loss": 1.8698, "step": 5560 }, { "epoch": 1.4885088188134687, "grad_norm": 1.157880425453186, "learning_rate": 0.00018926396480108538, "loss": 1.8601, "step": 5570 }, { "epoch": 1.4911811865312667, "grad_norm": 1.1690573692321777, "learning_rate": 0.00018922608891253843, "loss": 1.8147, "step": 5580 }, { "epoch": 1.4938535542490647, "grad_norm": 1.0936037302017212, "learning_rate": 0.00018918815013364162, "loss": 1.8651, "step": 5590 }, { "epoch": 1.4965259219668625, "grad_norm": 1.087705135345459, "learning_rate": 0.0001891501484911359, "loss": 1.8107, "step": 5600 }, { "epoch": 1.4991982896846605, "grad_norm": 1.171329140663147, "learning_rate": 0.00018911208401180642, "loss": 1.9208, "step": 5610 }, { "epoch": 1.5018706574024585, "grad_norm": 1.0548722743988037, "learning_rate": 0.00018907395672248267, "loss": 1.8568, "step": 5620 }, { "epoch": 1.5045430251202565, "grad_norm": 1.0758652687072754, "learning_rate": 0.00018903576665003832, "loss": 1.8277, "step": 5630 }, { "epoch": 1.5072153928380545, "grad_norm": 1.0779211521148682, "learning_rate": 0.00018899751382139143, "loss": 1.8602, "step": 5640 }, { "epoch": 1.5098877605558525, "grad_norm": 1.063739538192749, "learning_rate": 0.0001889591982635042, "loss": 1.8677, "step": 5650 }, { "epoch": 1.5125601282736505, "grad_norm": 1.0633587837219238, "learning_rate": 0.0001889208200033831, "loss": 1.8956, "step": 5660 }, { "epoch": 1.5152324959914485, "grad_norm": 1.100932240486145, "learning_rate": 0.0001888823790680787, "loss": 1.816, "step": 5670 }, { "epoch": 1.5179048637092465, "grad_norm": 1.0497140884399414, "learning_rate": 0.00018884387548468587, "loss": 1.883, "step": 5680 }, { "epoch": 1.5205772314270445, "grad_norm": 1.1753191947937012, "learning_rate": 0.00018880530928034353, "loss": 1.8177, "step": 5690 }, { "epoch": 1.5232495991448425, "grad_norm": 1.0253231525421143, "learning_rate": 0.00018876668048223478, "loss": 1.8721, "step": 5700 }, { "epoch": 1.5259219668626403, "grad_norm": 1.1670130491256714, "learning_rate": 0.0001887279891175869, "loss": 1.8963, "step": 5710 }, { "epoch": 1.5285943345804383, "grad_norm": 1.018936038017273, "learning_rate": 0.00018868923521367118, "loss": 1.8449, "step": 5720 }, { "epoch": 1.5312667022982362, "grad_norm": 1.0974923372268677, "learning_rate": 0.000188650418797803, "loss": 1.9107, "step": 5730 }, { "epoch": 1.5339390700160342, "grad_norm": 1.0885361433029175, "learning_rate": 0.00018861153989734185, "loss": 1.8306, "step": 5740 }, { "epoch": 1.536611437733832, "grad_norm": 1.2174198627471924, "learning_rate": 0.00018857259853969125, "loss": 1.8493, "step": 5750 }, { "epoch": 1.53928380545163, "grad_norm": 1.1191515922546387, "learning_rate": 0.0001885335947522987, "loss": 1.7669, "step": 5760 }, { "epoch": 1.541956173169428, "grad_norm": 1.0823826789855957, "learning_rate": 0.00018849452856265574, "loss": 1.8432, "step": 5770 }, { "epoch": 1.544628540887226, "grad_norm": 1.1358907222747803, "learning_rate": 0.00018845539999829788, "loss": 1.8698, "step": 5780 }, { "epoch": 1.547300908605024, "grad_norm": 1.1223632097244263, "learning_rate": 0.00018841620908680463, "loss": 1.8296, "step": 5790 }, { "epoch": 1.549973276322822, "grad_norm": 1.072277545928955, "learning_rate": 0.00018837695585579938, "loss": 1.8513, "step": 5800 }, { "epoch": 1.55264564404062, "grad_norm": 1.0672662258148193, "learning_rate": 0.00018833764033294946, "loss": 1.8718, "step": 5810 }, { "epoch": 1.555318011758418, "grad_norm": 1.0297456979751587, "learning_rate": 0.00018829826254596621, "loss": 1.8659, "step": 5820 }, { "epoch": 1.557990379476216, "grad_norm": 1.0709869861602783, "learning_rate": 0.00018825882252260467, "loss": 1.7967, "step": 5830 }, { "epoch": 1.560662747194014, "grad_norm": 1.1051650047302246, "learning_rate": 0.0001882193202906639, "loss": 1.838, "step": 5840 }, { "epoch": 1.563335114911812, "grad_norm": 1.1122859716415405, "learning_rate": 0.0001881797558779868, "loss": 1.8849, "step": 5850 }, { "epoch": 1.56600748262961, "grad_norm": 1.1311308145523071, "learning_rate": 0.00018814012931246, "loss": 1.871, "step": 5860 }, { "epoch": 1.5686798503474078, "grad_norm": 1.0583769083023071, "learning_rate": 0.00018810044062201396, "loss": 1.8633, "step": 5870 }, { "epoch": 1.5713522180652058, "grad_norm": 1.008792519569397, "learning_rate": 0.00018806068983462303, "loss": 1.9072, "step": 5880 }, { "epoch": 1.5740245857830037, "grad_norm": 1.4484059810638428, "learning_rate": 0.00018802087697830522, "loss": 1.8135, "step": 5890 }, { "epoch": 1.5766969535008017, "grad_norm": 1.1241083145141602, "learning_rate": 0.00018798100208112234, "loss": 1.7813, "step": 5900 }, { "epoch": 1.5793693212185995, "grad_norm": 1.1186484098434448, "learning_rate": 0.00018794106517117993, "loss": 1.8517, "step": 5910 }, { "epoch": 1.5820416889363975, "grad_norm": 1.09034264087677, "learning_rate": 0.0001879010662766272, "loss": 1.8193, "step": 5920 }, { "epoch": 1.5847140566541955, "grad_norm": 1.064842700958252, "learning_rate": 0.0001878610054256571, "loss": 1.8343, "step": 5930 }, { "epoch": 1.5873864243719935, "grad_norm": 1.1788545846939087, "learning_rate": 0.00018782088264650618, "loss": 1.8777, "step": 5940 }, { "epoch": 1.5900587920897915, "grad_norm": 1.1238136291503906, "learning_rate": 0.00018778069796745474, "loss": 1.8223, "step": 5950 }, { "epoch": 1.5927311598075895, "grad_norm": 1.1837947368621826, "learning_rate": 0.00018774045141682662, "loss": 1.8243, "step": 5960 }, { "epoch": 1.5954035275253875, "grad_norm": 1.0124212503433228, "learning_rate": 0.00018770014302298932, "loss": 1.7579, "step": 5970 }, { "epoch": 1.5980758952431855, "grad_norm": 1.0607494115829468, "learning_rate": 0.00018765977281435392, "loss": 1.885, "step": 5980 }, { "epoch": 1.6007482629609835, "grad_norm": 1.0666025876998901, "learning_rate": 0.00018761934081937504, "loss": 1.9027, "step": 5990 }, { "epoch": 1.6034206306787815, "grad_norm": 1.1201056241989136, "learning_rate": 0.00018757884706655087, "loss": 1.8567, "step": 6000 }, { "epoch": 1.6060929983965795, "grad_norm": 1.068117380142212, "learning_rate": 0.00018753829158442314, "loss": 1.8151, "step": 6010 }, { "epoch": 1.6087653661143775, "grad_norm": 1.1987791061401367, "learning_rate": 0.0001874976744015771, "loss": 1.9186, "step": 6020 }, { "epoch": 1.6114377338321753, "grad_norm": 1.2409660816192627, "learning_rate": 0.00018745699554664148, "loss": 1.869, "step": 6030 }, { "epoch": 1.6141101015499733, "grad_norm": 1.100341558456421, "learning_rate": 0.00018741625504828843, "loss": 1.8966, "step": 6040 }, { "epoch": 1.6167824692677712, "grad_norm": 1.1203241348266602, "learning_rate": 0.00018737545293523363, "loss": 1.7927, "step": 6050 }, { "epoch": 1.6194548369855692, "grad_norm": 1.1305533647537231, "learning_rate": 0.0001873345892362361, "loss": 1.7844, "step": 6060 }, { "epoch": 1.622127204703367, "grad_norm": 1.1537564992904663, "learning_rate": 0.00018729366398009836, "loss": 1.928, "step": 6070 }, { "epoch": 1.624799572421165, "grad_norm": 1.112511396408081, "learning_rate": 0.00018725267719566627, "loss": 1.7971, "step": 6080 }, { "epoch": 1.627471940138963, "grad_norm": 1.2046387195587158, "learning_rate": 0.00018721162891182903, "loss": 1.8557, "step": 6090 }, { "epoch": 1.630144307856761, "grad_norm": 1.129660964012146, "learning_rate": 0.00018717051915751926, "loss": 1.8919, "step": 6100 }, { "epoch": 1.632816675574559, "grad_norm": 1.0578484535217285, "learning_rate": 0.00018712934796171285, "loss": 1.8957, "step": 6110 }, { "epoch": 1.635489043292357, "grad_norm": 1.1723358631134033, "learning_rate": 0.00018708811535342898, "loss": 1.9637, "step": 6120 }, { "epoch": 1.638161411010155, "grad_norm": 0.9757607579231262, "learning_rate": 0.0001870468213617302, "loss": 1.8283, "step": 6130 }, { "epoch": 1.640833778727953, "grad_norm": 1.2090801000595093, "learning_rate": 0.00018700546601572223, "loss": 1.8349, "step": 6140 }, { "epoch": 1.643506146445751, "grad_norm": 1.1274935007095337, "learning_rate": 0.00018696404934455415, "loss": 1.9076, "step": 6150 }, { "epoch": 1.646178514163549, "grad_norm": 1.1622339487075806, "learning_rate": 0.00018692257137741813, "loss": 1.9078, "step": 6160 }, { "epoch": 1.648850881881347, "grad_norm": 1.0929253101348877, "learning_rate": 0.00018688103214354964, "loss": 1.8216, "step": 6170 }, { "epoch": 1.651523249599145, "grad_norm": 1.1966350078582764, "learning_rate": 0.0001868394316722273, "loss": 1.8581, "step": 6180 }, { "epoch": 1.6541956173169428, "grad_norm": 1.1206058263778687, "learning_rate": 0.00018679776999277285, "loss": 1.9012, "step": 6190 }, { "epoch": 1.6568679850347408, "grad_norm": 1.1007308959960938, "learning_rate": 0.00018675604713455125, "loss": 1.8622, "step": 6200 }, { "epoch": 1.6595403527525387, "grad_norm": 1.0664573907852173, "learning_rate": 0.00018671426312697059, "loss": 1.8181, "step": 6210 }, { "epoch": 1.6622127204703367, "grad_norm": 1.0425337553024292, "learning_rate": 0.00018667241799948196, "loss": 1.8398, "step": 6220 }, { "epoch": 1.6648850881881345, "grad_norm": 1.132024884223938, "learning_rate": 0.00018663051178157965, "loss": 1.8853, "step": 6230 }, { "epoch": 1.6675574559059325, "grad_norm": 1.1238131523132324, "learning_rate": 0.00018658854450280085, "loss": 1.8376, "step": 6240 }, { "epoch": 1.6702298236237305, "grad_norm": 1.1062246561050415, "learning_rate": 0.00018654651619272596, "loss": 1.8883, "step": 6250 }, { "epoch": 1.6729021913415285, "grad_norm": 1.114938497543335, "learning_rate": 0.0001865044268809783, "loss": 1.8638, "step": 6260 }, { "epoch": 1.6755745590593265, "grad_norm": 1.092271089553833, "learning_rate": 0.0001864622765972243, "loss": 1.8254, "step": 6270 }, { "epoch": 1.6782469267771245, "grad_norm": 1.424766182899475, "learning_rate": 0.0001864200653711731, "loss": 1.8943, "step": 6280 }, { "epoch": 1.6809192944949225, "grad_norm": 1.1574349403381348, "learning_rate": 0.0001863777932325771, "loss": 1.9185, "step": 6290 }, { "epoch": 1.6835916622127205, "grad_norm": 1.229568600654602, "learning_rate": 0.00018633546021123147, "loss": 1.8111, "step": 6300 }, { "epoch": 1.6862640299305185, "grad_norm": 1.1014653444290161, "learning_rate": 0.00018629306633697433, "loss": 1.8791, "step": 6310 }, { "epoch": 1.6889363976483165, "grad_norm": 1.1644926071166992, "learning_rate": 0.0001862506116396867, "loss": 1.8766, "step": 6320 }, { "epoch": 1.6916087653661145, "grad_norm": 1.1016088724136353, "learning_rate": 0.00018620809614929243, "loss": 1.8591, "step": 6330 }, { "epoch": 1.6942811330839125, "grad_norm": 1.0894542932510376, "learning_rate": 0.00018616551989575827, "loss": 1.9127, "step": 6340 }, { "epoch": 1.6969535008017105, "grad_norm": 1.157467246055603, "learning_rate": 0.00018612288290909378, "loss": 1.8365, "step": 6350 }, { "epoch": 1.6996258685195083, "grad_norm": 1.1508746147155762, "learning_rate": 0.0001860801852193513, "loss": 1.8592, "step": 6360 }, { "epoch": 1.7022982362373063, "grad_norm": 1.2317012548446655, "learning_rate": 0.00018603742685662606, "loss": 1.867, "step": 6370 }, { "epoch": 1.7049706039551042, "grad_norm": 1.1163196563720703, "learning_rate": 0.00018599460785105587, "loss": 1.9096, "step": 6380 }, { "epoch": 1.7076429716729022, "grad_norm": 1.0866283178329468, "learning_rate": 0.00018595172823282147, "loss": 1.904, "step": 6390 }, { "epoch": 1.7103153393907, "grad_norm": 1.0890675783157349, "learning_rate": 0.00018590878803214617, "loss": 1.8342, "step": 6400 }, { "epoch": 1.712987707108498, "grad_norm": 1.1632815599441528, "learning_rate": 0.0001858657872792961, "loss": 1.806, "step": 6410 }, { "epoch": 1.715660074826296, "grad_norm": 1.287760853767395, "learning_rate": 0.00018582272600458003, "loss": 1.8504, "step": 6420 }, { "epoch": 1.718332442544094, "grad_norm": 1.004636287689209, "learning_rate": 0.00018577960423834936, "loss": 1.9219, "step": 6430 }, { "epoch": 1.721004810261892, "grad_norm": 1.1246731281280518, "learning_rate": 0.00018573642201099815, "loss": 1.8198, "step": 6440 }, { "epoch": 1.72367717797969, "grad_norm": 1.2009623050689697, "learning_rate": 0.00018569317935296308, "loss": 1.823, "step": 6450 }, { "epoch": 1.726349545697488, "grad_norm": 1.147096037864685, "learning_rate": 0.00018564987629472347, "loss": 1.8457, "step": 6460 }, { "epoch": 1.729021913415286, "grad_norm": 1.1151899099349976, "learning_rate": 0.00018560651286680103, "loss": 1.8493, "step": 6470 }, { "epoch": 1.731694281133084, "grad_norm": 1.1140600442886353, "learning_rate": 0.00018556308909976028, "loss": 1.9133, "step": 6480 }, { "epoch": 1.734366648850882, "grad_norm": 1.0503792762756348, "learning_rate": 0.00018551960502420807, "loss": 1.8455, "step": 6490 }, { "epoch": 1.73703901656868, "grad_norm": 1.1020917892456055, "learning_rate": 0.0001854760606707939, "loss": 1.7742, "step": 6500 }, { "epoch": 1.739711384286478, "grad_norm": 1.0384830236434937, "learning_rate": 0.0001854324560702096, "loss": 1.9241, "step": 6510 }, { "epoch": 1.7423837520042758, "grad_norm": 1.0608861446380615, "learning_rate": 0.00018538879125318958, "loss": 1.8665, "step": 6520 }, { "epoch": 1.7450561197220738, "grad_norm": 1.1713470220565796, "learning_rate": 0.00018534506625051067, "loss": 1.7895, "step": 6530 }, { "epoch": 1.7477284874398717, "grad_norm": 1.0618896484375, "learning_rate": 0.0001853012810929921, "loss": 1.8779, "step": 6540 }, { "epoch": 1.7504008551576697, "grad_norm": 1.171653151512146, "learning_rate": 0.0001852574358114956, "loss": 1.8838, "step": 6550 }, { "epoch": 1.7530732228754675, "grad_norm": 1.2031880617141724, "learning_rate": 0.0001852135304369251, "loss": 1.9069, "step": 6560 }, { "epoch": 1.7557455905932655, "grad_norm": 1.048595666885376, "learning_rate": 0.00018516956500022697, "loss": 1.8772, "step": 6570 }, { "epoch": 1.7584179583110635, "grad_norm": 1.0669938325881958, "learning_rate": 0.00018512553953238997, "loss": 1.8379, "step": 6580 }, { "epoch": 1.7610903260288615, "grad_norm": 1.0816853046417236, "learning_rate": 0.0001850814540644451, "loss": 1.8266, "step": 6590 }, { "epoch": 1.7637626937466595, "grad_norm": 1.0739929676055908, "learning_rate": 0.00018503730862746574, "loss": 1.8376, "step": 6600 }, { "epoch": 1.7664350614644575, "grad_norm": 1.144262671470642, "learning_rate": 0.0001849931032525674, "loss": 1.854, "step": 6610 }, { "epoch": 1.7691074291822555, "grad_norm": 1.1057103872299194, "learning_rate": 0.00018494883797090793, "loss": 1.9118, "step": 6620 }, { "epoch": 1.7717797969000535, "grad_norm": 1.1595027446746826, "learning_rate": 0.0001849045128136874, "loss": 1.8516, "step": 6630 }, { "epoch": 1.7744521646178515, "grad_norm": 1.1505125761032104, "learning_rate": 0.00018486012781214806, "loss": 1.8469, "step": 6640 }, { "epoch": 1.7771245323356495, "grad_norm": 1.2194557189941406, "learning_rate": 0.00018481568299757436, "loss": 1.8613, "step": 6650 }, { "epoch": 1.7797969000534475, "grad_norm": 1.0633267164230347, "learning_rate": 0.0001847711784012929, "loss": 1.8683, "step": 6660 }, { "epoch": 1.7824692677712455, "grad_norm": 1.0982544422149658, "learning_rate": 0.0001847266140546724, "loss": 1.8655, "step": 6670 }, { "epoch": 1.7851416354890433, "grad_norm": 1.1030447483062744, "learning_rate": 0.0001846819899891237, "loss": 1.7817, "step": 6680 }, { "epoch": 1.7878140032068413, "grad_norm": 1.08018159866333, "learning_rate": 0.0001846373062360998, "loss": 1.8533, "step": 6690 }, { "epoch": 1.7904863709246392, "grad_norm": 1.2506588697433472, "learning_rate": 0.00018459256282709566, "loss": 1.8723, "step": 6700 }, { "epoch": 1.7931587386424372, "grad_norm": 1.0448452234268188, "learning_rate": 0.00018454775979364837, "loss": 1.7822, "step": 6710 }, { "epoch": 1.795831106360235, "grad_norm": 1.2007098197937012, "learning_rate": 0.000184502897167337, "loss": 1.8141, "step": 6720 }, { "epoch": 1.798503474078033, "grad_norm": 1.0861321687698364, "learning_rate": 0.00018445797497978266, "loss": 1.8231, "step": 6730 }, { "epoch": 1.801175841795831, "grad_norm": 1.1015934944152832, "learning_rate": 0.00018441299326264838, "loss": 1.8457, "step": 6740 }, { "epoch": 1.803848209513629, "grad_norm": 1.0648318529129028, "learning_rate": 0.00018436795204763922, "loss": 1.843, "step": 6750 }, { "epoch": 1.806520577231427, "grad_norm": 1.1390804052352905, "learning_rate": 0.00018432285136650215, "loss": 1.8092, "step": 6760 }, { "epoch": 1.809192944949225, "grad_norm": 1.3198267221450806, "learning_rate": 0.00018427769125102606, "loss": 1.8894, "step": 6770 }, { "epoch": 1.811865312667023, "grad_norm": 1.1119917631149292, "learning_rate": 0.00018423247173304173, "loss": 1.8626, "step": 6780 }, { "epoch": 1.814537680384821, "grad_norm": 1.1425055265426636, "learning_rate": 0.00018418719284442176, "loss": 1.9254, "step": 6790 }, { "epoch": 1.817210048102619, "grad_norm": 1.2299972772598267, "learning_rate": 0.00018414185461708066, "loss": 1.8001, "step": 6800 }, { "epoch": 1.819882415820417, "grad_norm": 1.1462756395339966, "learning_rate": 0.00018409645708297476, "loss": 1.7814, "step": 6810 }, { "epoch": 1.822554783538215, "grad_norm": 1.1987062692642212, "learning_rate": 0.00018405100027410218, "loss": 1.8232, "step": 6820 }, { "epoch": 1.825227151256013, "grad_norm": 1.0986485481262207, "learning_rate": 0.00018400548422250278, "loss": 1.796, "step": 6830 }, { "epoch": 1.8278995189738108, "grad_norm": 1.223432183265686, "learning_rate": 0.00018395990896025825, "loss": 1.8445, "step": 6840 }, { "epoch": 1.8305718866916088, "grad_norm": 1.1204471588134766, "learning_rate": 0.000183914274519492, "loss": 1.7464, "step": 6850 }, { "epoch": 1.8332442544094067, "grad_norm": 1.1691845655441284, "learning_rate": 0.00018386858093236906, "loss": 1.8865, "step": 6860 }, { "epoch": 1.8359166221272047, "grad_norm": 1.2536617517471313, "learning_rate": 0.00018382282823109624, "loss": 1.8843, "step": 6870 }, { "epoch": 1.8385889898450025, "grad_norm": 1.1956285238265991, "learning_rate": 0.000183777016447922, "loss": 1.898, "step": 6880 }, { "epoch": 1.8412613575628005, "grad_norm": 1.0576280355453491, "learning_rate": 0.00018373114561513645, "loss": 1.8173, "step": 6890 }, { "epoch": 1.8439337252805985, "grad_norm": 1.1451317071914673, "learning_rate": 0.00018368521576507128, "loss": 1.8351, "step": 6900 }, { "epoch": 1.8466060929983965, "grad_norm": 1.1110111474990845, "learning_rate": 0.00018363922693009988, "loss": 1.8865, "step": 6910 }, { "epoch": 1.8492784607161945, "grad_norm": 1.1600598096847534, "learning_rate": 0.00018359317914263702, "loss": 1.839, "step": 6920 }, { "epoch": 1.8519508284339925, "grad_norm": 1.1117746829986572, "learning_rate": 0.00018354707243513926, "loss": 1.8191, "step": 6930 }, { "epoch": 1.8546231961517905, "grad_norm": 1.1108200550079346, "learning_rate": 0.0001835009068401045, "loss": 1.8204, "step": 6940 }, { "epoch": 1.8572955638695885, "grad_norm": 1.1949942111968994, "learning_rate": 0.00018345468239007223, "loss": 1.868, "step": 6950 }, { "epoch": 1.8599679315873865, "grad_norm": 1.1258621215820312, "learning_rate": 0.00018340839911762342, "loss": 1.8301, "step": 6960 }, { "epoch": 1.8626402993051845, "grad_norm": 1.1038228273391724, "learning_rate": 0.0001833620570553805, "loss": 1.855, "step": 6970 }, { "epoch": 1.8653126670229825, "grad_norm": 1.0757447481155396, "learning_rate": 0.0001833156562360073, "loss": 1.8098, "step": 6980 }, { "epoch": 1.8679850347407805, "grad_norm": 1.1476863622665405, "learning_rate": 0.00018326919669220913, "loss": 1.8136, "step": 6990 }, { "epoch": 1.8706574024585783, "grad_norm": 1.1160389184951782, "learning_rate": 0.00018322267845673266, "loss": 1.8543, "step": 7000 }, { "epoch": 1.8733297701763763, "grad_norm": 1.2721679210662842, "learning_rate": 0.00018317610156236592, "loss": 1.8373, "step": 7010 }, { "epoch": 1.8760021378941742, "grad_norm": 1.2080637216567993, "learning_rate": 0.00018312946604193825, "loss": 1.884, "step": 7020 }, { "epoch": 1.8786745056119722, "grad_norm": 1.0939440727233887, "learning_rate": 0.00018308277192832038, "loss": 1.8406, "step": 7030 }, { "epoch": 1.88134687332977, "grad_norm": 1.1047283411026, "learning_rate": 0.00018303601925442436, "loss": 1.8284, "step": 7040 }, { "epoch": 1.884019241047568, "grad_norm": 1.0662176609039307, "learning_rate": 0.00018298920805320337, "loss": 1.8719, "step": 7050 }, { "epoch": 1.886691608765366, "grad_norm": 1.106064796447754, "learning_rate": 0.00018294233835765202, "loss": 1.832, "step": 7060 }, { "epoch": 1.889363976483164, "grad_norm": 1.2257906198501587, "learning_rate": 0.00018289541020080602, "loss": 1.8272, "step": 7070 }, { "epoch": 1.892036344200962, "grad_norm": 1.1595290899276733, "learning_rate": 0.00018284842361574236, "loss": 1.8415, "step": 7080 }, { "epoch": 1.89470871191876, "grad_norm": 1.028878927230835, "learning_rate": 0.00018280137863557918, "loss": 1.8264, "step": 7090 }, { "epoch": 1.897381079636558, "grad_norm": 1.1327093839645386, "learning_rate": 0.0001827542752934758, "loss": 1.7834, "step": 7100 }, { "epoch": 1.900053447354356, "grad_norm": 1.1777125597000122, "learning_rate": 0.00018270711362263262, "loss": 1.8784, "step": 7110 }, { "epoch": 1.902725815072154, "grad_norm": 1.0794941186904907, "learning_rate": 0.00018265989365629125, "loss": 1.9329, "step": 7120 }, { "epoch": 1.905398182789952, "grad_norm": 1.4766443967819214, "learning_rate": 0.0001826126154277343, "loss": 1.9082, "step": 7130 }, { "epoch": 1.90807055050775, "grad_norm": 1.0953209400177002, "learning_rate": 0.00018256527897028547, "loss": 1.9247, "step": 7140 }, { "epoch": 1.910742918225548, "grad_norm": 1.0441701412200928, "learning_rate": 0.00018251788431730955, "loss": 1.8696, "step": 7150 }, { "epoch": 1.9134152859433458, "grad_norm": 1.1116993427276611, "learning_rate": 0.0001824704315022123, "loss": 1.8222, "step": 7160 }, { "epoch": 1.9160876536611438, "grad_norm": 1.1172654628753662, "learning_rate": 0.00018242292055844047, "loss": 1.9006, "step": 7170 }, { "epoch": 1.9187600213789417, "grad_norm": 1.1256300210952759, "learning_rate": 0.0001823753515194818, "loss": 1.8, "step": 7180 }, { "epoch": 1.9214323890967397, "grad_norm": 1.1486270427703857, "learning_rate": 0.00018232772441886501, "loss": 1.9045, "step": 7190 }, { "epoch": 1.9241047568145375, "grad_norm": 1.1202726364135742, "learning_rate": 0.00018228003929015966, "loss": 1.7617, "step": 7200 }, { "epoch": 1.9267771245323355, "grad_norm": 1.1803336143493652, "learning_rate": 0.0001822322961669763, "loss": 1.8798, "step": 7210 }, { "epoch": 1.9294494922501335, "grad_norm": 1.1806000471115112, "learning_rate": 0.0001821844950829663, "loss": 1.8395, "step": 7220 }, { "epoch": 1.9321218599679315, "grad_norm": 1.2114359140396118, "learning_rate": 0.0001821366360718219, "loss": 1.8914, "step": 7230 }, { "epoch": 1.9347942276857295, "grad_norm": 1.1424024105072021, "learning_rate": 0.00018208871916727617, "loss": 1.8854, "step": 7240 }, { "epoch": 1.9374665954035275, "grad_norm": 1.1180267333984375, "learning_rate": 0.000182040744403103, "loss": 1.8004, "step": 7250 }, { "epoch": 1.9401389631213255, "grad_norm": 1.235189437866211, "learning_rate": 0.000181992711813117, "loss": 1.8273, "step": 7260 }, { "epoch": 1.9428113308391235, "grad_norm": 1.2162269353866577, "learning_rate": 0.00018194462143117366, "loss": 1.8169, "step": 7270 }, { "epoch": 1.9454836985569215, "grad_norm": 1.2939962148666382, "learning_rate": 0.0001818964732911691, "loss": 1.8331, "step": 7280 }, { "epoch": 1.9481560662747195, "grad_norm": 1.1024796962738037, "learning_rate": 0.00018184826742704017, "loss": 1.85, "step": 7290 }, { "epoch": 1.9508284339925175, "grad_norm": 1.2357715368270874, "learning_rate": 0.00018180000387276441, "loss": 1.8573, "step": 7300 }, { "epoch": 1.9535008017103155, "grad_norm": 1.202744960784912, "learning_rate": 0.00018175168266236006, "loss": 1.7975, "step": 7310 }, { "epoch": 1.9561731694281133, "grad_norm": 1.0578547716140747, "learning_rate": 0.00018170330382988594, "loss": 1.8337, "step": 7320 }, { "epoch": 1.9588455371459113, "grad_norm": 1.1348929405212402, "learning_rate": 0.0001816548674094415, "loss": 1.9244, "step": 7330 }, { "epoch": 1.9615179048637092, "grad_norm": 1.1298471689224243, "learning_rate": 0.00018160637343516686, "loss": 1.7958, "step": 7340 }, { "epoch": 1.9641902725815072, "grad_norm": 1.12726628780365, "learning_rate": 0.0001815578219412426, "loss": 1.8162, "step": 7350 }, { "epoch": 1.966862640299305, "grad_norm": 1.2801421880722046, "learning_rate": 0.00018150921296188982, "loss": 1.7888, "step": 7360 }, { "epoch": 1.969535008017103, "grad_norm": 1.093096137046814, "learning_rate": 0.00018146054653137033, "loss": 1.9479, "step": 7370 }, { "epoch": 1.972207375734901, "grad_norm": 1.0839108228683472, "learning_rate": 0.0001814118226839862, "loss": 1.8465, "step": 7380 }, { "epoch": 1.974879743452699, "grad_norm": 1.2250748872756958, "learning_rate": 0.0001813630414540801, "loss": 1.7888, "step": 7390 }, { "epoch": 1.977552111170497, "grad_norm": 1.1333938837051392, "learning_rate": 0.00018131420287603516, "loss": 1.8968, "step": 7400 }, { "epoch": 1.980224478888295, "grad_norm": 1.0982754230499268, "learning_rate": 0.0001812653069842748, "loss": 1.8264, "step": 7410 }, { "epoch": 1.982896846606093, "grad_norm": 1.1105761528015137, "learning_rate": 0.00018121635381326305, "loss": 1.9022, "step": 7420 }, { "epoch": 1.985569214323891, "grad_norm": 1.217867136001587, "learning_rate": 0.00018116734339750407, "loss": 1.8284, "step": 7430 }, { "epoch": 1.988241582041689, "grad_norm": 1.0955591201782227, "learning_rate": 0.00018111827577154256, "loss": 1.7938, "step": 7440 }, { "epoch": 1.990913949759487, "grad_norm": 1.2205541133880615, "learning_rate": 0.00018106915096996348, "loss": 1.8832, "step": 7450 }, { "epoch": 1.993586317477285, "grad_norm": 1.1036415100097656, "learning_rate": 0.00018101996902739206, "loss": 1.8848, "step": 7460 }, { "epoch": 1.996258685195083, "grad_norm": 1.194022297859192, "learning_rate": 0.00018097072997849385, "loss": 1.8513, "step": 7470 }, { "epoch": 1.9989310529128808, "grad_norm": 1.1090449094772339, "learning_rate": 0.0001809214338579746, "loss": 1.8482, "step": 7480 }, { "epoch": 2.001603420630679, "grad_norm": 1.0925272703170776, "learning_rate": 0.0001808720807005803, "loss": 1.8398, "step": 7490 }, { "epoch": 2.004275788348477, "grad_norm": 1.2713534832000732, "learning_rate": 0.00018082267054109723, "loss": 1.6291, "step": 7500 }, { "epoch": 2.0069481560662745, "grad_norm": 1.154667615890503, "learning_rate": 0.0001807732034143517, "loss": 1.6611, "step": 7510 }, { "epoch": 2.0096205237840725, "grad_norm": 1.2427644729614258, "learning_rate": 0.00018072367935521028, "loss": 1.6878, "step": 7520 }, { "epoch": 2.0122928915018705, "grad_norm": 1.2123231887817383, "learning_rate": 0.0001806740983985797, "loss": 1.659, "step": 7530 }, { "epoch": 2.0149652592196685, "grad_norm": 1.1265623569488525, "learning_rate": 0.00018062446057940662, "loss": 1.6901, "step": 7540 }, { "epoch": 2.0176376269374665, "grad_norm": 1.1744213104248047, "learning_rate": 0.00018057476593267793, "loss": 1.7088, "step": 7550 }, { "epoch": 2.0203099946552645, "grad_norm": 1.2280677556991577, "learning_rate": 0.00018052501449342055, "loss": 1.6548, "step": 7560 }, { "epoch": 2.0229823623730625, "grad_norm": 1.3028384447097778, "learning_rate": 0.00018047520629670144, "loss": 1.6767, "step": 7570 }, { "epoch": 2.0256547300908605, "grad_norm": 1.2041252851486206, "learning_rate": 0.0001804253413776275, "loss": 1.636, "step": 7580 }, { "epoch": 2.0283270978086585, "grad_norm": 1.2943586111068726, "learning_rate": 0.00018037541977134567, "loss": 1.6053, "step": 7590 }, { "epoch": 2.0309994655264565, "grad_norm": 1.2200071811676025, "learning_rate": 0.0001803254415130428, "loss": 1.6664, "step": 7600 }, { "epoch": 2.0336718332442545, "grad_norm": 1.2025283575057983, "learning_rate": 0.00018027540663794573, "loss": 1.6928, "step": 7610 }, { "epoch": 2.0363442009620525, "grad_norm": 1.2312372922897339, "learning_rate": 0.00018022531518132112, "loss": 1.7203, "step": 7620 }, { "epoch": 2.0390165686798505, "grad_norm": 1.2150242328643799, "learning_rate": 0.00018017516717847563, "loss": 1.7097, "step": 7630 }, { "epoch": 2.0416889363976485, "grad_norm": 1.2572401762008667, "learning_rate": 0.00018012496266475565, "loss": 1.6523, "step": 7640 }, { "epoch": 2.0443613041154465, "grad_norm": 1.3172168731689453, "learning_rate": 0.0001800747016755475, "loss": 1.6943, "step": 7650 }, { "epoch": 2.0470336718332445, "grad_norm": 1.5790674686431885, "learning_rate": 0.00018002438424627725, "loss": 1.6717, "step": 7660 }, { "epoch": 2.049706039551042, "grad_norm": 1.2752028703689575, "learning_rate": 0.0001799740104124108, "loss": 1.6941, "step": 7670 }, { "epoch": 2.05237840726884, "grad_norm": 1.2330474853515625, "learning_rate": 0.00017992358020945372, "loss": 1.6986, "step": 7680 }, { "epoch": 2.055050774986638, "grad_norm": 1.2675464153289795, "learning_rate": 0.0001798730936729514, "loss": 1.6786, "step": 7690 }, { "epoch": 2.057723142704436, "grad_norm": 1.1552021503448486, "learning_rate": 0.0001798225508384889, "loss": 1.6453, "step": 7700 }, { "epoch": 2.060395510422234, "grad_norm": 1.3470126390457153, "learning_rate": 0.00017977195174169098, "loss": 1.6547, "step": 7710 }, { "epoch": 2.063067878140032, "grad_norm": 1.253616213798523, "learning_rate": 0.00017972129641822198, "loss": 1.6228, "step": 7720 }, { "epoch": 2.06574024585783, "grad_norm": 1.179351806640625, "learning_rate": 0.000179670584903786, "loss": 1.7211, "step": 7730 }, { "epoch": 2.068412613575628, "grad_norm": 1.2924188375473022, "learning_rate": 0.00017961981723412662, "loss": 1.7298, "step": 7740 }, { "epoch": 2.071084981293426, "grad_norm": 1.1614151000976562, "learning_rate": 0.0001795689934450271, "loss": 1.629, "step": 7750 }, { "epoch": 2.073757349011224, "grad_norm": 1.3283143043518066, "learning_rate": 0.0001795181135723102, "loss": 1.621, "step": 7760 }, { "epoch": 2.076429716729022, "grad_norm": 1.2223650217056274, "learning_rate": 0.00017946717765183822, "loss": 1.6813, "step": 7770 }, { "epoch": 2.07910208444682, "grad_norm": 1.2945345640182495, "learning_rate": 0.00017941618571951296, "loss": 1.6779, "step": 7780 }, { "epoch": 2.081774452164618, "grad_norm": 1.2835251092910767, "learning_rate": 0.0001793651378112757, "loss": 1.7069, "step": 7790 }, { "epoch": 2.084446819882416, "grad_norm": 1.2451834678649902, "learning_rate": 0.0001793140339631072, "loss": 1.6638, "step": 7800 }, { "epoch": 2.087119187600214, "grad_norm": 1.2750883102416992, "learning_rate": 0.00017926287421102764, "loss": 1.718, "step": 7810 }, { "epoch": 2.089791555318012, "grad_norm": 1.2635786533355713, "learning_rate": 0.00017921165859109656, "loss": 1.7728, "step": 7820 }, { "epoch": 2.0924639230358095, "grad_norm": 1.2947158813476562, "learning_rate": 0.00017916038713941296, "loss": 1.6969, "step": 7830 }, { "epoch": 2.0951362907536075, "grad_norm": 1.2207590341567993, "learning_rate": 0.00017910905989211505, "loss": 1.7142, "step": 7840 }, { "epoch": 2.0978086584714055, "grad_norm": 1.2223358154296875, "learning_rate": 0.0001790576768853806, "loss": 1.6306, "step": 7850 }, { "epoch": 2.1004810261892035, "grad_norm": 1.2628051042556763, "learning_rate": 0.00017900623815542642, "loss": 1.6523, "step": 7860 }, { "epoch": 2.1031533939070015, "grad_norm": 1.3147523403167725, "learning_rate": 0.0001789547437385088, "loss": 1.682, "step": 7870 }, { "epoch": 2.1058257616247995, "grad_norm": 1.2582192420959473, "learning_rate": 0.0001789031936709231, "loss": 1.687, "step": 7880 }, { "epoch": 2.1084981293425975, "grad_norm": 1.2919081449508667, "learning_rate": 0.00017885158798900414, "loss": 1.6614, "step": 7890 }, { "epoch": 2.1111704970603955, "grad_norm": 1.2467252016067505, "learning_rate": 0.00017879992672912572, "loss": 1.749, "step": 7900 }, { "epoch": 2.1138428647781935, "grad_norm": 1.2039240598678589, "learning_rate": 0.00017874820992770088, "loss": 1.6873, "step": 7910 }, { "epoch": 2.1165152324959915, "grad_norm": 1.3247950077056885, "learning_rate": 0.00017869643762118188, "loss": 1.7158, "step": 7920 }, { "epoch": 2.1191876002137895, "grad_norm": 1.228939414024353, "learning_rate": 0.00017864460984606, "loss": 1.6909, "step": 7930 }, { "epoch": 2.1218599679315875, "grad_norm": 1.5371471643447876, "learning_rate": 0.0001785927266388657, "loss": 1.7049, "step": 7940 }, { "epoch": 2.1245323356493855, "grad_norm": 1.2502973079681396, "learning_rate": 0.00017854078803616853, "loss": 1.6828, "step": 7950 }, { "epoch": 2.1272047033671835, "grad_norm": 1.2730250358581543, "learning_rate": 0.00017848879407457687, "loss": 1.6686, "step": 7960 }, { "epoch": 2.1298770710849815, "grad_norm": 1.2667866945266724, "learning_rate": 0.0001784367447907384, "loss": 1.6014, "step": 7970 }, { "epoch": 2.132549438802779, "grad_norm": 1.2797930240631104, "learning_rate": 0.00017838464022133967, "loss": 1.7063, "step": 7980 }, { "epoch": 2.135221806520577, "grad_norm": 1.3010532855987549, "learning_rate": 0.00017833248040310617, "loss": 1.7519, "step": 7990 }, { "epoch": 2.137894174238375, "grad_norm": 1.3024216890335083, "learning_rate": 0.00017828026537280235, "loss": 1.689, "step": 8000 }, { "epoch": 2.140566541956173, "grad_norm": 1.3460325002670288, "learning_rate": 0.00017822799516723165, "loss": 1.6936, "step": 8010 }, { "epoch": 2.143238909673971, "grad_norm": 1.1167391538619995, "learning_rate": 0.00017817566982323627, "loss": 1.6171, "step": 8020 }, { "epoch": 2.145911277391769, "grad_norm": 1.421852707862854, "learning_rate": 0.0001781232893776974, "loss": 1.7152, "step": 8030 }, { "epoch": 2.148583645109567, "grad_norm": 1.273163080215454, "learning_rate": 0.00017807085386753495, "loss": 1.6343, "step": 8040 }, { "epoch": 2.151256012827365, "grad_norm": 1.3871347904205322, "learning_rate": 0.00017801836332970777, "loss": 1.6637, "step": 8050 }, { "epoch": 2.153928380545163, "grad_norm": 1.3075594902038574, "learning_rate": 0.00017796581780121334, "loss": 1.7416, "step": 8060 }, { "epoch": 2.156600748262961, "grad_norm": 1.2139554023742676, "learning_rate": 0.00017791321731908805, "loss": 1.7024, "step": 8070 }, { "epoch": 2.159273115980759, "grad_norm": 1.2737025022506714, "learning_rate": 0.00017786056192040697, "loss": 1.686, "step": 8080 }, { "epoch": 2.161945483698557, "grad_norm": 1.3183096647262573, "learning_rate": 0.00017780785164228386, "loss": 1.7213, "step": 8090 }, { "epoch": 2.164617851416355, "grad_norm": 1.2604891061782837, "learning_rate": 0.0001777550865218711, "loss": 1.6772, "step": 8100 }, { "epoch": 2.167290219134153, "grad_norm": 1.2124100923538208, "learning_rate": 0.00017770226659635994, "loss": 1.7333, "step": 8110 }, { "epoch": 2.169962586851951, "grad_norm": 1.2802506685256958, "learning_rate": 0.00017764939190297998, "loss": 1.7097, "step": 8120 }, { "epoch": 2.172634954569749, "grad_norm": 1.3564809560775757, "learning_rate": 0.0001775964624789996, "loss": 1.6261, "step": 8130 }, { "epoch": 2.175307322287547, "grad_norm": 1.2205817699432373, "learning_rate": 0.00017754347836172575, "loss": 1.6072, "step": 8140 }, { "epoch": 2.177979690005345, "grad_norm": 1.2707934379577637, "learning_rate": 0.00017749043958850385, "loss": 1.6535, "step": 8150 }, { "epoch": 2.1806520577231425, "grad_norm": 1.285674810409546, "learning_rate": 0.00017743734619671787, "loss": 1.6612, "step": 8160 }, { "epoch": 2.1833244254409405, "grad_norm": 1.2037670612335205, "learning_rate": 0.00017738419822379035, "loss": 1.6488, "step": 8170 }, { "epoch": 2.1859967931587385, "grad_norm": 1.215549349784851, "learning_rate": 0.00017733099570718224, "loss": 1.6912, "step": 8180 }, { "epoch": 2.1886691608765365, "grad_norm": 1.3087904453277588, "learning_rate": 0.0001772777386843929, "loss": 1.6609, "step": 8190 }, { "epoch": 2.1913415285943345, "grad_norm": 1.2005733251571655, "learning_rate": 0.00017722442719296017, "loss": 1.6959, "step": 8200 }, { "epoch": 2.1940138963121325, "grad_norm": 1.394248366355896, "learning_rate": 0.00017717106127046028, "loss": 1.6696, "step": 8210 }, { "epoch": 2.1966862640299305, "grad_norm": 1.2202259302139282, "learning_rate": 0.0001771176409545078, "loss": 1.6734, "step": 8220 }, { "epoch": 2.1993586317477285, "grad_norm": 1.2602444887161255, "learning_rate": 0.00017706416628275563, "loss": 1.7377, "step": 8230 }, { "epoch": 2.2020309994655265, "grad_norm": 1.4346433877944946, "learning_rate": 0.00017701063729289498, "loss": 1.6947, "step": 8240 }, { "epoch": 2.2047033671833245, "grad_norm": 1.2007207870483398, "learning_rate": 0.0001769570540226554, "loss": 1.7211, "step": 8250 }, { "epoch": 2.2073757349011225, "grad_norm": 1.3080480098724365, "learning_rate": 0.00017690341650980463, "loss": 1.7328, "step": 8260 }, { "epoch": 2.2100481026189205, "grad_norm": 1.2944198846817017, "learning_rate": 0.0001768497247921487, "loss": 1.6602, "step": 8270 }, { "epoch": 2.2127204703367185, "grad_norm": 1.3239096403121948, "learning_rate": 0.00017679597890753182, "loss": 1.6563, "step": 8280 }, { "epoch": 2.2153928380545165, "grad_norm": 1.3229306936264038, "learning_rate": 0.00017674217889383637, "loss": 1.7382, "step": 8290 }, { "epoch": 2.2180652057723145, "grad_norm": 1.3248566389083862, "learning_rate": 0.0001766883247889829, "loss": 1.7465, "step": 8300 }, { "epoch": 2.220737573490112, "grad_norm": 1.284016728401184, "learning_rate": 0.0001766344166309301, "loss": 1.7106, "step": 8310 }, { "epoch": 2.22340994120791, "grad_norm": 1.2725125551223755, "learning_rate": 0.00017658045445767466, "loss": 1.7608, "step": 8320 }, { "epoch": 2.226082308925708, "grad_norm": 1.3566988706588745, "learning_rate": 0.0001765264383072515, "loss": 1.7268, "step": 8330 }, { "epoch": 2.228754676643506, "grad_norm": 1.265575885772705, "learning_rate": 0.00017647236821773349, "loss": 1.722, "step": 8340 }, { "epoch": 2.231427044361304, "grad_norm": 1.2469218969345093, "learning_rate": 0.00017641824422723147, "loss": 1.6961, "step": 8350 }, { "epoch": 2.234099412079102, "grad_norm": 1.4362605810165405, "learning_rate": 0.00017636406637389443, "loss": 1.7367, "step": 8360 }, { "epoch": 2.2367717797969, "grad_norm": 1.2980564832687378, "learning_rate": 0.00017630983469590916, "loss": 1.6679, "step": 8370 }, { "epoch": 2.239444147514698, "grad_norm": 1.2054814100265503, "learning_rate": 0.00017625554923150047, "loss": 1.7167, "step": 8380 }, { "epoch": 2.242116515232496, "grad_norm": 1.2891802787780762, "learning_rate": 0.0001762012100189311, "loss": 1.6553, "step": 8390 }, { "epoch": 2.244788882950294, "grad_norm": 1.2684916257858276, "learning_rate": 0.00017614681709650162, "loss": 1.7258, "step": 8400 }, { "epoch": 2.247461250668092, "grad_norm": 1.2523155212402344, "learning_rate": 0.00017609237050255047, "loss": 1.8091, "step": 8410 }, { "epoch": 2.25013361838589, "grad_norm": 1.2900711297988892, "learning_rate": 0.00017603787027545392, "loss": 1.6807, "step": 8420 }, { "epoch": 2.252805986103688, "grad_norm": 1.3863922357559204, "learning_rate": 0.0001759833164536261, "loss": 1.7239, "step": 8430 }, { "epoch": 2.255478353821486, "grad_norm": 1.3520922660827637, "learning_rate": 0.00017592870907551887, "loss": 1.7375, "step": 8440 }, { "epoch": 2.258150721539284, "grad_norm": 1.2624331712722778, "learning_rate": 0.00017587404817962177, "loss": 1.706, "step": 8450 }, { "epoch": 2.260823089257082, "grad_norm": 1.2753965854644775, "learning_rate": 0.00017581933380446217, "loss": 1.693, "step": 8460 }, { "epoch": 2.26349545697488, "grad_norm": 1.2787902355194092, "learning_rate": 0.00017576456598860512, "loss": 1.6783, "step": 8470 }, { "epoch": 2.2661678246926775, "grad_norm": 1.255979299545288, "learning_rate": 0.00017570974477065324, "loss": 1.7037, "step": 8480 }, { "epoch": 2.2688401924104755, "grad_norm": 1.220231533050537, "learning_rate": 0.00017565487018924694, "loss": 1.7603, "step": 8490 }, { "epoch": 2.2715125601282735, "grad_norm": 1.3431339263916016, "learning_rate": 0.0001755999422830641, "loss": 1.6591, "step": 8500 }, { "epoch": 2.2741849278460715, "grad_norm": 1.2896251678466797, "learning_rate": 0.0001755449610908203, "loss": 1.6617, "step": 8510 }, { "epoch": 2.2768572955638695, "grad_norm": 1.2404873371124268, "learning_rate": 0.00017548992665126858, "loss": 1.7057, "step": 8520 }, { "epoch": 2.2795296632816675, "grad_norm": 1.3518658876419067, "learning_rate": 0.0001754348390031996, "loss": 1.682, "step": 8530 }, { "epoch": 2.2822020309994655, "grad_norm": 1.28294837474823, "learning_rate": 0.00017537969818544143, "loss": 1.6905, "step": 8540 }, { "epoch": 2.2848743987172635, "grad_norm": 1.3236899375915527, "learning_rate": 0.00017532450423685974, "loss": 1.7282, "step": 8550 }, { "epoch": 2.2875467664350615, "grad_norm": 1.3238036632537842, "learning_rate": 0.0001752692571963575, "loss": 1.6797, "step": 8560 }, { "epoch": 2.2902191341528595, "grad_norm": 1.2107789516448975, "learning_rate": 0.00017521395710287524, "loss": 1.6792, "step": 8570 }, { "epoch": 2.2928915018706575, "grad_norm": 1.1869723796844482, "learning_rate": 0.0001751586039953908, "loss": 1.7645, "step": 8580 }, { "epoch": 2.2955638695884555, "grad_norm": 1.2540340423583984, "learning_rate": 0.0001751031979129194, "loss": 1.7228, "step": 8590 }, { "epoch": 2.2982362373062535, "grad_norm": 1.3475019931793213, "learning_rate": 0.00017504773889451361, "loss": 1.7466, "step": 8600 }, { "epoch": 2.3009086050240515, "grad_norm": 1.367012619972229, "learning_rate": 0.0001749922269792633, "loss": 1.6538, "step": 8610 }, { "epoch": 2.303580972741849, "grad_norm": 1.3030816316604614, "learning_rate": 0.00017493666220629568, "loss": 1.7625, "step": 8620 }, { "epoch": 2.306253340459647, "grad_norm": 1.3685482740402222, "learning_rate": 0.0001748810446147751, "loss": 1.6994, "step": 8630 }, { "epoch": 2.308925708177445, "grad_norm": 1.2512644529342651, "learning_rate": 0.00017482537424390332, "loss": 1.6751, "step": 8640 }, { "epoch": 2.311598075895243, "grad_norm": 1.2357844114303589, "learning_rate": 0.00017476965113291906, "loss": 1.7458, "step": 8650 }, { "epoch": 2.314270443613041, "grad_norm": 1.305551290512085, "learning_rate": 0.0001747138753210984, "loss": 1.6777, "step": 8660 }, { "epoch": 2.316942811330839, "grad_norm": 1.8365817070007324, "learning_rate": 0.0001746580468477545, "loss": 1.6625, "step": 8670 }, { "epoch": 2.319615179048637, "grad_norm": 1.3910210132598877, "learning_rate": 0.0001746021657522377, "loss": 1.687, "step": 8680 }, { "epoch": 2.322287546766435, "grad_norm": 1.301171898841858, "learning_rate": 0.0001745462320739353, "loss": 1.706, "step": 8690 }, { "epoch": 2.324959914484233, "grad_norm": 1.1999925374984741, "learning_rate": 0.00017449024585227177, "loss": 1.7317, "step": 8700 }, { "epoch": 2.327632282202031, "grad_norm": 1.381013035774231, "learning_rate": 0.0001744342071267086, "loss": 1.695, "step": 8710 }, { "epoch": 2.330304649919829, "grad_norm": 1.274725317955017, "learning_rate": 0.00017437811593674427, "loss": 1.7112, "step": 8720 }, { "epoch": 2.332977017637627, "grad_norm": 1.2251441478729248, "learning_rate": 0.00017432197232191424, "loss": 1.7284, "step": 8730 }, { "epoch": 2.335649385355425, "grad_norm": 1.3940762281417847, "learning_rate": 0.0001742657763217909, "loss": 1.6257, "step": 8740 }, { "epoch": 2.338321753073223, "grad_norm": 1.2540252208709717, "learning_rate": 0.00017420952797598358, "loss": 1.7471, "step": 8750 }, { "epoch": 2.340994120791021, "grad_norm": 1.3477139472961426, "learning_rate": 0.00017415322732413854, "loss": 1.7856, "step": 8760 }, { "epoch": 2.343666488508819, "grad_norm": 1.329470157623291, "learning_rate": 0.0001740968744059389, "loss": 1.6957, "step": 8770 }, { "epoch": 2.346338856226617, "grad_norm": 1.2522149085998535, "learning_rate": 0.00017404046926110454, "loss": 1.7184, "step": 8780 }, { "epoch": 2.349011223944415, "grad_norm": 1.3245595693588257, "learning_rate": 0.00017398401192939225, "loss": 1.6724, "step": 8790 }, { "epoch": 2.351683591662213, "grad_norm": 1.2924376726150513, "learning_rate": 0.00017392750245059552, "loss": 1.6874, "step": 8800 }, { "epoch": 2.3543559593800105, "grad_norm": 1.359859585762024, "learning_rate": 0.0001738709408645447, "loss": 1.7426, "step": 8810 }, { "epoch": 2.3570283270978085, "grad_norm": 1.3060246706008911, "learning_rate": 0.00017381432721110676, "loss": 1.6724, "step": 8820 }, { "epoch": 2.3597006948156065, "grad_norm": 1.378063440322876, "learning_rate": 0.00017375766153018542, "loss": 1.7137, "step": 8830 }, { "epoch": 2.3623730625334045, "grad_norm": 1.2369308471679688, "learning_rate": 0.0001737009438617211, "loss": 1.7285, "step": 8840 }, { "epoch": 2.3650454302512025, "grad_norm": 1.3088947534561157, "learning_rate": 0.0001736441742456908, "loss": 1.7183, "step": 8850 }, { "epoch": 2.3677177979690005, "grad_norm": 1.389485478401184, "learning_rate": 0.00017358735272210817, "loss": 1.6696, "step": 8860 }, { "epoch": 2.3703901656867985, "grad_norm": 1.2320945262908936, "learning_rate": 0.00017353047933102347, "loss": 1.6916, "step": 8870 }, { "epoch": 2.3730625334045965, "grad_norm": 1.2729374170303345, "learning_rate": 0.00017347355411252341, "loss": 1.7538, "step": 8880 }, { "epoch": 2.3757349011223945, "grad_norm": 1.2985988855361938, "learning_rate": 0.0001734165771067314, "loss": 1.7286, "step": 8890 }, { "epoch": 2.3784072688401925, "grad_norm": 1.290063500404358, "learning_rate": 0.0001733595483538072, "loss": 1.6714, "step": 8900 }, { "epoch": 2.3810796365579905, "grad_norm": 1.323641061782837, "learning_rate": 0.00017330246789394715, "loss": 1.6183, "step": 8910 }, { "epoch": 2.3837520042757885, "grad_norm": 1.1713404655456543, "learning_rate": 0.00017324533576738396, "loss": 1.6931, "step": 8920 }, { "epoch": 2.3864243719935865, "grad_norm": 1.326493263244629, "learning_rate": 0.00017318815201438682, "loss": 1.7018, "step": 8930 }, { "epoch": 2.3890967397113845, "grad_norm": 1.3071362972259521, "learning_rate": 0.00017313091667526121, "loss": 1.7383, "step": 8940 }, { "epoch": 2.391769107429182, "grad_norm": 1.309009075164795, "learning_rate": 0.00017307362979034912, "loss": 1.7412, "step": 8950 }, { "epoch": 2.39444147514698, "grad_norm": 1.3492226600646973, "learning_rate": 0.00017301629140002874, "loss": 1.7089, "step": 8960 }, { "epoch": 2.397113842864778, "grad_norm": 1.1894094944000244, "learning_rate": 0.0001729589015447146, "loss": 1.6837, "step": 8970 }, { "epoch": 2.399786210582576, "grad_norm": 1.2730973958969116, "learning_rate": 0.00017290146026485758, "loss": 1.6845, "step": 8980 }, { "epoch": 2.402458578300374, "grad_norm": 1.4169113636016846, "learning_rate": 0.00017284396760094465, "loss": 1.7025, "step": 8990 }, { "epoch": 2.405130946018172, "grad_norm": 1.3806253671646118, "learning_rate": 0.00017278642359349914, "loss": 1.6783, "step": 9000 }, { "epoch": 2.40780331373597, "grad_norm": 1.2125340700149536, "learning_rate": 0.00017272882828308053, "loss": 1.7031, "step": 9010 }, { "epoch": 2.410475681453768, "grad_norm": 1.2701557874679565, "learning_rate": 0.00017267118171028443, "loss": 1.7034, "step": 9020 }, { "epoch": 2.413148049171566, "grad_norm": 1.3081810474395752, "learning_rate": 0.00017261348391574258, "loss": 1.7071, "step": 9030 }, { "epoch": 2.415820416889364, "grad_norm": 1.255834937095642, "learning_rate": 0.0001725557349401229, "loss": 1.7474, "step": 9040 }, { "epoch": 2.418492784607162, "grad_norm": 1.499633550643921, "learning_rate": 0.0001724979348241293, "loss": 1.6629, "step": 9050 }, { "epoch": 2.42116515232496, "grad_norm": 1.3313318490982056, "learning_rate": 0.00017244008360850174, "loss": 1.7318, "step": 9060 }, { "epoch": 2.423837520042758, "grad_norm": 1.2691699266433716, "learning_rate": 0.00017238218133401625, "loss": 1.7433, "step": 9070 }, { "epoch": 2.426509887760556, "grad_norm": 1.3339135646820068, "learning_rate": 0.00017232422804148482, "loss": 1.6833, "step": 9080 }, { "epoch": 2.429182255478354, "grad_norm": 1.2071961164474487, "learning_rate": 0.0001722662237717554, "loss": 1.7443, "step": 9090 }, { "epoch": 2.431854623196152, "grad_norm": 1.2049094438552856, "learning_rate": 0.00017220816856571187, "loss": 1.6856, "step": 9100 }, { "epoch": 2.43452699091395, "grad_norm": 1.3422529697418213, "learning_rate": 0.000172150062464274, "loss": 1.7147, "step": 9110 }, { "epoch": 2.437199358631748, "grad_norm": 1.2806687355041504, "learning_rate": 0.00017209190550839746, "loss": 1.8169, "step": 9120 }, { "epoch": 2.4398717263495455, "grad_norm": 1.3218368291854858, "learning_rate": 0.00017203369773907375, "loss": 1.7606, "step": 9130 }, { "epoch": 2.4425440940673435, "grad_norm": 1.264967441558838, "learning_rate": 0.0001719754391973302, "loss": 1.7328, "step": 9140 }, { "epoch": 2.4452164617851415, "grad_norm": 1.3537585735321045, "learning_rate": 0.00017191712992422985, "loss": 1.7355, "step": 9150 }, { "epoch": 2.4478888295029395, "grad_norm": 1.3413550853729248, "learning_rate": 0.00017185876996087164, "loss": 1.6849, "step": 9160 }, { "epoch": 2.4505611972207375, "grad_norm": 1.3472052812576294, "learning_rate": 0.0001718003593483901, "loss": 1.7102, "step": 9170 }, { "epoch": 2.4532335649385355, "grad_norm": 1.3312984704971313, "learning_rate": 0.00017174189812795557, "loss": 1.802, "step": 9180 }, { "epoch": 2.4559059326563335, "grad_norm": 1.3904449939727783, "learning_rate": 0.000171683386340774, "loss": 1.7174, "step": 9190 }, { "epoch": 2.4585783003741315, "grad_norm": 1.4721754789352417, "learning_rate": 0.00017162482402808694, "loss": 1.7527, "step": 9200 }, { "epoch": 2.4612506680919295, "grad_norm": 1.2413989305496216, "learning_rate": 0.00017156621123117166, "loss": 1.7112, "step": 9210 }, { "epoch": 2.4639230358097275, "grad_norm": 1.3577944040298462, "learning_rate": 0.00017150754799134094, "loss": 1.7467, "step": 9220 }, { "epoch": 2.4665954035275255, "grad_norm": 1.378377079963684, "learning_rate": 0.00017144883434994313, "loss": 1.7219, "step": 9230 }, { "epoch": 2.4692677712453235, "grad_norm": 1.3738240003585815, "learning_rate": 0.0001713900703483621, "loss": 1.7736, "step": 9240 }, { "epoch": 2.4719401389631215, "grad_norm": 1.3451578617095947, "learning_rate": 0.00017133125602801722, "loss": 1.7312, "step": 9250 }, { "epoch": 2.4746125066809195, "grad_norm": 1.3625116348266602, "learning_rate": 0.0001712723914303633, "loss": 1.7264, "step": 9260 }, { "epoch": 2.477284874398717, "grad_norm": 1.248052716255188, "learning_rate": 0.0001712134765968907, "loss": 1.7155, "step": 9270 }, { "epoch": 2.479957242116515, "grad_norm": 1.4670758247375488, "learning_rate": 0.00017115451156912506, "loss": 1.6817, "step": 9280 }, { "epoch": 2.482629609834313, "grad_norm": 1.3838512897491455, "learning_rate": 0.0001710954963886274, "loss": 1.7273, "step": 9290 }, { "epoch": 2.485301977552111, "grad_norm": 1.2552757263183594, "learning_rate": 0.0001710364310969942, "loss": 1.7415, "step": 9300 }, { "epoch": 2.487974345269909, "grad_norm": 1.2451655864715576, "learning_rate": 0.0001709773157358572, "loss": 1.7138, "step": 9310 }, { "epoch": 2.490646712987707, "grad_norm": 1.3128032684326172, "learning_rate": 0.0001709181503468834, "loss": 1.7685, "step": 9320 }, { "epoch": 2.493319080705505, "grad_norm": 1.2741421461105347, "learning_rate": 0.00017085893497177502, "loss": 1.7489, "step": 9330 }, { "epoch": 2.495991448423303, "grad_norm": 1.3218576908111572, "learning_rate": 0.0001707996696522697, "loss": 1.7889, "step": 9340 }, { "epoch": 2.498663816141101, "grad_norm": 1.350038766860962, "learning_rate": 0.00017074035443014006, "loss": 1.7364, "step": 9350 }, { "epoch": 2.501336183858899, "grad_norm": 1.3044747114181519, "learning_rate": 0.0001706809893471941, "loss": 1.7715, "step": 9360 }, { "epoch": 2.504008551576697, "grad_norm": 1.3677699565887451, "learning_rate": 0.00017062157444527477, "loss": 1.7649, "step": 9370 }, { "epoch": 2.506680919294495, "grad_norm": 1.347490668296814, "learning_rate": 0.00017056210976626023, "loss": 1.7158, "step": 9380 }, { "epoch": 2.509353287012293, "grad_norm": 1.3666515350341797, "learning_rate": 0.00017050259535206382, "loss": 1.7001, "step": 9390 }, { "epoch": 2.512025654730091, "grad_norm": 1.380007266998291, "learning_rate": 0.00017044303124463372, "loss": 1.7905, "step": 9400 }, { "epoch": 2.514698022447889, "grad_norm": 1.2397706508636475, "learning_rate": 0.00017038341748595328, "loss": 1.7075, "step": 9410 }, { "epoch": 2.517370390165687, "grad_norm": 1.390709400177002, "learning_rate": 0.00017032375411804086, "loss": 1.7399, "step": 9420 }, { "epoch": 2.520042757883485, "grad_norm": 1.31925368309021, "learning_rate": 0.0001702640411829497, "loss": 1.7753, "step": 9430 }, { "epoch": 2.522715125601283, "grad_norm": 1.3578137159347534, "learning_rate": 0.00017020427872276806, "loss": 1.6879, "step": 9440 }, { "epoch": 2.525387493319081, "grad_norm": 1.3528056144714355, "learning_rate": 0.00017014446677961903, "loss": 1.7204, "step": 9450 }, { "epoch": 2.5280598610368785, "grad_norm": 1.3264003992080688, "learning_rate": 0.0001700846053956606, "loss": 1.6537, "step": 9460 }, { "epoch": 2.5307322287546765, "grad_norm": 1.285840392112732, "learning_rate": 0.00017002469461308573, "loss": 1.7186, "step": 9470 }, { "epoch": 2.5334045964724745, "grad_norm": 1.2316747903823853, "learning_rate": 0.00016996473447412197, "loss": 1.7763, "step": 9480 }, { "epoch": 2.5360769641902725, "grad_norm": 1.3861089944839478, "learning_rate": 0.00016990472502103186, "loss": 1.7207, "step": 9490 }, { "epoch": 2.5387493319080705, "grad_norm": 1.3715717792510986, "learning_rate": 0.00016984466629611254, "loss": 1.7232, "step": 9500 }, { "epoch": 2.5414216996258685, "grad_norm": 1.5018290281295776, "learning_rate": 0.00016978455834169606, "loss": 1.7424, "step": 9510 }, { "epoch": 2.5440940673436665, "grad_norm": 1.26276695728302, "learning_rate": 0.000169724401200149, "loss": 1.7195, "step": 9520 }, { "epoch": 2.5467664350614645, "grad_norm": 1.4196491241455078, "learning_rate": 0.00016966419491387267, "loss": 1.788, "step": 9530 }, { "epoch": 2.5494388027792625, "grad_norm": 1.3643399477005005, "learning_rate": 0.0001696039395253031, "loss": 1.7935, "step": 9540 }, { "epoch": 2.5521111704970605, "grad_norm": 1.230284333229065, "learning_rate": 0.00016954363507691076, "loss": 1.6665, "step": 9550 }, { "epoch": 2.5547835382148585, "grad_norm": 1.2991338968276978, "learning_rate": 0.00016948328161120086, "loss": 1.7156, "step": 9560 }, { "epoch": 2.5574559059326565, "grad_norm": 1.5775761604309082, "learning_rate": 0.00016942287917071308, "loss": 1.6749, "step": 9570 }, { "epoch": 2.560128273650454, "grad_norm": 1.3274940252304077, "learning_rate": 0.00016936242779802164, "loss": 1.7407, "step": 9580 }, { "epoch": 2.562800641368252, "grad_norm": 1.2171717882156372, "learning_rate": 0.0001693019275357352, "loss": 1.7929, "step": 9590 }, { "epoch": 2.56547300908605, "grad_norm": 1.3335037231445312, "learning_rate": 0.00016924137842649702, "loss": 1.7101, "step": 9600 }, { "epoch": 2.568145376803848, "grad_norm": 1.2525278329849243, "learning_rate": 0.00016918078051298457, "loss": 1.7031, "step": 9610 }, { "epoch": 2.570817744521646, "grad_norm": 1.2409635782241821, "learning_rate": 0.0001691201338379099, "loss": 1.7306, "step": 9620 }, { "epoch": 2.573490112239444, "grad_norm": 1.2902002334594727, "learning_rate": 0.0001690594384440194, "loss": 1.7211, "step": 9630 }, { "epoch": 2.576162479957242, "grad_norm": 1.3910725116729736, "learning_rate": 0.00016899869437409372, "loss": 1.7647, "step": 9640 }, { "epoch": 2.57883484767504, "grad_norm": 1.4463739395141602, "learning_rate": 0.0001689379016709479, "loss": 1.7101, "step": 9650 }, { "epoch": 2.581507215392838, "grad_norm": 1.390336036682129, "learning_rate": 0.0001688770603774312, "loss": 1.6808, "step": 9660 }, { "epoch": 2.584179583110636, "grad_norm": 1.2885065078735352, "learning_rate": 0.0001688161705364272, "loss": 1.7203, "step": 9670 }, { "epoch": 2.586851950828434, "grad_norm": 1.3509775400161743, "learning_rate": 0.00016875523219085363, "loss": 1.6905, "step": 9680 }, { "epoch": 2.589524318546232, "grad_norm": 1.29179048538208, "learning_rate": 0.0001686942453836624, "loss": 1.7753, "step": 9690 }, { "epoch": 2.59219668626403, "grad_norm": 1.3519556522369385, "learning_rate": 0.00016863321015783968, "loss": 1.743, "step": 9700 }, { "epoch": 2.594869053981828, "grad_norm": 1.3056449890136719, "learning_rate": 0.00016857212655640565, "loss": 1.7583, "step": 9710 }, { "epoch": 2.597541421699626, "grad_norm": 1.325150728225708, "learning_rate": 0.00016851099462241467, "loss": 1.7167, "step": 9720 }, { "epoch": 2.600213789417424, "grad_norm": 1.3329095840454102, "learning_rate": 0.00016844981439895505, "loss": 1.7032, "step": 9730 }, { "epoch": 2.602886157135222, "grad_norm": 1.1164215803146362, "learning_rate": 0.00016838858592914933, "loss": 1.6624, "step": 9740 }, { "epoch": 2.60555852485302, "grad_norm": 1.32456636428833, "learning_rate": 0.00016832730925615388, "loss": 1.7574, "step": 9750 }, { "epoch": 2.608230892570818, "grad_norm": 1.4649831056594849, "learning_rate": 0.0001682659844231591, "loss": 1.7048, "step": 9760 }, { "epoch": 2.610903260288616, "grad_norm": 1.3625433444976807, "learning_rate": 0.00016820461147338942, "loss": 1.7026, "step": 9770 }, { "epoch": 2.6135756280064135, "grad_norm": 1.3138645887374878, "learning_rate": 0.000168143190450103, "loss": 1.7745, "step": 9780 }, { "epoch": 2.6162479957242115, "grad_norm": 1.2378613948822021, "learning_rate": 0.00016808172139659209, "loss": 1.6838, "step": 9790 }, { "epoch": 2.6189203634420095, "grad_norm": 1.6066712141036987, "learning_rate": 0.00016802020435618267, "loss": 1.7235, "step": 9800 }, { "epoch": 2.6215927311598075, "grad_norm": 1.3208084106445312, "learning_rate": 0.00016795863937223453, "loss": 1.6804, "step": 9810 }, { "epoch": 2.6242650988776055, "grad_norm": 1.2170714139938354, "learning_rate": 0.00016789702648814135, "loss": 1.7057, "step": 9820 }, { "epoch": 2.6269374665954035, "grad_norm": 1.3493729829788208, "learning_rate": 0.00016783536574733047, "loss": 1.6769, "step": 9830 }, { "epoch": 2.6296098343132015, "grad_norm": 1.3735666275024414, "learning_rate": 0.00016777365719326305, "loss": 1.7213, "step": 9840 }, { "epoch": 2.6322822020309995, "grad_norm": 1.315576434135437, "learning_rate": 0.0001677119008694339, "loss": 1.7572, "step": 9850 }, { "epoch": 2.6349545697487975, "grad_norm": 1.2990847826004028, "learning_rate": 0.00016765009681937149, "loss": 1.741, "step": 9860 }, { "epoch": 2.6376269374665955, "grad_norm": 1.2594319581985474, "learning_rate": 0.00016758824508663795, "loss": 1.718, "step": 9870 }, { "epoch": 2.6402993051843935, "grad_norm": 1.3621447086334229, "learning_rate": 0.00016752634571482903, "loss": 1.6852, "step": 9880 }, { "epoch": 2.6429716729021915, "grad_norm": 1.4120498895645142, "learning_rate": 0.00016746439874757406, "loss": 1.7506, "step": 9890 }, { "epoch": 2.645644040619989, "grad_norm": 1.1625776290893555, "learning_rate": 0.00016740240422853582, "loss": 1.7878, "step": 9900 }, { "epoch": 2.648316408337787, "grad_norm": 1.3444859981536865, "learning_rate": 0.00016734036220141077, "loss": 1.711, "step": 9910 }, { "epoch": 2.650988776055585, "grad_norm": 1.290184736251831, "learning_rate": 0.0001672782727099288, "loss": 1.6725, "step": 9920 }, { "epoch": 2.653661143773383, "grad_norm": 1.3869785070419312, "learning_rate": 0.00016721613579785317, "loss": 1.7347, "step": 9930 }, { "epoch": 2.656333511491181, "grad_norm": 1.3091959953308105, "learning_rate": 0.00016715395150898065, "loss": 1.7358, "step": 9940 }, { "epoch": 2.659005879208979, "grad_norm": 1.4104623794555664, "learning_rate": 0.00016709171988714134, "loss": 1.7717, "step": 9950 }, { "epoch": 2.661678246926777, "grad_norm": 1.270210862159729, "learning_rate": 0.00016702944097619877, "loss": 1.7246, "step": 9960 }, { "epoch": 2.664350614644575, "grad_norm": 1.3403785228729248, "learning_rate": 0.00016696711482004975, "loss": 1.7422, "step": 9970 }, { "epoch": 2.667022982362373, "grad_norm": 1.3155403137207031, "learning_rate": 0.00016690474146262445, "loss": 1.7393, "step": 9980 }, { "epoch": 2.669695350080171, "grad_norm": 1.2800922393798828, "learning_rate": 0.00016684232094788621, "loss": 1.7407, "step": 9990 }, { "epoch": 2.672367717797969, "grad_norm": 1.3908976316452026, "learning_rate": 0.00016677985331983174, "loss": 1.7189, "step": 10000 }, { "epoch": 2.675040085515767, "grad_norm": 1.28983736038208, "learning_rate": 0.00016671733862249087, "loss": 1.6803, "step": 10010 }, { "epoch": 2.677712453233565, "grad_norm": 1.2946239709854126, "learning_rate": 0.00016665477689992657, "loss": 1.6751, "step": 10020 }, { "epoch": 2.680384820951363, "grad_norm": 1.310894250869751, "learning_rate": 0.000166592168196235, "loss": 1.7841, "step": 10030 }, { "epoch": 2.683057188669161, "grad_norm": 1.3421567678451538, "learning_rate": 0.00016652951255554556, "loss": 1.7249, "step": 10040 }, { "epoch": 2.685729556386959, "grad_norm": 1.2867803573608398, "learning_rate": 0.00016646681002202053, "loss": 1.7287, "step": 10050 }, { "epoch": 2.688401924104757, "grad_norm": 1.28252112865448, "learning_rate": 0.00016640406063985532, "loss": 1.7645, "step": 10060 }, { "epoch": 2.691074291822555, "grad_norm": 1.3790788650512695, "learning_rate": 0.00016634126445327837, "loss": 1.6884, "step": 10070 }, { "epoch": 2.693746659540353, "grad_norm": 1.3141494989395142, "learning_rate": 0.00016627842150655116, "loss": 1.7077, "step": 10080 }, { "epoch": 2.696419027258151, "grad_norm": 1.4122681617736816, "learning_rate": 0.00016621553184396797, "loss": 1.7857, "step": 10090 }, { "epoch": 2.699091394975949, "grad_norm": 1.212904930114746, "learning_rate": 0.00016615259550985628, "loss": 1.6774, "step": 10100 }, { "epoch": 2.7017637626937465, "grad_norm": 1.3065979480743408, "learning_rate": 0.00016608961254857614, "loss": 1.7002, "step": 10110 }, { "epoch": 2.7044361304115445, "grad_norm": 1.3611199855804443, "learning_rate": 0.00016602658300452072, "loss": 1.7818, "step": 10120 }, { "epoch": 2.7071084981293425, "grad_norm": 1.3592082262039185, "learning_rate": 0.0001659635069221159, "loss": 1.6907, "step": 10130 }, { "epoch": 2.7097808658471405, "grad_norm": 1.3596382141113281, "learning_rate": 0.00016590038434582038, "loss": 1.7305, "step": 10140 }, { "epoch": 2.7124532335649385, "grad_norm": 1.226095199584961, "learning_rate": 0.00016583721532012567, "loss": 1.7599, "step": 10150 }, { "epoch": 2.7151256012827365, "grad_norm": 1.2843650579452515, "learning_rate": 0.00016577399988955595, "loss": 1.6564, "step": 10160 }, { "epoch": 2.7177979690005345, "grad_norm": 1.2752437591552734, "learning_rate": 0.00016571073809866816, "loss": 1.7466, "step": 10170 }, { "epoch": 2.7204703367183325, "grad_norm": 1.2111462354660034, "learning_rate": 0.00016564742999205194, "loss": 1.7266, "step": 10180 }, { "epoch": 2.7231427044361305, "grad_norm": 1.2403128147125244, "learning_rate": 0.00016558407561432953, "loss": 1.741, "step": 10190 }, { "epoch": 2.7258150721539285, "grad_norm": 1.2999945878982544, "learning_rate": 0.0001655206750101558, "loss": 1.7946, "step": 10200 }, { "epoch": 2.7284874398717265, "grad_norm": 1.2286380529403687, "learning_rate": 0.00016545722822421813, "loss": 1.7352, "step": 10210 }, { "epoch": 2.7311598075895245, "grad_norm": 1.4907112121582031, "learning_rate": 0.0001653937353012366, "loss": 1.6907, "step": 10220 }, { "epoch": 2.733832175307322, "grad_norm": 1.3053269386291504, "learning_rate": 0.00016533019628596368, "loss": 1.7078, "step": 10230 }, { "epoch": 2.73650454302512, "grad_norm": 1.367099642753601, "learning_rate": 0.00016526661122318444, "loss": 1.704, "step": 10240 }, { "epoch": 2.739176910742918, "grad_norm": 1.4227197170257568, "learning_rate": 0.00016520298015771626, "loss": 1.7224, "step": 10250 }, { "epoch": 2.741849278460716, "grad_norm": 1.3679789304733276, "learning_rate": 0.0001651393031344091, "loss": 1.7932, "step": 10260 }, { "epoch": 2.744521646178514, "grad_norm": 1.3766450881958008, "learning_rate": 0.00016507558019814516, "loss": 1.6428, "step": 10270 }, { "epoch": 2.747194013896312, "grad_norm": 1.4413139820098877, "learning_rate": 0.00016501181139383917, "loss": 1.6424, "step": 10280 }, { "epoch": 2.74986638161411, "grad_norm": 1.2802283763885498, "learning_rate": 0.00016494799676643803, "loss": 1.7878, "step": 10290 }, { "epoch": 2.752538749331908, "grad_norm": 1.3058050870895386, "learning_rate": 0.000164884136360921, "loss": 1.7315, "step": 10300 }, { "epoch": 2.755211117049706, "grad_norm": 1.2209022045135498, "learning_rate": 0.00016482023022229968, "loss": 1.7429, "step": 10310 }, { "epoch": 2.757883484767504, "grad_norm": 1.3601993322372437, "learning_rate": 0.0001647562783956178, "loss": 1.8099, "step": 10320 }, { "epoch": 2.760555852485302, "grad_norm": 1.326979398727417, "learning_rate": 0.00016469228092595137, "loss": 1.7229, "step": 10330 }, { "epoch": 2.7632282202031, "grad_norm": 1.2629777193069458, "learning_rate": 0.00016462823785840848, "loss": 1.8033, "step": 10340 }, { "epoch": 2.765900587920898, "grad_norm": 1.466591477394104, "learning_rate": 0.00016456414923812944, "loss": 1.7294, "step": 10350 }, { "epoch": 2.768572955638696, "grad_norm": 1.3726177215576172, "learning_rate": 0.00016450001511028665, "loss": 1.7337, "step": 10360 }, { "epoch": 2.771245323356494, "grad_norm": 1.3623446226119995, "learning_rate": 0.00016443583552008456, "loss": 1.6891, "step": 10370 }, { "epoch": 2.773917691074292, "grad_norm": 1.264294981956482, "learning_rate": 0.0001643716105127597, "loss": 1.6886, "step": 10380 }, { "epoch": 2.77659005879209, "grad_norm": 1.3817180395126343, "learning_rate": 0.00016430734013358055, "loss": 1.7559, "step": 10390 }, { "epoch": 2.779262426509888, "grad_norm": 1.3496696949005127, "learning_rate": 0.00016424302442784764, "loss": 1.6906, "step": 10400 }, { "epoch": 2.781934794227686, "grad_norm": 1.4543319940567017, "learning_rate": 0.00016417866344089346, "loss": 1.7465, "step": 10410 }, { "epoch": 2.784607161945484, "grad_norm": 1.3868132829666138, "learning_rate": 0.00016411425721808226, "loss": 1.7322, "step": 10420 }, { "epoch": 2.7872795296632815, "grad_norm": 1.2394338846206665, "learning_rate": 0.00016404980580481042, "loss": 1.7826, "step": 10430 }, { "epoch": 2.7899518973810795, "grad_norm": 1.2655469179153442, "learning_rate": 0.00016398530924650602, "loss": 1.6935, "step": 10440 }, { "epoch": 2.7926242650988775, "grad_norm": 1.3503713607788086, "learning_rate": 0.0001639207675886289, "loss": 1.7606, "step": 10450 }, { "epoch": 2.7952966328166755, "grad_norm": 1.290440559387207, "learning_rate": 0.00016385618087667086, "loss": 1.7148, "step": 10460 }, { "epoch": 2.7979690005344735, "grad_norm": 1.3137966394424438, "learning_rate": 0.0001637915491561553, "loss": 1.7471, "step": 10470 }, { "epoch": 2.8006413682522715, "grad_norm": 1.3273319005966187, "learning_rate": 0.00016372687247263746, "loss": 1.739, "step": 10480 }, { "epoch": 2.8033137359700695, "grad_norm": 1.428066372871399, "learning_rate": 0.00016366215087170417, "loss": 1.7515, "step": 10490 }, { "epoch": 2.8059861036878675, "grad_norm": 1.3554637432098389, "learning_rate": 0.00016359738439897402, "loss": 1.7182, "step": 10500 }, { "epoch": 2.8086584714056655, "grad_norm": 1.374074101448059, "learning_rate": 0.0001635325731000972, "loss": 1.7339, "step": 10510 }, { "epoch": 2.8113308391234635, "grad_norm": 1.3375787734985352, "learning_rate": 0.00016346771702075542, "loss": 1.6936, "step": 10520 }, { "epoch": 2.8140032068412615, "grad_norm": 1.3112101554870605, "learning_rate": 0.00016340281620666204, "loss": 1.6997, "step": 10530 }, { "epoch": 2.8166755745590595, "grad_norm": 1.3300225734710693, "learning_rate": 0.0001633378707035619, "loss": 1.677, "step": 10540 }, { "epoch": 2.819347942276857, "grad_norm": 1.3386642932891846, "learning_rate": 0.00016327288055723136, "loss": 1.7043, "step": 10550 }, { "epoch": 2.822020309994655, "grad_norm": 1.2374159097671509, "learning_rate": 0.00016320784581347827, "loss": 1.7164, "step": 10560 }, { "epoch": 2.824692677712453, "grad_norm": 1.4122684001922607, "learning_rate": 0.00016314276651814184, "loss": 1.7111, "step": 10570 }, { "epoch": 2.827365045430251, "grad_norm": 1.2185890674591064, "learning_rate": 0.00016307764271709281, "loss": 1.7596, "step": 10580 }, { "epoch": 2.830037413148049, "grad_norm": 1.3645682334899902, "learning_rate": 0.00016301247445623312, "loss": 1.6581, "step": 10590 }, { "epoch": 2.832709780865847, "grad_norm": 1.2973945140838623, "learning_rate": 0.0001629472617814962, "loss": 1.7805, "step": 10600 }, { "epoch": 2.835382148583645, "grad_norm": 1.334075689315796, "learning_rate": 0.00016288200473884667, "loss": 1.806, "step": 10610 }, { "epoch": 2.838054516301443, "grad_norm": 1.2988075017929077, "learning_rate": 0.00016281670337428054, "loss": 1.6806, "step": 10620 }, { "epoch": 2.840726884019241, "grad_norm": 1.379639744758606, "learning_rate": 0.00016275135773382494, "loss": 1.6805, "step": 10630 }, { "epoch": 2.843399251737039, "grad_norm": 1.4113703966140747, "learning_rate": 0.00016268596786353832, "loss": 1.7433, "step": 10640 }, { "epoch": 2.846071619454837, "grad_norm": 1.301796555519104, "learning_rate": 0.0001626205338095102, "loss": 1.7348, "step": 10650 }, { "epoch": 2.848743987172635, "grad_norm": 1.2673159837722778, "learning_rate": 0.00016255505561786127, "loss": 1.7428, "step": 10660 }, { "epoch": 2.851416354890433, "grad_norm": 1.3145058155059814, "learning_rate": 0.0001624895333347434, "loss": 1.7835, "step": 10670 }, { "epoch": 2.854088722608231, "grad_norm": 1.3564109802246094, "learning_rate": 0.00016242396700633945, "loss": 1.7186, "step": 10680 }, { "epoch": 2.856761090326029, "grad_norm": 1.294067144393921, "learning_rate": 0.0001623583566788634, "loss": 1.7799, "step": 10690 }, { "epoch": 2.859433458043827, "grad_norm": 1.3368945121765137, "learning_rate": 0.00016229270239856018, "loss": 1.8102, "step": 10700 }, { "epoch": 2.862105825761625, "grad_norm": 1.353661060333252, "learning_rate": 0.0001622270042117057, "loss": 1.7566, "step": 10710 }, { "epoch": 2.864778193479423, "grad_norm": 1.3188835382461548, "learning_rate": 0.0001621612621646069, "loss": 1.7552, "step": 10720 }, { "epoch": 2.867450561197221, "grad_norm": 1.3148976564407349, "learning_rate": 0.00016209547630360155, "loss": 1.7441, "step": 10730 }, { "epoch": 2.870122928915019, "grad_norm": 1.2544513940811157, "learning_rate": 0.0001620296466750583, "loss": 1.7836, "step": 10740 }, { "epoch": 2.8727952966328165, "grad_norm": 1.249030590057373, "learning_rate": 0.0001619637733253767, "loss": 1.7693, "step": 10750 }, { "epoch": 2.8754676643506145, "grad_norm": 1.238337516784668, "learning_rate": 0.00016189785630098716, "loss": 1.7058, "step": 10760 }, { "epoch": 2.8781400320684125, "grad_norm": 1.3537184000015259, "learning_rate": 0.0001618318956483507, "loss": 1.7217, "step": 10770 }, { "epoch": 2.8808123997862105, "grad_norm": 1.2351958751678467, "learning_rate": 0.00016176589141395924, "loss": 1.7181, "step": 10780 }, { "epoch": 2.8834847675040085, "grad_norm": 1.3087337017059326, "learning_rate": 0.00016169984364433537, "loss": 1.7465, "step": 10790 }, { "epoch": 2.8861571352218065, "grad_norm": 1.2616442441940308, "learning_rate": 0.0001616337523860324, "loss": 1.7807, "step": 10800 }, { "epoch": 2.8888295029396045, "grad_norm": 1.2221183776855469, "learning_rate": 0.00016156761768563427, "loss": 1.6754, "step": 10810 }, { "epoch": 2.8915018706574025, "grad_norm": 1.399147391319275, "learning_rate": 0.00016150143958975548, "loss": 1.7301, "step": 10820 }, { "epoch": 2.8941742383752005, "grad_norm": 1.2853881120681763, "learning_rate": 0.00016143521814504118, "loss": 1.7429, "step": 10830 }, { "epoch": 2.8968466060929985, "grad_norm": 1.3160619735717773, "learning_rate": 0.00016136895339816714, "loss": 1.7433, "step": 10840 }, { "epoch": 2.8995189738107965, "grad_norm": 1.3412164449691772, "learning_rate": 0.0001613026453958395, "loss": 1.6782, "step": 10850 }, { "epoch": 2.9021913415285945, "grad_norm": 1.538735032081604, "learning_rate": 0.00016123629418479505, "loss": 1.7102, "step": 10860 }, { "epoch": 2.904863709246392, "grad_norm": 1.3501207828521729, "learning_rate": 0.00016116989981180085, "loss": 1.7055, "step": 10870 }, { "epoch": 2.90753607696419, "grad_norm": 1.1933869123458862, "learning_rate": 0.00016110346232365452, "loss": 1.745, "step": 10880 }, { "epoch": 2.910208444681988, "grad_norm": 1.3364607095718384, "learning_rate": 0.00016103698176718405, "loss": 1.7083, "step": 10890 }, { "epoch": 2.912880812399786, "grad_norm": 1.2646677494049072, "learning_rate": 0.00016097045818924777, "loss": 1.7698, "step": 10900 }, { "epoch": 2.915553180117584, "grad_norm": 1.337547779083252, "learning_rate": 0.00016090389163673434, "loss": 1.7008, "step": 10910 }, { "epoch": 2.918225547835382, "grad_norm": 1.3661710023880005, "learning_rate": 0.00016083728215656267, "loss": 1.7276, "step": 10920 }, { "epoch": 2.92089791555318, "grad_norm": 1.3688632249832153, "learning_rate": 0.00016077062979568196, "loss": 1.6726, "step": 10930 }, { "epoch": 2.923570283270978, "grad_norm": 1.3069576025009155, "learning_rate": 0.0001607039346010717, "loss": 1.7281, "step": 10940 }, { "epoch": 2.926242650988776, "grad_norm": 1.3173613548278809, "learning_rate": 0.00016063719661974142, "loss": 1.7207, "step": 10950 }, { "epoch": 2.928915018706574, "grad_norm": 1.3447896242141724, "learning_rate": 0.00016057041589873094, "loss": 1.7599, "step": 10960 }, { "epoch": 2.931587386424372, "grad_norm": 1.4540988206863403, "learning_rate": 0.00016050359248511015, "loss": 1.661, "step": 10970 }, { "epoch": 2.93425975414217, "grad_norm": 1.2639456987380981, "learning_rate": 0.00016043672642597901, "loss": 1.64, "step": 10980 }, { "epoch": 2.936932121859968, "grad_norm": 1.3014018535614014, "learning_rate": 0.00016036981776846763, "loss": 1.6938, "step": 10990 }, { "epoch": 2.939604489577766, "grad_norm": 1.3340500593185425, "learning_rate": 0.00016030286655973605, "loss": 1.7315, "step": 11000 }, { "epoch": 2.942276857295564, "grad_norm": 1.3241417407989502, "learning_rate": 0.0001602358728469743, "loss": 1.7756, "step": 11010 }, { "epoch": 2.944949225013362, "grad_norm": 1.4531340599060059, "learning_rate": 0.00016016883667740247, "loss": 1.764, "step": 11020 }, { "epoch": 2.94762159273116, "grad_norm": 1.360221266746521, "learning_rate": 0.00016010175809827047, "loss": 1.7168, "step": 11030 }, { "epoch": 2.950293960448958, "grad_norm": 1.288360595703125, "learning_rate": 0.00016003463715685812, "loss": 1.7297, "step": 11040 }, { "epoch": 2.952966328166756, "grad_norm": 1.3397499322891235, "learning_rate": 0.00015996747390047519, "loss": 1.7434, "step": 11050 }, { "epoch": 2.955638695884554, "grad_norm": 1.356138825416565, "learning_rate": 0.00015990026837646115, "loss": 1.7398, "step": 11060 }, { "epoch": 2.9583110636023515, "grad_norm": 1.2560276985168457, "learning_rate": 0.00015983302063218532, "loss": 1.8056, "step": 11070 }, { "epoch": 2.9609834313201495, "grad_norm": 1.293848991394043, "learning_rate": 0.00015976573071504682, "loss": 1.7335, "step": 11080 }, { "epoch": 2.9636557990379475, "grad_norm": 1.3045257329940796, "learning_rate": 0.00015969839867247435, "loss": 1.7713, "step": 11090 }, { "epoch": 2.9663281667557455, "grad_norm": 1.3143424987792969, "learning_rate": 0.00015963102455192652, "loss": 1.7589, "step": 11100 }, { "epoch": 2.9690005344735435, "grad_norm": 1.3248462677001953, "learning_rate": 0.00015956360840089142, "loss": 1.7045, "step": 11110 }, { "epoch": 2.9716729021913415, "grad_norm": 1.3211400508880615, "learning_rate": 0.00015949615026688684, "loss": 1.7464, "step": 11120 }, { "epoch": 2.9743452699091395, "grad_norm": 1.4016636610031128, "learning_rate": 0.00015942865019746014, "loss": 1.701, "step": 11130 }, { "epoch": 2.9770176376269375, "grad_norm": 1.300790786743164, "learning_rate": 0.00015936110824018825, "loss": 1.7487, "step": 11140 }, { "epoch": 2.9796900053447355, "grad_norm": 1.4114218950271606, "learning_rate": 0.00015929352444267762, "loss": 1.6506, "step": 11150 }, { "epoch": 2.9823623730625335, "grad_norm": 1.2759777307510376, "learning_rate": 0.0001592258988525642, "loss": 1.7607, "step": 11160 }, { "epoch": 2.9850347407803315, "grad_norm": 1.2985563278198242, "learning_rate": 0.0001591582315175134, "loss": 1.7276, "step": 11170 }, { "epoch": 2.9877071084981295, "grad_norm": 1.3185759782791138, "learning_rate": 0.00015909052248521996, "loss": 1.7113, "step": 11180 }, { "epoch": 2.990379476215927, "grad_norm": 1.4013677835464478, "learning_rate": 0.0001590227718034082, "loss": 1.7043, "step": 11190 }, { "epoch": 2.993051843933725, "grad_norm": 1.3771142959594727, "learning_rate": 0.00015895497951983162, "loss": 1.7113, "step": 11200 }, { "epoch": 2.995724211651523, "grad_norm": 1.4461156129837036, "learning_rate": 0.00015888714568227313, "loss": 1.689, "step": 11210 }, { "epoch": 2.998396579369321, "grad_norm": 1.3167692422866821, "learning_rate": 0.0001588192703385449, "loss": 1.7975, "step": 11220 }, { "epoch": 3.001068947087119, "grad_norm": 1.246795415878296, "learning_rate": 0.0001587513535364884, "loss": 1.6658, "step": 11230 }, { "epoch": 3.003741314804917, "grad_norm": 1.34390389919281, "learning_rate": 0.00015868339532397425, "loss": 1.5372, "step": 11240 }, { "epoch": 3.006413682522715, "grad_norm": 1.3438529968261719, "learning_rate": 0.00015861539574890226, "loss": 1.6494, "step": 11250 }, { "epoch": 3.009086050240513, "grad_norm": 1.4086190462112427, "learning_rate": 0.00015854735485920152, "loss": 1.5712, "step": 11260 }, { "epoch": 3.011758417958311, "grad_norm": 1.4233211278915405, "learning_rate": 0.0001584792727028301, "loss": 1.5603, "step": 11270 }, { "epoch": 3.014430785676109, "grad_norm": 1.5091338157653809, "learning_rate": 0.0001584111493277752, "loss": 1.5916, "step": 11280 }, { "epoch": 3.017103153393907, "grad_norm": 1.4274498224258423, "learning_rate": 0.0001583429847820531, "loss": 1.5357, "step": 11290 }, { "epoch": 3.019775521111705, "grad_norm": 1.9058419466018677, "learning_rate": 0.000158274779113709, "loss": 1.5132, "step": 11300 }, { "epoch": 3.022447888829503, "grad_norm": 1.4838693141937256, "learning_rate": 0.00015820653237081728, "loss": 1.5447, "step": 11310 }, { "epoch": 3.025120256547301, "grad_norm": 1.347912311553955, "learning_rate": 0.0001581382446014811, "loss": 1.5856, "step": 11320 }, { "epoch": 3.027792624265099, "grad_norm": 1.5340098142623901, "learning_rate": 0.00015806991585383257, "loss": 1.5716, "step": 11330 }, { "epoch": 3.030464991982897, "grad_norm": 1.4221084117889404, "learning_rate": 0.00015800154617603269, "loss": 1.4937, "step": 11340 }, { "epoch": 3.033137359700695, "grad_norm": 1.35970938205719, "learning_rate": 0.00015793313561627135, "loss": 1.4901, "step": 11350 }, { "epoch": 3.035809727418493, "grad_norm": 1.438905954360962, "learning_rate": 0.00015786468422276728, "loss": 1.5713, "step": 11360 }, { "epoch": 3.038482095136291, "grad_norm": 1.491072177886963, "learning_rate": 0.00015779619204376786, "loss": 1.6207, "step": 11370 }, { "epoch": 3.0411544628540885, "grad_norm": 1.4626355171203613, "learning_rate": 0.0001577276591275493, "loss": 1.4127, "step": 11380 }, { "epoch": 3.0438268305718865, "grad_norm": 1.398729681968689, "learning_rate": 0.00015765908552241658, "loss": 1.5265, "step": 11390 }, { "epoch": 3.0464991982896845, "grad_norm": 1.3984798192977905, "learning_rate": 0.00015759047127670325, "loss": 1.656, "step": 11400 }, { "epoch": 3.0491715660074825, "grad_norm": 1.559378981590271, "learning_rate": 0.00015752181643877155, "loss": 1.5467, "step": 11410 }, { "epoch": 3.0518439337252805, "grad_norm": 1.4860576391220093, "learning_rate": 0.00015745312105701237, "loss": 1.5323, "step": 11420 }, { "epoch": 3.0545163014430785, "grad_norm": 1.4080946445465088, "learning_rate": 0.00015738438517984513, "loss": 1.5656, "step": 11430 }, { "epoch": 3.0571886691608765, "grad_norm": 1.4330800771713257, "learning_rate": 0.00015731560885571782, "loss": 1.6246, "step": 11440 }, { "epoch": 3.0598610368786745, "grad_norm": 1.4812606573104858, "learning_rate": 0.00015724679213310685, "loss": 1.6035, "step": 11450 }, { "epoch": 3.0625334045964725, "grad_norm": 1.4903162717819214, "learning_rate": 0.00015717793506051726, "loss": 1.6082, "step": 11460 }, { "epoch": 3.0652057723142705, "grad_norm": 1.451586365699768, "learning_rate": 0.0001571090376864824, "loss": 1.5459, "step": 11470 }, { "epoch": 3.0678781400320685, "grad_norm": 1.4366097450256348, "learning_rate": 0.00015704010005956415, "loss": 1.596, "step": 11480 }, { "epoch": 3.0705505077498665, "grad_norm": 1.4396469593048096, "learning_rate": 0.0001569711222283526, "loss": 1.5666, "step": 11490 }, { "epoch": 3.0732228754676645, "grad_norm": 1.4093619585037231, "learning_rate": 0.0001569021042414663, "loss": 1.5452, "step": 11500 }, { "epoch": 3.0758952431854625, "grad_norm": 1.4156750440597534, "learning_rate": 0.00015683304614755208, "loss": 1.6241, "step": 11510 }, { "epoch": 3.0785676109032605, "grad_norm": 1.5277137756347656, "learning_rate": 0.00015676394799528503, "loss": 1.5817, "step": 11520 }, { "epoch": 3.081239978621058, "grad_norm": 1.405486822128296, "learning_rate": 0.00015669480983336842, "loss": 1.5807, "step": 11530 }, { "epoch": 3.083912346338856, "grad_norm": 1.6159441471099854, "learning_rate": 0.00015662563171053382, "loss": 1.5476, "step": 11540 }, { "epoch": 3.086584714056654, "grad_norm": 1.4131660461425781, "learning_rate": 0.0001565564136755409, "loss": 1.575, "step": 11550 }, { "epoch": 3.089257081774452, "grad_norm": 1.4706740379333496, "learning_rate": 0.00015648715577717753, "loss": 1.5856, "step": 11560 }, { "epoch": 3.09192944949225, "grad_norm": 1.5544694662094116, "learning_rate": 0.00015641785806425957, "loss": 1.5473, "step": 11570 }, { "epoch": 3.094601817210048, "grad_norm": 1.465657114982605, "learning_rate": 0.00015634852058563103, "loss": 1.5396, "step": 11580 }, { "epoch": 3.097274184927846, "grad_norm": 1.37485933303833, "learning_rate": 0.00015627914339016386, "loss": 1.5261, "step": 11590 }, { "epoch": 3.099946552645644, "grad_norm": 1.4936021566390991, "learning_rate": 0.00015620972652675817, "loss": 1.5454, "step": 11600 }, { "epoch": 3.102618920363442, "grad_norm": 1.3922851085662842, "learning_rate": 0.00015614027004434187, "loss": 1.5764, "step": 11610 }, { "epoch": 3.10529128808124, "grad_norm": 1.4229692220687866, "learning_rate": 0.0001560707739918708, "loss": 1.5583, "step": 11620 }, { "epoch": 3.107963655799038, "grad_norm": 1.4172704219818115, "learning_rate": 0.0001560012384183288, "loss": 1.5571, "step": 11630 }, { "epoch": 3.110636023516836, "grad_norm": 1.5273038148880005, "learning_rate": 0.0001559316633727275, "loss": 1.5861, "step": 11640 }, { "epoch": 3.113308391234634, "grad_norm": 1.4015402793884277, "learning_rate": 0.00015586204890410637, "loss": 1.5473, "step": 11650 }, { "epoch": 3.115980758952432, "grad_norm": 1.5113670825958252, "learning_rate": 0.0001557923950615326, "loss": 1.6162, "step": 11660 }, { "epoch": 3.11865312667023, "grad_norm": 1.4393116235733032, "learning_rate": 0.0001557227018941012, "loss": 1.6128, "step": 11670 }, { "epoch": 3.121325494388028, "grad_norm": 1.5794994831085205, "learning_rate": 0.00015565296945093491, "loss": 1.5698, "step": 11680 }, { "epoch": 3.123997862105826, "grad_norm": 1.395870566368103, "learning_rate": 0.0001555831977811842, "loss": 1.6017, "step": 11690 }, { "epoch": 3.1266702298236235, "grad_norm": 1.6304802894592285, "learning_rate": 0.00015551338693402692, "loss": 1.583, "step": 11700 }, { "epoch": 3.1293425975414215, "grad_norm": 1.4940779209136963, "learning_rate": 0.00015544353695866892, "loss": 1.5248, "step": 11710 }, { "epoch": 3.1320149652592195, "grad_norm": 1.4320130348205566, "learning_rate": 0.0001553736479043433, "loss": 1.5636, "step": 11720 }, { "epoch": 3.1346873329770175, "grad_norm": 1.405187964439392, "learning_rate": 0.00015530371982031097, "loss": 1.5589, "step": 11730 }, { "epoch": 3.1373597006948155, "grad_norm": 1.5311636924743652, "learning_rate": 0.00015523375275586015, "loss": 1.4999, "step": 11740 }, { "epoch": 3.1400320684126135, "grad_norm": 1.614567756652832, "learning_rate": 0.0001551637467603066, "loss": 1.5888, "step": 11750 }, { "epoch": 3.1427044361304115, "grad_norm": 1.5212658643722534, "learning_rate": 0.0001550937018829936, "loss": 1.6661, "step": 11760 }, { "epoch": 3.1453768038482095, "grad_norm": 1.5886166095733643, "learning_rate": 0.00015502361817329167, "loss": 1.5447, "step": 11770 }, { "epoch": 3.1480491715660075, "grad_norm": 1.5095372200012207, "learning_rate": 0.00015495349568059888, "loss": 1.5925, "step": 11780 }, { "epoch": 3.1507215392838055, "grad_norm": 1.6610372066497803, "learning_rate": 0.00015488333445434047, "loss": 1.5864, "step": 11790 }, { "epoch": 3.1533939070016035, "grad_norm": 1.493183970451355, "learning_rate": 0.0001548131345439691, "loss": 1.5641, "step": 11800 }, { "epoch": 3.1560662747194015, "grad_norm": 1.4638108015060425, "learning_rate": 0.00015474289599896467, "loss": 1.5694, "step": 11810 }, { "epoch": 3.1587386424371995, "grad_norm": 1.4211833477020264, "learning_rate": 0.0001546726188688343, "loss": 1.6706, "step": 11820 }, { "epoch": 3.1614110101549975, "grad_norm": 1.4523656368255615, "learning_rate": 0.0001546023032031123, "loss": 1.6533, "step": 11830 }, { "epoch": 3.1640833778727955, "grad_norm": 1.50687575340271, "learning_rate": 0.00015453194905136014, "loss": 1.5646, "step": 11840 }, { "epoch": 3.1667557455905935, "grad_norm": 1.4127225875854492, "learning_rate": 0.00015446155646316645, "loss": 1.5041, "step": 11850 }, { "epoch": 3.169428113308391, "grad_norm": 1.570069432258606, "learning_rate": 0.00015439112548814687, "loss": 1.571, "step": 11860 }, { "epoch": 3.172100481026189, "grad_norm": 1.3592219352722168, "learning_rate": 0.0001543206561759442, "loss": 1.6437, "step": 11870 }, { "epoch": 3.174772848743987, "grad_norm": 1.427656888961792, "learning_rate": 0.0001542501485762282, "loss": 1.5855, "step": 11880 }, { "epoch": 3.177445216461785, "grad_norm": 1.4631425142288208, "learning_rate": 0.00015417960273869565, "loss": 1.6285, "step": 11890 }, { "epoch": 3.180117584179583, "grad_norm": 1.522918701171875, "learning_rate": 0.00015410901871307022, "loss": 1.5572, "step": 11900 }, { "epoch": 3.182789951897381, "grad_norm": 1.433417797088623, "learning_rate": 0.0001540383965491026, "loss": 1.5254, "step": 11910 }, { "epoch": 3.185462319615179, "grad_norm": 1.5127241611480713, "learning_rate": 0.00015396773629657026, "loss": 1.6015, "step": 11920 }, { "epoch": 3.188134687332977, "grad_norm": 1.500370979309082, "learning_rate": 0.00015389703800527756, "loss": 1.5406, "step": 11930 }, { "epoch": 3.190807055050775, "grad_norm": 1.4559721946716309, "learning_rate": 0.00015382630172505564, "loss": 1.574, "step": 11940 }, { "epoch": 3.193479422768573, "grad_norm": 1.5691871643066406, "learning_rate": 0.00015375552750576248, "loss": 1.5687, "step": 11950 }, { "epoch": 3.196151790486371, "grad_norm": 2.147005319595337, "learning_rate": 0.00015368471539728277, "loss": 1.6227, "step": 11960 }, { "epoch": 3.198824158204169, "grad_norm": 1.44551420211792, "learning_rate": 0.00015361386544952791, "loss": 1.6064, "step": 11970 }, { "epoch": 3.201496525921967, "grad_norm": 1.506608486175537, "learning_rate": 0.00015354297771243594, "loss": 1.5777, "step": 11980 }, { "epoch": 3.204168893639765, "grad_norm": 1.6357446908950806, "learning_rate": 0.00015347205223597155, "loss": 1.5225, "step": 11990 }, { "epoch": 3.206841261357563, "grad_norm": 1.5898518562316895, "learning_rate": 0.00015340108907012606, "loss": 1.5726, "step": 12000 }, { "epoch": 3.209513629075361, "grad_norm": 1.3775098323822021, "learning_rate": 0.00015333008826491735, "loss": 1.4897, "step": 12010 }, { "epoch": 3.212185996793159, "grad_norm": 1.532679796218872, "learning_rate": 0.00015325904987038974, "loss": 1.621, "step": 12020 }, { "epoch": 3.2148583645109565, "grad_norm": 1.4138544797897339, "learning_rate": 0.0001531879739366142, "loss": 1.6127, "step": 12030 }, { "epoch": 3.2175307322287545, "grad_norm": 1.481067419052124, "learning_rate": 0.00015311686051368805, "loss": 1.5911, "step": 12040 }, { "epoch": 3.2202030999465525, "grad_norm": 1.4436348676681519, "learning_rate": 0.00015304570965173504, "loss": 1.5967, "step": 12050 }, { "epoch": 3.2228754676643505, "grad_norm": 1.6716563701629639, "learning_rate": 0.0001529745214009053, "loss": 1.5598, "step": 12060 }, { "epoch": 3.2255478353821485, "grad_norm": 1.4454665184020996, "learning_rate": 0.00015290329581137545, "loss": 1.5709, "step": 12070 }, { "epoch": 3.2282202030999465, "grad_norm": 1.512351393699646, "learning_rate": 0.00015283203293334826, "loss": 1.6501, "step": 12080 }, { "epoch": 3.2308925708177445, "grad_norm": 1.4209846258163452, "learning_rate": 0.0001527607328170528, "loss": 1.6023, "step": 12090 }, { "epoch": 3.2335649385355425, "grad_norm": 1.5086133480072021, "learning_rate": 0.0001526893955127445, "loss": 1.561, "step": 12100 }, { "epoch": 3.2362373062533405, "grad_norm": 1.4856555461883545, "learning_rate": 0.0001526180210707049, "loss": 1.642, "step": 12110 }, { "epoch": 3.2389096739711385, "grad_norm": 1.5898791551589966, "learning_rate": 0.00015254660954124174, "loss": 1.6302, "step": 12120 }, { "epoch": 3.2415820416889365, "grad_norm": 1.4988596439361572, "learning_rate": 0.00015247516097468893, "loss": 1.5737, "step": 12130 }, { "epoch": 3.2442544094067345, "grad_norm": 1.62245512008667, "learning_rate": 0.00015240367542140642, "loss": 1.5576, "step": 12140 }, { "epoch": 3.2469267771245325, "grad_norm": 1.5361400842666626, "learning_rate": 0.00015233215293178036, "loss": 1.6377, "step": 12150 }, { "epoch": 3.2495991448423305, "grad_norm": 1.448956847190857, "learning_rate": 0.00015226059355622276, "loss": 1.6119, "step": 12160 }, { "epoch": 3.252271512560128, "grad_norm": 1.6497639417648315, "learning_rate": 0.00015218899734517177, "loss": 1.5715, "step": 12170 }, { "epoch": 3.254943880277926, "grad_norm": 1.4061638116836548, "learning_rate": 0.00015211736434909146, "loss": 1.601, "step": 12180 }, { "epoch": 3.257616247995724, "grad_norm": 1.408265233039856, "learning_rate": 0.00015204569461847175, "loss": 1.581, "step": 12190 }, { "epoch": 3.260288615713522, "grad_norm": 1.5317623615264893, "learning_rate": 0.0001519739882038286, "loss": 1.6239, "step": 12200 }, { "epoch": 3.26296098343132, "grad_norm": 1.4291706085205078, "learning_rate": 0.0001519022451557037, "loss": 1.5913, "step": 12210 }, { "epoch": 3.265633351149118, "grad_norm": 1.454978346824646, "learning_rate": 0.0001518304655246646, "loss": 1.5593, "step": 12220 }, { "epoch": 3.268305718866916, "grad_norm": 1.5863350629806519, "learning_rate": 0.00015175864936130466, "loss": 1.5995, "step": 12230 }, { "epoch": 3.270978086584714, "grad_norm": 1.6901382207870483, "learning_rate": 0.00015168679671624303, "loss": 1.5685, "step": 12240 }, { "epoch": 3.273650454302512, "grad_norm": 1.4609698057174683, "learning_rate": 0.0001516149076401244, "loss": 1.5684, "step": 12250 }, { "epoch": 3.27632282202031, "grad_norm": 1.5242557525634766, "learning_rate": 0.00015154298218361935, "loss": 1.5681, "step": 12260 }, { "epoch": 3.278995189738108, "grad_norm": 1.4671461582183838, "learning_rate": 0.00015147102039742396, "loss": 1.6675, "step": 12270 }, { "epoch": 3.281667557455906, "grad_norm": 1.7080386877059937, "learning_rate": 0.00015139902233226, "loss": 1.5686, "step": 12280 }, { "epoch": 3.284339925173704, "grad_norm": 1.4940818548202515, "learning_rate": 0.0001513269880388747, "loss": 1.6327, "step": 12290 }, { "epoch": 3.287012292891502, "grad_norm": 1.4788765907287598, "learning_rate": 0.00015125491756804097, "loss": 1.6308, "step": 12300 }, { "epoch": 3.2896846606093, "grad_norm": 1.609084963798523, "learning_rate": 0.00015118281097055712, "loss": 1.5448, "step": 12310 }, { "epoch": 3.292357028327098, "grad_norm": 1.4446818828582764, "learning_rate": 0.000151110668297247, "loss": 1.5586, "step": 12320 }, { "epoch": 3.295029396044896, "grad_norm": 1.4681098461151123, "learning_rate": 0.00015103848959895978, "loss": 1.6612, "step": 12330 }, { "epoch": 3.297701763762694, "grad_norm": 1.649408221244812, "learning_rate": 0.00015096627492657008, "loss": 1.5422, "step": 12340 }, { "epoch": 3.3003741314804915, "grad_norm": 1.343523383140564, "learning_rate": 0.0001508940243309779, "loss": 1.5831, "step": 12350 }, { "epoch": 3.3030464991982895, "grad_norm": 1.6102079153060913, "learning_rate": 0.00015082173786310857, "loss": 1.6207, "step": 12360 }, { "epoch": 3.3057188669160875, "grad_norm": 1.4566102027893066, "learning_rate": 0.00015074941557391265, "loss": 1.6514, "step": 12370 }, { "epoch": 3.3083912346338855, "grad_norm": 1.4235904216766357, "learning_rate": 0.00015067705751436593, "loss": 1.6241, "step": 12380 }, { "epoch": 3.3110636023516835, "grad_norm": 1.3748440742492676, "learning_rate": 0.00015060466373546954, "loss": 1.6408, "step": 12390 }, { "epoch": 3.3137359700694815, "grad_norm": 1.5889629125595093, "learning_rate": 0.0001505322342882496, "loss": 1.4889, "step": 12400 }, { "epoch": 3.3164083377872795, "grad_norm": 1.455016851425171, "learning_rate": 0.00015045976922375756, "loss": 1.5592, "step": 12410 }, { "epoch": 3.3190807055050775, "grad_norm": 1.4698208570480347, "learning_rate": 0.0001503872685930698, "loss": 1.6376, "step": 12420 }, { "epoch": 3.3217530732228755, "grad_norm": 1.435241937637329, "learning_rate": 0.00015031473244728793, "loss": 1.6346, "step": 12430 }, { "epoch": 3.3244254409406735, "grad_norm": 1.4528361558914185, "learning_rate": 0.0001502421608375385, "loss": 1.546, "step": 12440 }, { "epoch": 3.3270978086584715, "grad_norm": 1.5781196355819702, "learning_rate": 0.00015016955381497304, "loss": 1.5376, "step": 12450 }, { "epoch": 3.3297701763762695, "grad_norm": 1.466631531715393, "learning_rate": 0.0001500969114307681, "loss": 1.6496, "step": 12460 }, { "epoch": 3.3324425440940675, "grad_norm": 1.4700963497161865, "learning_rate": 0.00015002423373612514, "loss": 1.5212, "step": 12470 }, { "epoch": 3.3351149118118655, "grad_norm": 1.5303999185562134, "learning_rate": 0.00014995152078227045, "loss": 1.615, "step": 12480 }, { "epoch": 3.3377872795296635, "grad_norm": 1.5123428106307983, "learning_rate": 0.00014987877262045526, "loss": 1.6091, "step": 12490 }, { "epoch": 3.340459647247461, "grad_norm": 1.481543779373169, "learning_rate": 0.00014980598930195555, "loss": 1.5989, "step": 12500 }, { "epoch": 3.343132014965259, "grad_norm": 1.5110771656036377, "learning_rate": 0.00014973317087807212, "loss": 1.6372, "step": 12510 }, { "epoch": 3.345804382683057, "grad_norm": 1.4414563179016113, "learning_rate": 0.00014966031740013048, "loss": 1.6062, "step": 12520 }, { "epoch": 3.348476750400855, "grad_norm": 1.542820692062378, "learning_rate": 0.00014958742891948092, "loss": 1.632, "step": 12530 }, { "epoch": 3.351149118118653, "grad_norm": 1.557808518409729, "learning_rate": 0.00014951450548749827, "loss": 1.6191, "step": 12540 }, { "epoch": 3.353821485836451, "grad_norm": 1.4685314893722534, "learning_rate": 0.00014944154715558215, "loss": 1.6241, "step": 12550 }, { "epoch": 3.356493853554249, "grad_norm": 1.5369318723678589, "learning_rate": 0.0001493685539751566, "loss": 1.5917, "step": 12560 }, { "epoch": 3.359166221272047, "grad_norm": 1.484265685081482, "learning_rate": 0.00014929552599767044, "loss": 1.5976, "step": 12570 }, { "epoch": 3.361838588989845, "grad_norm": 1.4627116918563843, "learning_rate": 0.00014922246327459684, "loss": 1.5765, "step": 12580 }, { "epoch": 3.364510956707643, "grad_norm": 1.5683720111846924, "learning_rate": 0.0001491493658574335, "loss": 1.6757, "step": 12590 }, { "epoch": 3.367183324425441, "grad_norm": 1.498773217201233, "learning_rate": 0.00014907623379770263, "loss": 1.5233, "step": 12600 }, { "epoch": 3.369855692143239, "grad_norm": 1.517832636833191, "learning_rate": 0.00014900306714695082, "loss": 1.6862, "step": 12610 }, { "epoch": 3.372528059861037, "grad_norm": 1.419629693031311, "learning_rate": 0.00014892986595674903, "loss": 1.5996, "step": 12620 }, { "epoch": 3.375200427578835, "grad_norm": 1.5775679349899292, "learning_rate": 0.0001488566302786926, "loss": 1.5628, "step": 12630 }, { "epoch": 3.377872795296633, "grad_norm": 1.6346704959869385, "learning_rate": 0.00014878336016440114, "loss": 1.6387, "step": 12640 }, { "epoch": 3.380545163014431, "grad_norm": 1.4277915954589844, "learning_rate": 0.00014871005566551853, "loss": 1.6293, "step": 12650 }, { "epoch": 3.383217530732229, "grad_norm": 1.3590073585510254, "learning_rate": 0.00014863671683371297, "loss": 1.5521, "step": 12660 }, { "epoch": 3.385889898450027, "grad_norm": 1.6856508255004883, "learning_rate": 0.0001485633437206767, "loss": 1.6168, "step": 12670 }, { "epoch": 3.3885622661678245, "grad_norm": 1.4158910512924194, "learning_rate": 0.0001484899363781263, "loss": 1.5644, "step": 12680 }, { "epoch": 3.3912346338856225, "grad_norm": 1.4972407817840576, "learning_rate": 0.00014841649485780234, "loss": 1.6706, "step": 12690 }, { "epoch": 3.3939070016034205, "grad_norm": 1.4520623683929443, "learning_rate": 0.00014834301921146955, "loss": 1.6666, "step": 12700 }, { "epoch": 3.3965793693212185, "grad_norm": 1.3872112035751343, "learning_rate": 0.0001482695094909167, "loss": 1.5605, "step": 12710 }, { "epoch": 3.3992517370390165, "grad_norm": 1.4994367361068726, "learning_rate": 0.00014819596574795657, "loss": 1.5943, "step": 12720 }, { "epoch": 3.4019241047568145, "grad_norm": 1.5546753406524658, "learning_rate": 0.00014812238803442598, "loss": 1.6166, "step": 12730 }, { "epoch": 3.4045964724746125, "grad_norm": 1.5407750606536865, "learning_rate": 0.00014804877640218553, "loss": 1.6469, "step": 12740 }, { "epoch": 3.4072688401924105, "grad_norm": 1.43174409866333, "learning_rate": 0.00014797513090311986, "loss": 1.6406, "step": 12750 }, { "epoch": 3.4099412079102085, "grad_norm": 1.5952435731887817, "learning_rate": 0.00014790145158913756, "loss": 1.6278, "step": 12760 }, { "epoch": 3.4126135756280065, "grad_norm": 1.6369998455047607, "learning_rate": 0.00014782773851217086, "loss": 1.5891, "step": 12770 }, { "epoch": 3.4152859433458045, "grad_norm": 1.4865986108779907, "learning_rate": 0.00014775399172417588, "loss": 1.5867, "step": 12780 }, { "epoch": 3.4179583110636025, "grad_norm": 1.5650240182876587, "learning_rate": 0.0001476802112771325, "loss": 1.6074, "step": 12790 }, { "epoch": 3.4206306787814005, "grad_norm": 1.4547903537750244, "learning_rate": 0.00014760639722304436, "loss": 1.6048, "step": 12800 }, { "epoch": 3.4233030464991985, "grad_norm": 1.4975963830947876, "learning_rate": 0.00014753254961393872, "loss": 1.5787, "step": 12810 }, { "epoch": 3.425975414216996, "grad_norm": 1.6132854223251343, "learning_rate": 0.00014745866850186654, "loss": 1.6289, "step": 12820 }, { "epoch": 3.428647781934794, "grad_norm": 1.3988043069839478, "learning_rate": 0.00014738475393890235, "loss": 1.6492, "step": 12830 }, { "epoch": 3.431320149652592, "grad_norm": 1.5922125577926636, "learning_rate": 0.00014731080597714432, "loss": 1.6155, "step": 12840 }, { "epoch": 3.43399251737039, "grad_norm": 1.4675812721252441, "learning_rate": 0.0001472368246687141, "loss": 1.6425, "step": 12850 }, { "epoch": 3.436664885088188, "grad_norm": 1.4547349214553833, "learning_rate": 0.00014716281006575685, "loss": 1.6382, "step": 12860 }, { "epoch": 3.439337252805986, "grad_norm": 1.455011248588562, "learning_rate": 0.00014708876222044122, "loss": 1.6046, "step": 12870 }, { "epoch": 3.442009620523784, "grad_norm": 1.469198226928711, "learning_rate": 0.00014701468118495935, "loss": 1.6146, "step": 12880 }, { "epoch": 3.444681988241582, "grad_norm": 1.5194551944732666, "learning_rate": 0.0001469405670115267, "loss": 1.59, "step": 12890 }, { "epoch": 3.44735435595938, "grad_norm": 1.4832775592803955, "learning_rate": 0.000146866419752382, "loss": 1.5836, "step": 12900 }, { "epoch": 3.450026723677178, "grad_norm": 1.5915850400924683, "learning_rate": 0.00014679223945978748, "loss": 1.6516, "step": 12910 }, { "epoch": 3.452699091394976, "grad_norm": 1.58090341091156, "learning_rate": 0.00014671802618602855, "loss": 1.6118, "step": 12920 }, { "epoch": 3.455371459112774, "grad_norm": 1.323840856552124, "learning_rate": 0.00014664377998341391, "loss": 1.57, "step": 12930 }, { "epoch": 3.458043826830572, "grad_norm": 1.7080026865005493, "learning_rate": 0.00014656950090427543, "loss": 1.5692, "step": 12940 }, { "epoch": 3.46071619454837, "grad_norm": 1.403754472732544, "learning_rate": 0.00014649518900096815, "loss": 1.5972, "step": 12950 }, { "epoch": 3.463388562266168, "grad_norm": 1.4818941354751587, "learning_rate": 0.00014642084432587028, "loss": 1.6036, "step": 12960 }, { "epoch": 3.466060929983966, "grad_norm": 1.4573633670806885, "learning_rate": 0.00014634646693138315, "loss": 1.626, "step": 12970 }, { "epoch": 3.468733297701764, "grad_norm": 1.5643197298049927, "learning_rate": 0.00014627205686993107, "loss": 1.622, "step": 12980 }, { "epoch": 3.471405665419562, "grad_norm": 1.3899352550506592, "learning_rate": 0.0001461976141939615, "loss": 1.6618, "step": 12990 }, { "epoch": 3.4740780331373595, "grad_norm": 1.5522507429122925, "learning_rate": 0.0001461231389559447, "loss": 1.5913, "step": 13000 }, { "epoch": 3.4767504008551575, "grad_norm": 1.4575941562652588, "learning_rate": 0.00014604863120837403, "loss": 1.6049, "step": 13010 }, { "epoch": 3.4794227685729555, "grad_norm": 1.5314279794692993, "learning_rate": 0.00014597409100376577, "loss": 1.6405, "step": 13020 }, { "epoch": 3.4820951362907535, "grad_norm": 1.4816808700561523, "learning_rate": 0.000145899518394659, "loss": 1.5926, "step": 13030 }, { "epoch": 3.4847675040085515, "grad_norm": 1.5141303539276123, "learning_rate": 0.00014582491343361563, "loss": 1.5615, "step": 13040 }, { "epoch": 3.4874398717263495, "grad_norm": 1.5823702812194824, "learning_rate": 0.0001457502761732205, "loss": 1.6383, "step": 13050 }, { "epoch": 3.4901122394441475, "grad_norm": 1.444441795349121, "learning_rate": 0.0001456756066660811, "loss": 1.5515, "step": 13060 }, { "epoch": 3.4927846071619455, "grad_norm": 1.5254098176956177, "learning_rate": 0.00014560090496482765, "loss": 1.6191, "step": 13070 }, { "epoch": 3.4954569748797435, "grad_norm": 1.6265779733657837, "learning_rate": 0.00014552617112211308, "loss": 1.5522, "step": 13080 }, { "epoch": 3.4981293425975415, "grad_norm": 1.4543230533599854, "learning_rate": 0.00014545140519061308, "loss": 1.5356, "step": 13090 }, { "epoch": 3.5008017103153395, "grad_norm": 1.4384841918945312, "learning_rate": 0.0001453766072230258, "loss": 1.514, "step": 13100 }, { "epoch": 3.5034740780331375, "grad_norm": 1.6042413711547852, "learning_rate": 0.000145301777272072, "loss": 1.6351, "step": 13110 }, { "epoch": 3.5061464457509355, "grad_norm": 1.4373027086257935, "learning_rate": 0.00014522691539049508, "loss": 1.6038, "step": 13120 }, { "epoch": 3.508818813468733, "grad_norm": 1.5652923583984375, "learning_rate": 0.00014515202163106088, "loss": 1.6038, "step": 13130 }, { "epoch": 3.511491181186531, "grad_norm": 1.4025564193725586, "learning_rate": 0.0001450770960465577, "loss": 1.5811, "step": 13140 }, { "epoch": 3.514163548904329, "grad_norm": 1.440600037574768, "learning_rate": 0.00014500213868979625, "loss": 1.5859, "step": 13150 }, { "epoch": 3.516835916622127, "grad_norm": 1.5395610332489014, "learning_rate": 0.00014492714961360977, "loss": 1.6319, "step": 13160 }, { "epoch": 3.519508284339925, "grad_norm": 1.471100926399231, "learning_rate": 0.00014485212887085363, "loss": 1.5959, "step": 13170 }, { "epoch": 3.522180652057723, "grad_norm": 1.5483871698379517, "learning_rate": 0.00014477707651440572, "loss": 1.5763, "step": 13180 }, { "epoch": 3.524853019775521, "grad_norm": 1.4020414352416992, "learning_rate": 0.00014470199259716612, "loss": 1.6219, "step": 13190 }, { "epoch": 3.527525387493319, "grad_norm": 1.3737884759902954, "learning_rate": 0.00014462687717205718, "loss": 1.6411, "step": 13200 }, { "epoch": 3.530197755211117, "grad_norm": 1.392750859260559, "learning_rate": 0.00014455173029202345, "loss": 1.6043, "step": 13210 }, { "epoch": 3.532870122928915, "grad_norm": 1.5100160837173462, "learning_rate": 0.00014447655201003164, "loss": 1.621, "step": 13220 }, { "epoch": 3.535542490646713, "grad_norm": 1.4548851251602173, "learning_rate": 0.00014440134237907065, "loss": 1.6193, "step": 13230 }, { "epoch": 3.538214858364511, "grad_norm": 1.6431235074996948, "learning_rate": 0.00014432610145215138, "loss": 1.6293, "step": 13240 }, { "epoch": 3.540887226082309, "grad_norm": 1.6478720903396606, "learning_rate": 0.0001442508292823069, "loss": 1.6586, "step": 13250 }, { "epoch": 3.543559593800107, "grad_norm": 1.408124327659607, "learning_rate": 0.00014417552592259217, "loss": 1.6555, "step": 13260 }, { "epoch": 3.546231961517905, "grad_norm": 1.6119136810302734, "learning_rate": 0.00014410019142608427, "loss": 1.7086, "step": 13270 }, { "epoch": 3.548904329235703, "grad_norm": 1.6675114631652832, "learning_rate": 0.0001440248258458821, "loss": 1.5894, "step": 13280 }, { "epoch": 3.551576696953501, "grad_norm": 1.4569134712219238, "learning_rate": 0.00014394942923510662, "loss": 1.5982, "step": 13290 }, { "epoch": 3.554249064671299, "grad_norm": 1.5121387243270874, "learning_rate": 0.0001438740016469005, "loss": 1.6715, "step": 13300 }, { "epoch": 3.556921432389097, "grad_norm": 1.4674450159072876, "learning_rate": 0.00014379854313442835, "loss": 1.5981, "step": 13310 }, { "epoch": 3.559593800106895, "grad_norm": 1.4840753078460693, "learning_rate": 0.00014372305375087653, "loss": 1.6548, "step": 13320 }, { "epoch": 3.5622661678246925, "grad_norm": 1.6692088842391968, "learning_rate": 0.0001436475335494532, "loss": 1.7056, "step": 13330 }, { "epoch": 3.5649385355424905, "grad_norm": 1.4009300470352173, "learning_rate": 0.0001435719825833882, "loss": 1.6539, "step": 13340 }, { "epoch": 3.5676109032602885, "grad_norm": 1.411410927772522, "learning_rate": 0.00014349640090593308, "loss": 1.5697, "step": 13350 }, { "epoch": 3.5702832709780865, "grad_norm": 1.4743436574935913, "learning_rate": 0.000143420788570361, "loss": 1.5776, "step": 13360 }, { "epoch": 3.5729556386958845, "grad_norm": 1.4356075525283813, "learning_rate": 0.00014334514562996682, "loss": 1.5673, "step": 13370 }, { "epoch": 3.5756280064136825, "grad_norm": 1.4502203464508057, "learning_rate": 0.00014326947213806677, "loss": 1.5098, "step": 13380 }, { "epoch": 3.5783003741314805, "grad_norm": 1.4926453828811646, "learning_rate": 0.0001431937681479989, "loss": 1.6879, "step": 13390 }, { "epoch": 3.5809727418492785, "grad_norm": 1.4945552349090576, "learning_rate": 0.00014311803371312257, "loss": 1.5889, "step": 13400 }, { "epoch": 3.5836451095670765, "grad_norm": 1.5944979190826416, "learning_rate": 0.0001430422688868186, "loss": 1.6233, "step": 13410 }, { "epoch": 3.5863174772848745, "grad_norm": 1.4976544380187988, "learning_rate": 0.00014296647372248928, "loss": 1.5894, "step": 13420 }, { "epoch": 3.5889898450026725, "grad_norm": 1.353517770767212, "learning_rate": 0.0001428906482735583, "loss": 1.6334, "step": 13430 }, { "epoch": 3.5916622127204705, "grad_norm": 1.5863207578659058, "learning_rate": 0.00014281479259347062, "loss": 1.6212, "step": 13440 }, { "epoch": 3.594334580438268, "grad_norm": 1.4160728454589844, "learning_rate": 0.0001427389067356926, "loss": 1.5923, "step": 13450 }, { "epoch": 3.597006948156066, "grad_norm": 1.4250168800354004, "learning_rate": 0.00014266299075371182, "loss": 1.6324, "step": 13460 }, { "epoch": 3.599679315873864, "grad_norm": 1.418119192123413, "learning_rate": 0.00014258704470103704, "loss": 1.6647, "step": 13470 }, { "epoch": 3.602351683591662, "grad_norm": 1.6640735864639282, "learning_rate": 0.00014251106863119839, "loss": 1.5728, "step": 13480 }, { "epoch": 3.60502405130946, "grad_norm": 1.7105728387832642, "learning_rate": 0.00014243506259774695, "loss": 1.5729, "step": 13490 }, { "epoch": 3.607696419027258, "grad_norm": 1.4894355535507202, "learning_rate": 0.00014235902665425503, "loss": 1.6117, "step": 13500 }, { "epoch": 3.610368786745056, "grad_norm": 1.5188177824020386, "learning_rate": 0.00014228296085431598, "loss": 1.6428, "step": 13510 }, { "epoch": 3.613041154462854, "grad_norm": 1.4988083839416504, "learning_rate": 0.0001422068652515443, "loss": 1.6426, "step": 13520 }, { "epoch": 3.615713522180652, "grad_norm": 1.4839959144592285, "learning_rate": 0.00014213073989957533, "loss": 1.5665, "step": 13530 }, { "epoch": 3.61838588989845, "grad_norm": 1.4822163581848145, "learning_rate": 0.0001420545848520655, "loss": 1.6193, "step": 13540 }, { "epoch": 3.621058257616248, "grad_norm": 1.5349560976028442, "learning_rate": 0.00014197840016269211, "loss": 1.565, "step": 13550 }, { "epoch": 3.623730625334046, "grad_norm": 1.4710910320281982, "learning_rate": 0.00014190218588515342, "loss": 1.5949, "step": 13560 }, { "epoch": 3.626402993051844, "grad_norm": 1.5057989358901978, "learning_rate": 0.0001418259420731685, "loss": 1.6028, "step": 13570 }, { "epoch": 3.629075360769642, "grad_norm": 1.491237998008728, "learning_rate": 0.00014174966878047714, "loss": 1.7041, "step": 13580 }, { "epoch": 3.63174772848744, "grad_norm": 1.5255022048950195, "learning_rate": 0.0001416733660608401, "loss": 1.5834, "step": 13590 }, { "epoch": 3.634420096205238, "grad_norm": 1.4522159099578857, "learning_rate": 0.00014159703396803876, "loss": 1.6645, "step": 13600 }, { "epoch": 3.637092463923036, "grad_norm": 1.438297986984253, "learning_rate": 0.00014152067255587526, "loss": 1.5807, "step": 13610 }, { "epoch": 3.639764831640834, "grad_norm": 1.5111329555511475, "learning_rate": 0.00014144428187817234, "loss": 1.5751, "step": 13620 }, { "epoch": 3.642437199358632, "grad_norm": 1.5307103395462036, "learning_rate": 0.0001413678619887734, "loss": 1.6652, "step": 13630 }, { "epoch": 3.64510956707643, "grad_norm": 1.4978127479553223, "learning_rate": 0.0001412914129415425, "loss": 1.6253, "step": 13640 }, { "epoch": 3.6477819347942275, "grad_norm": 1.4403648376464844, "learning_rate": 0.0001412149347903641, "loss": 1.5672, "step": 13650 }, { "epoch": 3.6504543025120255, "grad_norm": 1.7367560863494873, "learning_rate": 0.0001411384275891433, "loss": 1.6484, "step": 13660 }, { "epoch": 3.6531266702298235, "grad_norm": 1.4811859130859375, "learning_rate": 0.00014106189139180566, "loss": 1.695, "step": 13670 }, { "epoch": 3.6557990379476215, "grad_norm": 1.4210959672927856, "learning_rate": 0.0001409853262522971, "loss": 1.6099, "step": 13680 }, { "epoch": 3.6584714056654195, "grad_norm": 1.5033975839614868, "learning_rate": 0.00014090873222458408, "loss": 1.6033, "step": 13690 }, { "epoch": 3.6611437733832175, "grad_norm": 1.6621878147125244, "learning_rate": 0.00014083210936265323, "loss": 1.6461, "step": 13700 }, { "epoch": 3.6638161411010155, "grad_norm": 1.5738985538482666, "learning_rate": 0.0001407554577205117, "loss": 1.6294, "step": 13710 }, { "epoch": 3.6664885088188135, "grad_norm": 1.5229177474975586, "learning_rate": 0.0001406787773521867, "loss": 1.5999, "step": 13720 }, { "epoch": 3.6691608765366115, "grad_norm": 1.506054401397705, "learning_rate": 0.000140602068311726, "loss": 1.5744, "step": 13730 }, { "epoch": 3.6718332442544095, "grad_norm": 1.5278657674789429, "learning_rate": 0.00014052533065319727, "loss": 1.645, "step": 13740 }, { "epoch": 3.6745056119722075, "grad_norm": 1.3956048488616943, "learning_rate": 0.00014044856443068852, "loss": 1.6006, "step": 13750 }, { "epoch": 3.6771779796900055, "grad_norm": 1.4022210836410522, "learning_rate": 0.00014037176969830789, "loss": 1.5826, "step": 13760 }, { "epoch": 3.6798503474078035, "grad_norm": 1.491211175918579, "learning_rate": 0.00014029494651018353, "loss": 1.6265, "step": 13770 }, { "epoch": 3.682522715125601, "grad_norm": 1.6359549760818481, "learning_rate": 0.0001402180949204637, "loss": 1.5713, "step": 13780 }, { "epoch": 3.685195082843399, "grad_norm": 1.328031063079834, "learning_rate": 0.0001401412149833167, "loss": 1.6198, "step": 13790 }, { "epoch": 3.687867450561197, "grad_norm": 1.4872769117355347, "learning_rate": 0.00014006430675293077, "loss": 1.7155, "step": 13800 }, { "epoch": 3.690539818278995, "grad_norm": 1.4812895059585571, "learning_rate": 0.00013998737028351414, "loss": 1.5703, "step": 13810 }, { "epoch": 3.693212185996793, "grad_norm": 1.5284749269485474, "learning_rate": 0.00013991040562929486, "loss": 1.6417, "step": 13820 }, { "epoch": 3.695884553714591, "grad_norm": 1.4381073713302612, "learning_rate": 0.00013983341284452093, "loss": 1.6485, "step": 13830 }, { "epoch": 3.698556921432389, "grad_norm": 1.5141340494155884, "learning_rate": 0.00013975639198346014, "loss": 1.6118, "step": 13840 }, { "epoch": 3.701229289150187, "grad_norm": 1.5383323431015015, "learning_rate": 0.0001396793431004001, "loss": 1.6584, "step": 13850 }, { "epoch": 3.703901656867985, "grad_norm": 1.5171921253204346, "learning_rate": 0.0001396022662496481, "loss": 1.6573, "step": 13860 }, { "epoch": 3.706574024585783, "grad_norm": 1.4966976642608643, "learning_rate": 0.0001395251614855312, "loss": 1.6719, "step": 13870 }, { "epoch": 3.709246392303581, "grad_norm": 1.4085315465927124, "learning_rate": 0.00013944802886239617, "loss": 1.5832, "step": 13880 }, { "epoch": 3.711918760021379, "grad_norm": 1.5608574151992798, "learning_rate": 0.00013937086843460932, "loss": 1.6529, "step": 13890 }, { "epoch": 3.714591127739177, "grad_norm": 1.4574732780456543, "learning_rate": 0.00013929368025655662, "loss": 1.5927, "step": 13900 }, { "epoch": 3.717263495456975, "grad_norm": 1.6187883615493774, "learning_rate": 0.00013921646438264356, "loss": 1.6188, "step": 13910 }, { "epoch": 3.719935863174773, "grad_norm": 1.416913628578186, "learning_rate": 0.00013913922086729525, "loss": 1.6252, "step": 13920 }, { "epoch": 3.722608230892571, "grad_norm": 1.4689831733703613, "learning_rate": 0.00013906194976495616, "loss": 1.6159, "step": 13930 }, { "epoch": 3.725280598610369, "grad_norm": 1.452686071395874, "learning_rate": 0.00013898465113009026, "loss": 1.5895, "step": 13940 }, { "epoch": 3.727952966328167, "grad_norm": 1.4064078330993652, "learning_rate": 0.0001389073250171809, "loss": 1.6013, "step": 13950 }, { "epoch": 3.730625334045965, "grad_norm": 1.5133086442947388, "learning_rate": 0.00013882997148073086, "loss": 1.6002, "step": 13960 }, { "epoch": 3.733297701763763, "grad_norm": 1.4783916473388672, "learning_rate": 0.00013875259057526224, "loss": 1.6582, "step": 13970 }, { "epoch": 3.7359700694815605, "grad_norm": 1.5024664402008057, "learning_rate": 0.0001386751823553163, "loss": 1.6967, "step": 13980 }, { "epoch": 3.7386424371993585, "grad_norm": 1.5081539154052734, "learning_rate": 0.00013859774687545372, "loss": 1.6207, "step": 13990 }, { "epoch": 3.7413148049171565, "grad_norm": 1.4246116876602173, "learning_rate": 0.00013852028419025433, "loss": 1.6206, "step": 14000 }, { "epoch": 3.7439871726349545, "grad_norm": 1.4178860187530518, "learning_rate": 0.00013844279435431712, "loss": 1.5968, "step": 14010 }, { "epoch": 3.7466595403527525, "grad_norm": 1.4298408031463623, "learning_rate": 0.0001383652774222602, "loss": 1.6642, "step": 14020 }, { "epoch": 3.7493319080705505, "grad_norm": 1.5386979579925537, "learning_rate": 0.00013828773344872082, "loss": 1.6282, "step": 14030 }, { "epoch": 3.7520042757883485, "grad_norm": 1.481295108795166, "learning_rate": 0.0001382101624883553, "loss": 1.6478, "step": 14040 }, { "epoch": 3.7546766435061465, "grad_norm": 1.4294100999832153, "learning_rate": 0.00013813256459583898, "loss": 1.6048, "step": 14050 }, { "epoch": 3.7573490112239445, "grad_norm": 1.4785627126693726, "learning_rate": 0.00013805493982586611, "loss": 1.6811, "step": 14060 }, { "epoch": 3.7600213789417425, "grad_norm": 1.4725021123886108, "learning_rate": 0.00013797728823314992, "loss": 1.661, "step": 14070 }, { "epoch": 3.7626937466595405, "grad_norm": 1.6216282844543457, "learning_rate": 0.00013789960987242264, "loss": 1.6109, "step": 14080 }, { "epoch": 3.7653661143773385, "grad_norm": 1.6100808382034302, "learning_rate": 0.00013782190479843522, "loss": 1.6065, "step": 14090 }, { "epoch": 3.768038482095136, "grad_norm": 1.464152216911316, "learning_rate": 0.00013774417306595756, "loss": 1.6429, "step": 14100 }, { "epoch": 3.770710849812934, "grad_norm": 1.5179142951965332, "learning_rate": 0.0001376664147297782, "loss": 1.5804, "step": 14110 }, { "epoch": 3.773383217530732, "grad_norm": 1.4735183715820312, "learning_rate": 0.00013758862984470467, "loss": 1.6397, "step": 14120 }, { "epoch": 3.77605558524853, "grad_norm": 1.6094744205474854, "learning_rate": 0.000137510818465563, "loss": 1.6399, "step": 14130 }, { "epoch": 3.778727952966328, "grad_norm": 1.3748862743377686, "learning_rate": 0.0001374329806471979, "loss": 1.5782, "step": 14140 }, { "epoch": 3.781400320684126, "grad_norm": 1.5021231174468994, "learning_rate": 0.00013735511644447288, "loss": 1.6185, "step": 14150 }, { "epoch": 3.784072688401924, "grad_norm": 1.4829655885696411, "learning_rate": 0.0001372772259122699, "loss": 1.564, "step": 14160 }, { "epoch": 3.786745056119722, "grad_norm": 1.4910268783569336, "learning_rate": 0.0001371993091054896, "loss": 1.5831, "step": 14170 }, { "epoch": 3.78941742383752, "grad_norm": 1.4876508712768555, "learning_rate": 0.00013712136607905095, "loss": 1.6153, "step": 14180 }, { "epoch": 3.792089791555318, "grad_norm": 1.646272897720337, "learning_rate": 0.00013704339688789158, "loss": 1.6347, "step": 14190 }, { "epoch": 3.794762159273116, "grad_norm": 1.5087896585464478, "learning_rate": 0.0001369654015869675, "loss": 1.6687, "step": 14200 }, { "epoch": 3.797434526990914, "grad_norm": 1.557450294494629, "learning_rate": 0.00013688738023125315, "loss": 1.6415, "step": 14210 }, { "epoch": 3.800106894708712, "grad_norm": 1.4506268501281738, "learning_rate": 0.0001368093328757412, "loss": 1.5379, "step": 14220 }, { "epoch": 3.80277926242651, "grad_norm": 1.3444366455078125, "learning_rate": 0.00013673125957544286, "loss": 1.6135, "step": 14230 }, { "epoch": 3.805451630144308, "grad_norm": 1.3975130319595337, "learning_rate": 0.0001366531603853875, "loss": 1.6524, "step": 14240 }, { "epoch": 3.808123997862106, "grad_norm": 1.4078421592712402, "learning_rate": 0.00013657503536062271, "loss": 1.685, "step": 14250 }, { "epoch": 3.810796365579904, "grad_norm": 1.5333263874053955, "learning_rate": 0.00013649688455621438, "loss": 1.6449, "step": 14260 }, { "epoch": 3.813468733297702, "grad_norm": 1.5773098468780518, "learning_rate": 0.00013641870802724655, "loss": 1.5746, "step": 14270 }, { "epoch": 3.8161411010155, "grad_norm": 1.3503774404525757, "learning_rate": 0.00013634050582882128, "loss": 1.5675, "step": 14280 }, { "epoch": 3.818813468733298, "grad_norm": 1.4837092161178589, "learning_rate": 0.00013626227801605888, "loss": 1.677, "step": 14290 }, { "epoch": 3.8214858364510955, "grad_norm": 1.3931994438171387, "learning_rate": 0.00013618402464409763, "loss": 1.5816, "step": 14300 }, { "epoch": 3.8241582041688935, "grad_norm": 1.601622223854065, "learning_rate": 0.00013610574576809382, "loss": 1.5674, "step": 14310 }, { "epoch": 3.8268305718866915, "grad_norm": 1.4701857566833496, "learning_rate": 0.00013602744144322178, "loss": 1.6683, "step": 14320 }, { "epoch": 3.8295029396044895, "grad_norm": 1.4149401187896729, "learning_rate": 0.00013594911172467374, "loss": 1.6973, "step": 14330 }, { "epoch": 3.8321753073222875, "grad_norm": 1.4293116331100464, "learning_rate": 0.00013587075666765975, "loss": 1.6284, "step": 14340 }, { "epoch": 3.8348476750400855, "grad_norm": 1.3892247676849365, "learning_rate": 0.00013579237632740788, "loss": 1.6965, "step": 14350 }, { "epoch": 3.8375200427578835, "grad_norm": 1.5815978050231934, "learning_rate": 0.00013571397075916384, "loss": 1.5702, "step": 14360 }, { "epoch": 3.8401924104756815, "grad_norm": 1.4898732900619507, "learning_rate": 0.00013563554001819135, "loss": 1.5876, "step": 14370 }, { "epoch": 3.8428647781934795, "grad_norm": 1.4967395067214966, "learning_rate": 0.00013555708415977162, "loss": 1.6004, "step": 14380 }, { "epoch": 3.8455371459112775, "grad_norm": 1.3928459882736206, "learning_rate": 0.00013547860323920373, "loss": 1.6381, "step": 14390 }, { "epoch": 3.8482095136290755, "grad_norm": 1.4960623979568481, "learning_rate": 0.00013540009731180435, "loss": 1.6381, "step": 14400 }, { "epoch": 3.8508818813468735, "grad_norm": 1.4093217849731445, "learning_rate": 0.00013532156643290784, "loss": 1.6366, "step": 14410 }, { "epoch": 3.853554249064671, "grad_norm": 1.447858214378357, "learning_rate": 0.00013524301065786606, "loss": 1.6263, "step": 14420 }, { "epoch": 3.856226616782469, "grad_norm": 1.536370873451233, "learning_rate": 0.00013516443004204853, "loss": 1.741, "step": 14430 }, { "epoch": 3.858898984500267, "grad_norm": 1.43016517162323, "learning_rate": 0.0001350858246408422, "loss": 1.6889, "step": 14440 }, { "epoch": 3.861571352218065, "grad_norm": 1.4906656742095947, "learning_rate": 0.00013500719450965147, "loss": 1.5983, "step": 14450 }, { "epoch": 3.864243719935863, "grad_norm": 1.5198860168457031, "learning_rate": 0.00013492853970389825, "loss": 1.675, "step": 14460 }, { "epoch": 3.866916087653661, "grad_norm": 1.5043227672576904, "learning_rate": 0.00013484986027902175, "loss": 1.6472, "step": 14470 }, { "epoch": 3.869588455371459, "grad_norm": 1.4240082502365112, "learning_rate": 0.00013477115629047863, "loss": 1.6388, "step": 14480 }, { "epoch": 3.872260823089257, "grad_norm": 1.4360988140106201, "learning_rate": 0.00013469242779374282, "loss": 1.6289, "step": 14490 }, { "epoch": 3.874933190807055, "grad_norm": 1.422788143157959, "learning_rate": 0.00013461367484430547, "loss": 1.5923, "step": 14500 }, { "epoch": 3.877605558524853, "grad_norm": 1.4307031631469727, "learning_rate": 0.00013453489749767504, "loss": 1.6479, "step": 14510 }, { "epoch": 3.880277926242651, "grad_norm": 1.4923681020736694, "learning_rate": 0.00013445609580937714, "loss": 1.6039, "step": 14520 }, { "epoch": 3.882950293960449, "grad_norm": 1.5482001304626465, "learning_rate": 0.00013437726983495462, "loss": 1.6784, "step": 14530 }, { "epoch": 3.885622661678247, "grad_norm": 1.447283387184143, "learning_rate": 0.0001342984196299673, "loss": 1.5706, "step": 14540 }, { "epoch": 3.888295029396045, "grad_norm": 1.4632364511489868, "learning_rate": 0.00013421954524999225, "loss": 1.6004, "step": 14550 }, { "epoch": 3.890967397113843, "grad_norm": 1.7275340557098389, "learning_rate": 0.00013414064675062342, "loss": 1.647, "step": 14560 }, { "epoch": 3.893639764831641, "grad_norm": 1.4087649583816528, "learning_rate": 0.00013406172418747191, "loss": 1.6432, "step": 14570 }, { "epoch": 3.896312132549439, "grad_norm": 1.3862338066101074, "learning_rate": 0.00013398277761616565, "loss": 1.6009, "step": 14580 }, { "epoch": 3.898984500267237, "grad_norm": 1.5114810466766357, "learning_rate": 0.00013390380709234955, "loss": 1.5917, "step": 14590 }, { "epoch": 3.901656867985035, "grad_norm": 1.423058032989502, "learning_rate": 0.0001338248126716854, "loss": 1.6769, "step": 14600 }, { "epoch": 3.904329235702833, "grad_norm": 1.4373550415039062, "learning_rate": 0.00013374579440985192, "loss": 1.6135, "step": 14610 }, { "epoch": 3.9070016034206305, "grad_norm": 1.6480869054794312, "learning_rate": 0.0001336667523625444, "loss": 1.5833, "step": 14620 }, { "epoch": 3.9096739711384285, "grad_norm": 1.372557520866394, "learning_rate": 0.00013358768658547517, "loss": 1.6504, "step": 14630 }, { "epoch": 3.9123463388562265, "grad_norm": 1.424797534942627, "learning_rate": 0.00013350859713437305, "loss": 1.6278, "step": 14640 }, { "epoch": 3.9150187065740245, "grad_norm": 1.443160057067871, "learning_rate": 0.00013342948406498377, "loss": 1.6827, "step": 14650 }, { "epoch": 3.9176910742918225, "grad_norm": 1.5389927625656128, "learning_rate": 0.0001333503474330695, "loss": 1.6948, "step": 14660 }, { "epoch": 3.9203634420096205, "grad_norm": 1.5167105197906494, "learning_rate": 0.00013327118729440914, "loss": 1.6306, "step": 14670 }, { "epoch": 3.9230358097274185, "grad_norm": 1.5564993619918823, "learning_rate": 0.00013319200370479813, "loss": 1.527, "step": 14680 }, { "epoch": 3.9257081774452165, "grad_norm": 1.3414462804794312, "learning_rate": 0.0001331127967200485, "loss": 1.6159, "step": 14690 }, { "epoch": 3.9283805451630145, "grad_norm": 1.637369155883789, "learning_rate": 0.0001330335663959886, "loss": 1.5953, "step": 14700 }, { "epoch": 3.9310529128808125, "grad_norm": 1.5273038148880005, "learning_rate": 0.00013295431278846339, "loss": 1.6665, "step": 14710 }, { "epoch": 3.9337252805986105, "grad_norm": 1.38784658908844, "learning_rate": 0.0001328750359533342, "loss": 1.6488, "step": 14720 }, { "epoch": 3.9363976483164085, "grad_norm": 1.5924921035766602, "learning_rate": 0.00013279573594647873, "loss": 1.617, "step": 14730 }, { "epoch": 3.939070016034206, "grad_norm": 1.6329870223999023, "learning_rate": 0.000132716412823791, "loss": 1.5561, "step": 14740 }, { "epoch": 3.941742383752004, "grad_norm": 1.403551697731018, "learning_rate": 0.00013263706664118127, "loss": 1.6606, "step": 14750 }, { "epoch": 3.944414751469802, "grad_norm": 1.438462734222412, "learning_rate": 0.00013255769745457616, "loss": 1.6405, "step": 14760 }, { "epoch": 3.9470871191876, "grad_norm": 1.7514116764068604, "learning_rate": 0.00013247830531991846, "loss": 1.6367, "step": 14770 }, { "epoch": 3.949759486905398, "grad_norm": 1.4804353713989258, "learning_rate": 0.0001323988902931671, "loss": 1.6431, "step": 14780 }, { "epoch": 3.952431854623196, "grad_norm": 1.52631676197052, "learning_rate": 0.00013231945243029724, "loss": 1.6394, "step": 14790 }, { "epoch": 3.955104222340994, "grad_norm": 1.5103986263275146, "learning_rate": 0.00013223999178730002, "loss": 1.6736, "step": 14800 }, { "epoch": 3.957776590058792, "grad_norm": 1.4855271577835083, "learning_rate": 0.00013216050842018267, "loss": 1.6506, "step": 14810 }, { "epoch": 3.96044895777659, "grad_norm": 1.7139270305633545, "learning_rate": 0.0001320810023849685, "loss": 1.6167, "step": 14820 }, { "epoch": 3.963121325494388, "grad_norm": 1.4682615995407104, "learning_rate": 0.0001320014737376967, "loss": 1.6672, "step": 14830 }, { "epoch": 3.965793693212186, "grad_norm": 1.5905976295471191, "learning_rate": 0.00013192192253442248, "loss": 1.635, "step": 14840 }, { "epoch": 3.968466060929984, "grad_norm": 1.48699152469635, "learning_rate": 0.00013184234883121694, "loss": 1.638, "step": 14850 }, { "epoch": 3.971138428647782, "grad_norm": 1.4956756830215454, "learning_rate": 0.00013176275268416698, "loss": 1.6672, "step": 14860 }, { "epoch": 3.97381079636558, "grad_norm": 1.6715842485427856, "learning_rate": 0.00013168313414937537, "loss": 1.6804, "step": 14870 }, { "epoch": 3.976483164083378, "grad_norm": 1.3169384002685547, "learning_rate": 0.00013160349328296061, "loss": 1.6972, "step": 14880 }, { "epoch": 3.979155531801176, "grad_norm": 1.6806117296218872, "learning_rate": 0.00013152383014105706, "loss": 1.6283, "step": 14890 }, { "epoch": 3.981827899518974, "grad_norm": 1.4955217838287354, "learning_rate": 0.00013144414477981467, "loss": 1.5623, "step": 14900 }, { "epoch": 3.984500267236772, "grad_norm": 1.4891384840011597, "learning_rate": 0.000131364437255399, "loss": 1.6444, "step": 14910 }, { "epoch": 3.98717263495457, "grad_norm": 1.5744833946228027, "learning_rate": 0.0001312847076239914, "loss": 1.6152, "step": 14920 }, { "epoch": 3.989845002672368, "grad_norm": 1.3829927444458008, "learning_rate": 0.00013120495594178867, "loss": 1.6469, "step": 14930 }, { "epoch": 3.9925173703901655, "grad_norm": 1.3851839303970337, "learning_rate": 0.00013112518226500322, "loss": 1.6489, "step": 14940 }, { "epoch": 3.9951897381079635, "grad_norm": 1.3766522407531738, "learning_rate": 0.0001310453866498629, "loss": 1.6432, "step": 14950 }, { "epoch": 3.9978621058257615, "grad_norm": 1.4151705503463745, "learning_rate": 0.00013096556915261109, "loss": 1.5371, "step": 14960 }, { "epoch": 4.00053447354356, "grad_norm": 1.4186418056488037, "learning_rate": 0.00013088572982950658, "loss": 1.6407, "step": 14970 }, { "epoch": 4.003206841261358, "grad_norm": 1.7752667665481567, "learning_rate": 0.00013080586873682345, "loss": 1.5291, "step": 14980 }, { "epoch": 4.005879208979156, "grad_norm": 1.5340584516525269, "learning_rate": 0.0001307259859308513, "loss": 1.483, "step": 14990 }, { "epoch": 4.008551576696954, "grad_norm": 1.703906774520874, "learning_rate": 0.00013064608146789487, "loss": 1.376, "step": 15000 }, { "epoch": 4.011223944414751, "grad_norm": 1.5323362350463867, "learning_rate": 0.00013056615540427426, "loss": 1.5141, "step": 15010 }, { "epoch": 4.013896312132549, "grad_norm": 1.5463310480117798, "learning_rate": 0.00013048620779632474, "loss": 1.4556, "step": 15020 }, { "epoch": 4.016568679850347, "grad_norm": 1.6218100786209106, "learning_rate": 0.0001304062387003968, "loss": 1.3566, "step": 15030 }, { "epoch": 4.019241047568145, "grad_norm": 1.5737180709838867, "learning_rate": 0.00013032624817285607, "loss": 1.4114, "step": 15040 }, { "epoch": 4.021913415285943, "grad_norm": 1.626188039779663, "learning_rate": 0.00013024623627008335, "loss": 1.4558, "step": 15050 }, { "epoch": 4.024585783003741, "grad_norm": 1.8518829345703125, "learning_rate": 0.0001301662030484743, "loss": 1.4462, "step": 15060 }, { "epoch": 4.027258150721539, "grad_norm": 1.687781572341919, "learning_rate": 0.00013008614856443984, "loss": 1.4625, "step": 15070 }, { "epoch": 4.029930518439337, "grad_norm": 1.7048814296722412, "learning_rate": 0.00013000607287440578, "loss": 1.5125, "step": 15080 }, { "epoch": 4.032602886157135, "grad_norm": 1.6215217113494873, "learning_rate": 0.00012992597603481287, "loss": 1.4001, "step": 15090 }, { "epoch": 4.035275253874933, "grad_norm": 1.71282958984375, "learning_rate": 0.00012984585810211675, "loss": 1.4038, "step": 15100 }, { "epoch": 4.037947621592731, "grad_norm": 1.5622607469558716, "learning_rate": 0.00012976571913278793, "loss": 1.3764, "step": 15110 }, { "epoch": 4.040619989310529, "grad_norm": 1.5343408584594727, "learning_rate": 0.00012968555918331184, "loss": 1.449, "step": 15120 }, { "epoch": 4.043292357028327, "grad_norm": 1.6914194822311401, "learning_rate": 0.00012960537831018863, "loss": 1.4227, "step": 15130 }, { "epoch": 4.045964724746125, "grad_norm": 1.560380458831787, "learning_rate": 0.00012952517656993312, "loss": 1.4633, "step": 15140 }, { "epoch": 4.048637092463923, "grad_norm": 1.57892644405365, "learning_rate": 0.00012944495401907496, "loss": 1.4584, "step": 15150 }, { "epoch": 4.051309460181721, "grad_norm": 1.5968350172042847, "learning_rate": 0.00012936471071415845, "loss": 1.4349, "step": 15160 }, { "epoch": 4.053981827899519, "grad_norm": 1.717559576034546, "learning_rate": 0.00012928444671174246, "loss": 1.471, "step": 15170 }, { "epoch": 4.056654195617317, "grad_norm": 1.6292623281478882, "learning_rate": 0.00012920416206840048, "loss": 1.5261, "step": 15180 }, { "epoch": 4.059326563335115, "grad_norm": 1.6947565078735352, "learning_rate": 0.00012912385684072053, "loss": 1.4252, "step": 15190 }, { "epoch": 4.061998931052913, "grad_norm": 1.6276607513427734, "learning_rate": 0.00012904353108530517, "loss": 1.4703, "step": 15200 }, { "epoch": 4.064671298770711, "grad_norm": 1.6942775249481201, "learning_rate": 0.00012896318485877147, "loss": 1.5128, "step": 15210 }, { "epoch": 4.067343666488509, "grad_norm": 1.716341495513916, "learning_rate": 0.00012888281821775077, "loss": 1.5169, "step": 15220 }, { "epoch": 4.070016034206307, "grad_norm": 1.8272207975387573, "learning_rate": 0.00012880243121888898, "loss": 1.4746, "step": 15230 }, { "epoch": 4.072688401924105, "grad_norm": 1.6025763750076294, "learning_rate": 0.00012872202391884624, "loss": 1.4425, "step": 15240 }, { "epoch": 4.075360769641903, "grad_norm": 1.7037829160690308, "learning_rate": 0.00012864159637429712, "loss": 1.4231, "step": 15250 }, { "epoch": 4.078033137359701, "grad_norm": 1.6595052480697632, "learning_rate": 0.00012856114864193027, "loss": 1.5113, "step": 15260 }, { "epoch": 4.080705505077499, "grad_norm": 1.6561627388000488, "learning_rate": 0.00012848068077844874, "loss": 1.5073, "step": 15270 }, { "epoch": 4.083377872795297, "grad_norm": 1.6377439498901367, "learning_rate": 0.00012840019284056972, "loss": 1.5123, "step": 15280 }, { "epoch": 4.086050240513095, "grad_norm": 1.4633361101150513, "learning_rate": 0.00012831968488502454, "loss": 1.4576, "step": 15290 }, { "epoch": 4.088722608230893, "grad_norm": 1.716450572013855, "learning_rate": 0.00012823915696855858, "loss": 1.4567, "step": 15300 }, { "epoch": 4.091394975948691, "grad_norm": 1.7311489582061768, "learning_rate": 0.00012815860914793144, "loss": 1.3882, "step": 15310 }, { "epoch": 4.094067343666489, "grad_norm": 1.7296059131622314, "learning_rate": 0.00012807804147991654, "loss": 1.4566, "step": 15320 }, { "epoch": 4.096739711384286, "grad_norm": 1.5362845659255981, "learning_rate": 0.0001279974540213015, "loss": 1.3942, "step": 15330 }, { "epoch": 4.099412079102084, "grad_norm": 1.4967952966690063, "learning_rate": 0.00012791684682888777, "loss": 1.4921, "step": 15340 }, { "epoch": 4.102084446819882, "grad_norm": 1.711885690689087, "learning_rate": 0.00012783621995949077, "loss": 1.467, "step": 15350 }, { "epoch": 4.10475681453768, "grad_norm": 1.605841875076294, "learning_rate": 0.00012775557346993965, "loss": 1.4144, "step": 15360 }, { "epoch": 4.107429182255478, "grad_norm": 1.652406930923462, "learning_rate": 0.00012767490741707758, "loss": 1.4517, "step": 15370 }, { "epoch": 4.110101549973276, "grad_norm": 1.6053305864334106, "learning_rate": 0.0001275942218577614, "loss": 1.4935, "step": 15380 }, { "epoch": 4.112773917691074, "grad_norm": 1.6521443128585815, "learning_rate": 0.00012751351684886172, "loss": 1.4737, "step": 15390 }, { "epoch": 4.115446285408872, "grad_norm": 1.6109788417816162, "learning_rate": 0.0001274327924472629, "loss": 1.5309, "step": 15400 }, { "epoch": 4.11811865312667, "grad_norm": 1.7511069774627686, "learning_rate": 0.00012735204870986292, "loss": 1.449, "step": 15410 }, { "epoch": 4.120791020844468, "grad_norm": 1.600865364074707, "learning_rate": 0.00012727128569357343, "loss": 1.5098, "step": 15420 }, { "epoch": 4.123463388562266, "grad_norm": 1.8842839002609253, "learning_rate": 0.0001271905034553196, "loss": 1.4775, "step": 15430 }, { "epoch": 4.126135756280064, "grad_norm": 1.5898122787475586, "learning_rate": 0.00012710970205204023, "loss": 1.4892, "step": 15440 }, { "epoch": 4.128808123997862, "grad_norm": 1.571650743484497, "learning_rate": 0.00012702888154068762, "loss": 1.4514, "step": 15450 }, { "epoch": 4.13148049171566, "grad_norm": 1.657235860824585, "learning_rate": 0.00012694804197822742, "loss": 1.4535, "step": 15460 }, { "epoch": 4.134152859433458, "grad_norm": 1.6750986576080322, "learning_rate": 0.00012686718342163887, "loss": 1.4641, "step": 15470 }, { "epoch": 4.136825227151256, "grad_norm": 1.49517023563385, "learning_rate": 0.00012678630592791453, "loss": 1.4382, "step": 15480 }, { "epoch": 4.139497594869054, "grad_norm": 1.6910736560821533, "learning_rate": 0.00012670540955406025, "loss": 1.4938, "step": 15490 }, { "epoch": 4.142169962586852, "grad_norm": 1.5621329545974731, "learning_rate": 0.00012662449435709528, "loss": 1.4439, "step": 15500 }, { "epoch": 4.14484233030465, "grad_norm": 1.5824596881866455, "learning_rate": 0.00012654356039405214, "loss": 1.4253, "step": 15510 }, { "epoch": 4.147514698022448, "grad_norm": 1.777213454246521, "learning_rate": 0.00012646260772197646, "loss": 1.4832, "step": 15520 }, { "epoch": 4.150187065740246, "grad_norm": 1.6388694047927856, "learning_rate": 0.00012638163639792715, "loss": 1.5706, "step": 15530 }, { "epoch": 4.152859433458044, "grad_norm": 1.7183681726455688, "learning_rate": 0.0001263006464789763, "loss": 1.477, "step": 15540 }, { "epoch": 4.155531801175842, "grad_norm": 1.6367969512939453, "learning_rate": 0.00012621963802220896, "loss": 1.5303, "step": 15550 }, { "epoch": 4.15820416889364, "grad_norm": 1.7480429410934448, "learning_rate": 0.00012613861108472344, "loss": 1.3677, "step": 15560 }, { "epoch": 4.160876536611438, "grad_norm": 1.5859206914901733, "learning_rate": 0.00012605756572363095, "loss": 1.4675, "step": 15570 }, { "epoch": 4.163548904329236, "grad_norm": 1.7138357162475586, "learning_rate": 0.00012597650199605567, "loss": 1.4469, "step": 15580 }, { "epoch": 4.166221272047034, "grad_norm": 1.7989431619644165, "learning_rate": 0.00012589541995913482, "loss": 1.4298, "step": 15590 }, { "epoch": 4.168893639764832, "grad_norm": 1.6668016910552979, "learning_rate": 0.00012581431967001845, "loss": 1.4636, "step": 15600 }, { "epoch": 4.17156600748263, "grad_norm": 1.6256632804870605, "learning_rate": 0.00012573320118586956, "loss": 1.5082, "step": 15610 }, { "epoch": 4.174238375200428, "grad_norm": 1.6245003938674927, "learning_rate": 0.0001256520645638638, "loss": 1.5126, "step": 15620 }, { "epoch": 4.176910742918226, "grad_norm": 1.5841891765594482, "learning_rate": 0.0001255709098611898, "loss": 1.5234, "step": 15630 }, { "epoch": 4.179583110636024, "grad_norm": 1.6938221454620361, "learning_rate": 0.00012548973713504883, "loss": 1.513, "step": 15640 }, { "epoch": 4.182255478353822, "grad_norm": 1.8873212337493896, "learning_rate": 0.0001254085464426549, "loss": 1.5582, "step": 15650 }, { "epoch": 4.184927846071619, "grad_norm": 1.723900556564331, "learning_rate": 0.0001253273378412347, "loss": 1.4572, "step": 15660 }, { "epoch": 4.187600213789417, "grad_norm": 1.6873656511306763, "learning_rate": 0.0001252461113880274, "loss": 1.4842, "step": 15670 }, { "epoch": 4.190272581507215, "grad_norm": 1.7792420387268066, "learning_rate": 0.000125164867140285, "loss": 1.5768, "step": 15680 }, { "epoch": 4.192944949225013, "grad_norm": 1.718456745147705, "learning_rate": 0.00012508360515527182, "loss": 1.5009, "step": 15690 }, { "epoch": 4.195617316942811, "grad_norm": 1.7421867847442627, "learning_rate": 0.0001250023254902648, "loss": 1.4752, "step": 15700 }, { "epoch": 4.198289684660609, "grad_norm": 1.6288411617279053, "learning_rate": 0.00012492102820255334, "loss": 1.4887, "step": 15710 }, { "epoch": 4.200962052378407, "grad_norm": 1.5560775995254517, "learning_rate": 0.00012483971334943916, "loss": 1.5078, "step": 15720 }, { "epoch": 4.203634420096205, "grad_norm": 1.8239293098449707, "learning_rate": 0.0001247583809882365, "loss": 1.4998, "step": 15730 }, { "epoch": 4.206306787814003, "grad_norm": 1.5634268522262573, "learning_rate": 0.00012467703117627184, "loss": 1.4761, "step": 15740 }, { "epoch": 4.208979155531801, "grad_norm": 1.6948027610778809, "learning_rate": 0.000124595663970884, "loss": 1.4968, "step": 15750 }, { "epoch": 4.211651523249599, "grad_norm": 1.6174324750900269, "learning_rate": 0.00012451427942942404, "loss": 1.4994, "step": 15760 }, { "epoch": 4.214323890967397, "grad_norm": 1.7737600803375244, "learning_rate": 0.00012443287760925532, "loss": 1.3916, "step": 15770 }, { "epoch": 4.216996258685195, "grad_norm": 1.6498640775680542, "learning_rate": 0.00012435145856775324, "loss": 1.4857, "step": 15780 }, { "epoch": 4.219668626402993, "grad_norm": 1.6566587686538696, "learning_rate": 0.0001242700223623054, "loss": 1.5357, "step": 15790 }, { "epoch": 4.222340994120791, "grad_norm": 1.753976821899414, "learning_rate": 0.00012418856905031158, "loss": 1.4841, "step": 15800 }, { "epoch": 4.225013361838589, "grad_norm": 1.6846094131469727, "learning_rate": 0.00012410709868918352, "loss": 1.4906, "step": 15810 }, { "epoch": 4.227685729556387, "grad_norm": 1.6260082721710205, "learning_rate": 0.00012402561133634493, "loss": 1.5374, "step": 15820 }, { "epoch": 4.230358097274185, "grad_norm": 1.675095796585083, "learning_rate": 0.00012394410704923167, "loss": 1.5468, "step": 15830 }, { "epoch": 4.233030464991983, "grad_norm": 1.6872878074645996, "learning_rate": 0.00012386258588529137, "loss": 1.528, "step": 15840 }, { "epoch": 4.235702832709781, "grad_norm": 1.683913230895996, "learning_rate": 0.00012378104790198368, "loss": 1.5133, "step": 15850 }, { "epoch": 4.238375200427579, "grad_norm": 1.6218829154968262, "learning_rate": 0.00012369949315678, "loss": 1.4151, "step": 15860 }, { "epoch": 4.241047568145377, "grad_norm": 1.6738678216934204, "learning_rate": 0.00012361792170716355, "loss": 1.4968, "step": 15870 }, { "epoch": 4.243719935863175, "grad_norm": 1.6918460130691528, "learning_rate": 0.00012353633361062943, "loss": 1.4572, "step": 15880 }, { "epoch": 4.246392303580973, "grad_norm": 1.720625638961792, "learning_rate": 0.00012345472892468445, "loss": 1.5016, "step": 15890 }, { "epoch": 4.249064671298771, "grad_norm": 1.7121583223342896, "learning_rate": 0.00012337310770684697, "loss": 1.5141, "step": 15900 }, { "epoch": 4.251737039016569, "grad_norm": 1.6069037914276123, "learning_rate": 0.0001232914700146471, "loss": 1.4759, "step": 15910 }, { "epoch": 4.254409406734367, "grad_norm": 1.7477941513061523, "learning_rate": 0.00012320981590562666, "loss": 1.4975, "step": 15920 }, { "epoch": 4.257081774452165, "grad_norm": 1.6874366998672485, "learning_rate": 0.0001231281454373389, "loss": 1.4427, "step": 15930 }, { "epoch": 4.259754142169963, "grad_norm": 1.6226550340652466, "learning_rate": 0.0001230464586673486, "loss": 1.4729, "step": 15940 }, { "epoch": 4.262426509887761, "grad_norm": 1.574035882949829, "learning_rate": 0.0001229647556532321, "loss": 1.4162, "step": 15950 }, { "epoch": 4.265098877605558, "grad_norm": 1.7540584802627563, "learning_rate": 0.0001228830364525772, "loss": 1.5068, "step": 15960 }, { "epoch": 4.267771245323356, "grad_norm": 1.5365920066833496, "learning_rate": 0.0001228013011229831, "loss": 1.5271, "step": 15970 }, { "epoch": 4.270443613041154, "grad_norm": 1.6674189567565918, "learning_rate": 0.00012271954972206025, "loss": 1.4932, "step": 15980 }, { "epoch": 4.273115980758952, "grad_norm": 1.8557534217834473, "learning_rate": 0.00012263778230743057, "loss": 1.3994, "step": 15990 }, { "epoch": 4.27578834847675, "grad_norm": 1.6800191402435303, "learning_rate": 0.00012255599893672722, "loss": 1.5023, "step": 16000 }, { "epoch": 4.278460716194548, "grad_norm": 1.7056418657302856, "learning_rate": 0.00012247419966759463, "loss": 1.5013, "step": 16010 }, { "epoch": 4.281133083912346, "grad_norm": 1.7103627920150757, "learning_rate": 0.00012239238455768835, "loss": 1.5767, "step": 16020 }, { "epoch": 4.283805451630144, "grad_norm": 1.6329611539840698, "learning_rate": 0.0001223105536646752, "loss": 1.5312, "step": 16030 }, { "epoch": 4.286477819347942, "grad_norm": 1.7396610975265503, "learning_rate": 0.00012222870704623308, "loss": 1.5558, "step": 16040 }, { "epoch": 4.28915018706574, "grad_norm": 1.755860686302185, "learning_rate": 0.00012214684476005098, "loss": 1.4537, "step": 16050 }, { "epoch": 4.291822554783538, "grad_norm": 1.6175237894058228, "learning_rate": 0.00012206496686382891, "loss": 1.4715, "step": 16060 }, { "epoch": 4.294494922501336, "grad_norm": 1.626566767692566, "learning_rate": 0.00012198307341527789, "loss": 1.4918, "step": 16070 }, { "epoch": 4.297167290219134, "grad_norm": 1.6249829530715942, "learning_rate": 0.00012190116447211995, "loss": 1.5604, "step": 16080 }, { "epoch": 4.299839657936932, "grad_norm": 1.971280813217163, "learning_rate": 0.00012181924009208794, "loss": 1.4767, "step": 16090 }, { "epoch": 4.30251202565473, "grad_norm": 1.6820917129516602, "learning_rate": 0.00012173730033292571, "loss": 1.4629, "step": 16100 }, { "epoch": 4.305184393372528, "grad_norm": 1.669192910194397, "learning_rate": 0.00012165534525238789, "loss": 1.4328, "step": 16110 }, { "epoch": 4.307856761090326, "grad_norm": 1.638898253440857, "learning_rate": 0.0001215733749082399, "loss": 1.4909, "step": 16120 }, { "epoch": 4.310529128808124, "grad_norm": 1.646751880645752, "learning_rate": 0.00012149138935825792, "loss": 1.4865, "step": 16130 }, { "epoch": 4.313201496525922, "grad_norm": 1.757340908050537, "learning_rate": 0.00012140938866022888, "loss": 1.4045, "step": 16140 }, { "epoch": 4.31587386424372, "grad_norm": 1.6209243535995483, "learning_rate": 0.00012132737287195036, "loss": 1.4834, "step": 16150 }, { "epoch": 4.318546231961518, "grad_norm": 1.7393606901168823, "learning_rate": 0.00012124534205123057, "loss": 1.5536, "step": 16160 }, { "epoch": 4.321218599679316, "grad_norm": 1.7389954328536987, "learning_rate": 0.00012116329625588836, "loss": 1.5147, "step": 16170 }, { "epoch": 4.323890967397114, "grad_norm": 1.6584761142730713, "learning_rate": 0.00012108123554375306, "loss": 1.5098, "step": 16180 }, { "epoch": 4.326563335114912, "grad_norm": 1.5832411050796509, "learning_rate": 0.0001209991599726646, "loss": 1.4712, "step": 16190 }, { "epoch": 4.32923570283271, "grad_norm": 1.6773817539215088, "learning_rate": 0.00012091706960047329, "loss": 1.5145, "step": 16200 }, { "epoch": 4.331908070550508, "grad_norm": 1.6375465393066406, "learning_rate": 0.00012083496448503999, "loss": 1.4662, "step": 16210 }, { "epoch": 4.334580438268306, "grad_norm": 1.6932520866394043, "learning_rate": 0.00012075284468423582, "loss": 1.5005, "step": 16220 }, { "epoch": 4.337252805986104, "grad_norm": 1.6839436292648315, "learning_rate": 0.00012067071025594234, "loss": 1.5137, "step": 16230 }, { "epoch": 4.339925173703902, "grad_norm": 1.9438105821609497, "learning_rate": 0.00012058856125805142, "loss": 1.5039, "step": 16240 }, { "epoch": 4.3425975414217, "grad_norm": 1.6157655715942383, "learning_rate": 0.00012050639774846515, "loss": 1.4942, "step": 16250 }, { "epoch": 4.345269909139498, "grad_norm": 1.721543788909912, "learning_rate": 0.00012042421978509586, "loss": 1.5226, "step": 16260 }, { "epoch": 4.347942276857296, "grad_norm": 1.6050511598587036, "learning_rate": 0.00012034202742586611, "loss": 1.5069, "step": 16270 }, { "epoch": 4.350614644575094, "grad_norm": 1.7026658058166504, "learning_rate": 0.00012025982072870853, "loss": 1.5875, "step": 16280 }, { "epoch": 4.353287012292892, "grad_norm": 1.7754448652267456, "learning_rate": 0.000120177599751566, "loss": 1.5895, "step": 16290 }, { "epoch": 4.35595938001069, "grad_norm": 1.7176076173782349, "learning_rate": 0.00012009536455239123, "loss": 1.5354, "step": 16300 }, { "epoch": 4.358631747728487, "grad_norm": 1.6290333271026611, "learning_rate": 0.00012001311518914719, "loss": 1.4675, "step": 16310 }, { "epoch": 4.361304115446285, "grad_norm": 1.5428414344787598, "learning_rate": 0.00011993085171980671, "loss": 1.5288, "step": 16320 }, { "epoch": 4.363976483164083, "grad_norm": 1.5654804706573486, "learning_rate": 0.00011984857420235263, "loss": 1.4936, "step": 16330 }, { "epoch": 4.366648850881881, "grad_norm": 1.573370099067688, "learning_rate": 0.00011976628269477759, "loss": 1.4682, "step": 16340 }, { "epoch": 4.369321218599679, "grad_norm": 1.6399669647216797, "learning_rate": 0.00011968397725508416, "loss": 1.5217, "step": 16350 }, { "epoch": 4.371993586317477, "grad_norm": 1.6087729930877686, "learning_rate": 0.00011960165794128473, "loss": 1.462, "step": 16360 }, { "epoch": 4.374665954035275, "grad_norm": 1.5650618076324463, "learning_rate": 0.0001195193248114015, "loss": 1.4895, "step": 16370 }, { "epoch": 4.377338321753073, "grad_norm": 1.6232175827026367, "learning_rate": 0.00011943697792346629, "loss": 1.4741, "step": 16380 }, { "epoch": 4.380010689470871, "grad_norm": 1.6423583030700684, "learning_rate": 0.00011935461733552075, "loss": 1.5686, "step": 16390 }, { "epoch": 4.382683057188669, "grad_norm": 1.905605673789978, "learning_rate": 0.00011927224310561613, "loss": 1.5631, "step": 16400 }, { "epoch": 4.385355424906467, "grad_norm": 1.5372464656829834, "learning_rate": 0.00011918985529181333, "loss": 1.5095, "step": 16410 }, { "epoch": 4.388027792624265, "grad_norm": 1.6715388298034668, "learning_rate": 0.0001191074539521827, "loss": 1.4995, "step": 16420 }, { "epoch": 4.390700160342063, "grad_norm": 1.7411582469940186, "learning_rate": 0.00011902503914480428, "loss": 1.4601, "step": 16430 }, { "epoch": 4.393372528059861, "grad_norm": 1.6783335208892822, "learning_rate": 0.00011894261092776752, "loss": 1.6192, "step": 16440 }, { "epoch": 4.396044895777659, "grad_norm": 1.5689774751663208, "learning_rate": 0.00011886016935917136, "loss": 1.5356, "step": 16450 }, { "epoch": 4.398717263495457, "grad_norm": 1.7067049741744995, "learning_rate": 0.00011877771449712414, "loss": 1.5107, "step": 16460 }, { "epoch": 4.401389631213255, "grad_norm": 1.7084238529205322, "learning_rate": 0.00011869524639974349, "loss": 1.5365, "step": 16470 }, { "epoch": 4.404061998931053, "grad_norm": 1.608877420425415, "learning_rate": 0.0001186127651251565, "loss": 1.5091, "step": 16480 }, { "epoch": 4.406734366648851, "grad_norm": 1.7160688638687134, "learning_rate": 0.0001185302707314995, "loss": 1.5292, "step": 16490 }, { "epoch": 4.409406734366649, "grad_norm": 1.552913784980774, "learning_rate": 0.000118447763276918, "loss": 1.5217, "step": 16500 }, { "epoch": 4.412079102084447, "grad_norm": 1.9324225187301636, "learning_rate": 0.00011836524281956678, "loss": 1.5129, "step": 16510 }, { "epoch": 4.414751469802245, "grad_norm": 1.7439939975738525, "learning_rate": 0.00011828270941760981, "loss": 1.4675, "step": 16520 }, { "epoch": 4.417423837520043, "grad_norm": 1.7479952573776245, "learning_rate": 0.00011820016312922015, "loss": 1.5736, "step": 16530 }, { "epoch": 4.420096205237841, "grad_norm": 1.6934566497802734, "learning_rate": 0.0001181176040125799, "loss": 1.5506, "step": 16540 }, { "epoch": 4.422768572955639, "grad_norm": 1.6633288860321045, "learning_rate": 0.00011803503212588027, "loss": 1.5214, "step": 16550 }, { "epoch": 4.425440940673437, "grad_norm": 1.7800343036651611, "learning_rate": 0.00011795244752732145, "loss": 1.4409, "step": 16560 }, { "epoch": 4.428113308391235, "grad_norm": 1.7088680267333984, "learning_rate": 0.00011786985027511259, "loss": 1.4628, "step": 16570 }, { "epoch": 4.430785676109033, "grad_norm": 1.6736171245574951, "learning_rate": 0.00011778724042747172, "loss": 1.4649, "step": 16580 }, { "epoch": 4.433458043826831, "grad_norm": 1.578446865081787, "learning_rate": 0.00011770461804262578, "loss": 1.4735, "step": 16590 }, { "epoch": 4.436130411544629, "grad_norm": 1.6248250007629395, "learning_rate": 0.00011762198317881059, "loss": 1.5512, "step": 16600 }, { "epoch": 4.438802779262426, "grad_norm": 1.6464059352874756, "learning_rate": 0.00011753933589427073, "loss": 1.5397, "step": 16610 }, { "epoch": 4.441475146980224, "grad_norm": 1.5229616165161133, "learning_rate": 0.00011745667624725945, "loss": 1.5168, "step": 16620 }, { "epoch": 4.444147514698022, "grad_norm": 1.659442663192749, "learning_rate": 0.00011737400429603888, "loss": 1.5667, "step": 16630 }, { "epoch": 4.44681988241582, "grad_norm": 1.4989534616470337, "learning_rate": 0.00011729132009887967, "loss": 1.5182, "step": 16640 }, { "epoch": 4.449492250133618, "grad_norm": 1.6228270530700684, "learning_rate": 0.00011720862371406123, "loss": 1.5465, "step": 16650 }, { "epoch": 4.452164617851416, "grad_norm": 1.6722487211227417, "learning_rate": 0.00011712591519987141, "loss": 1.4773, "step": 16660 }, { "epoch": 4.454836985569214, "grad_norm": 1.5463364124298096, "learning_rate": 0.00011704319461460675, "loss": 1.5465, "step": 16670 }, { "epoch": 4.457509353287012, "grad_norm": 1.6173169612884521, "learning_rate": 0.00011696046201657222, "loss": 1.5153, "step": 16680 }, { "epoch": 4.46018172100481, "grad_norm": 1.6405445337295532, "learning_rate": 0.00011687771746408134, "loss": 1.5327, "step": 16690 }, { "epoch": 4.462854088722608, "grad_norm": 1.771985411643982, "learning_rate": 0.00011679496101545593, "loss": 1.5819, "step": 16700 }, { "epoch": 4.465526456440406, "grad_norm": 1.4997650384902954, "learning_rate": 0.00011671219272902629, "loss": 1.589, "step": 16710 }, { "epoch": 4.468198824158204, "grad_norm": 1.574514627456665, "learning_rate": 0.00011662941266313101, "loss": 1.5826, "step": 16720 }, { "epoch": 4.470871191876002, "grad_norm": 1.5952101945877075, "learning_rate": 0.00011654662087611707, "loss": 1.5426, "step": 16730 }, { "epoch": 4.4735435595938, "grad_norm": 1.6968765258789062, "learning_rate": 0.00011646381742633959, "loss": 1.5159, "step": 16740 }, { "epoch": 4.476215927311598, "grad_norm": 1.6814135313034058, "learning_rate": 0.00011638100237216197, "loss": 1.4628, "step": 16750 }, { "epoch": 4.478888295029396, "grad_norm": 1.6689420938491821, "learning_rate": 0.0001162981757719558, "loss": 1.4763, "step": 16760 }, { "epoch": 4.481560662747194, "grad_norm": 1.742651343345642, "learning_rate": 0.00011621533768410087, "loss": 1.4145, "step": 16770 }, { "epoch": 4.484233030464992, "grad_norm": 1.593358039855957, "learning_rate": 0.00011613248816698488, "loss": 1.5209, "step": 16780 }, { "epoch": 4.48690539818279, "grad_norm": 1.813662052154541, "learning_rate": 0.00011604962727900375, "loss": 1.492, "step": 16790 }, { "epoch": 4.489577765900588, "grad_norm": 1.4908579587936401, "learning_rate": 0.00011596675507856138, "loss": 1.5273, "step": 16800 }, { "epoch": 4.492250133618386, "grad_norm": 1.5751073360443115, "learning_rate": 0.0001158838716240696, "loss": 1.5151, "step": 16810 }, { "epoch": 4.494922501336184, "grad_norm": 1.8383218050003052, "learning_rate": 0.00011580097697394823, "loss": 1.5644, "step": 16820 }, { "epoch": 4.497594869053982, "grad_norm": 1.5725160837173462, "learning_rate": 0.00011571807118662492, "loss": 1.5439, "step": 16830 }, { "epoch": 4.50026723677178, "grad_norm": 1.8009822368621826, "learning_rate": 0.00011563515432053522, "loss": 1.4875, "step": 16840 }, { "epoch": 4.502939604489578, "grad_norm": 1.4893590211868286, "learning_rate": 0.00011555222643412249, "loss": 1.4788, "step": 16850 }, { "epoch": 4.505611972207376, "grad_norm": 1.7862300872802734, "learning_rate": 0.00011546928758583777, "loss": 1.4898, "step": 16860 }, { "epoch": 4.508284339925174, "grad_norm": 1.8359789848327637, "learning_rate": 0.00011538633783413997, "loss": 1.5299, "step": 16870 }, { "epoch": 4.510956707642972, "grad_norm": 1.6124062538146973, "learning_rate": 0.00011530337723749559, "loss": 1.5457, "step": 16880 }, { "epoch": 4.51362907536077, "grad_norm": 1.6466174125671387, "learning_rate": 0.00011522040585437879, "loss": 1.4771, "step": 16890 }, { "epoch": 4.516301443078568, "grad_norm": 1.9064375162124634, "learning_rate": 0.0001151374237432713, "loss": 1.5142, "step": 16900 }, { "epoch": 4.518973810796366, "grad_norm": 1.6905407905578613, "learning_rate": 0.00011505443096266249, "loss": 1.5282, "step": 16910 }, { "epoch": 4.521646178514164, "grad_norm": 1.6533970832824707, "learning_rate": 0.0001149714275710492, "loss": 1.5009, "step": 16920 }, { "epoch": 4.524318546231962, "grad_norm": 1.6736377477645874, "learning_rate": 0.00011488841362693576, "loss": 1.5616, "step": 16930 }, { "epoch": 4.52699091394976, "grad_norm": 1.7624212503433228, "learning_rate": 0.00011480538918883395, "loss": 1.5338, "step": 16940 }, { "epoch": 4.529663281667558, "grad_norm": 1.6372759342193604, "learning_rate": 0.00011472235431526288, "loss": 1.594, "step": 16950 }, { "epoch": 4.532335649385355, "grad_norm": 1.683350920677185, "learning_rate": 0.0001146393090647491, "loss": 1.534, "step": 16960 }, { "epoch": 4.535008017103153, "grad_norm": 1.6282875537872314, "learning_rate": 0.00011455625349582648, "loss": 1.5676, "step": 16970 }, { "epoch": 4.537680384820951, "grad_norm": 1.816609263420105, "learning_rate": 0.00011447318766703606, "loss": 1.4262, "step": 16980 }, { "epoch": 4.540352752538749, "grad_norm": 1.822721242904663, "learning_rate": 0.00011439011163692623, "loss": 1.5414, "step": 16990 }, { "epoch": 4.543025120256547, "grad_norm": 1.5583539009094238, "learning_rate": 0.00011430702546405246, "loss": 1.4702, "step": 17000 }, { "epoch": 4.545697487974345, "grad_norm": 1.673153042793274, "learning_rate": 0.00011422392920697747, "loss": 1.5011, "step": 17010 }, { "epoch": 4.548369855692143, "grad_norm": 1.710513949394226, "learning_rate": 0.00011414082292427102, "loss": 1.5034, "step": 17020 }, { "epoch": 4.551042223409941, "grad_norm": 1.7203640937805176, "learning_rate": 0.00011405770667450996, "loss": 1.5271, "step": 17030 }, { "epoch": 4.553714591127739, "grad_norm": 1.5803667306900024, "learning_rate": 0.00011397458051627816, "loss": 1.5608, "step": 17040 }, { "epoch": 4.556386958845537, "grad_norm": 1.6926594972610474, "learning_rate": 0.00011389144450816653, "loss": 1.5609, "step": 17050 }, { "epoch": 4.559059326563335, "grad_norm": 1.5539196729660034, "learning_rate": 0.00011380829870877281, "loss": 1.5476, "step": 17060 }, { "epoch": 4.561731694281133, "grad_norm": 1.6851810216903687, "learning_rate": 0.00011372514317670171, "loss": 1.4733, "step": 17070 }, { "epoch": 4.564404061998931, "grad_norm": 1.6946110725402832, "learning_rate": 0.00011364197797056482, "loss": 1.5316, "step": 17080 }, { "epoch": 4.567076429716729, "grad_norm": 1.5859239101409912, "learning_rate": 0.00011355880314898053, "loss": 1.5671, "step": 17090 }, { "epoch": 4.569748797434527, "grad_norm": 1.6291956901550293, "learning_rate": 0.00011347561877057396, "loss": 1.5179, "step": 17100 }, { "epoch": 4.572421165152325, "grad_norm": 1.643264651298523, "learning_rate": 0.00011339242489397699, "loss": 1.5202, "step": 17110 }, { "epoch": 4.575093532870123, "grad_norm": 1.6885887384414673, "learning_rate": 0.00011330922157782828, "loss": 1.505, "step": 17120 }, { "epoch": 4.577765900587921, "grad_norm": 1.5524781942367554, "learning_rate": 0.00011322600888077303, "loss": 1.5031, "step": 17130 }, { "epoch": 4.580438268305719, "grad_norm": 1.680329442024231, "learning_rate": 0.0001131427868614631, "loss": 1.4235, "step": 17140 }, { "epoch": 4.583110636023517, "grad_norm": 1.6419285535812378, "learning_rate": 0.00011305955557855689, "loss": 1.5403, "step": 17150 }, { "epoch": 4.585783003741315, "grad_norm": 1.6222217082977295, "learning_rate": 0.00011297631509071942, "loss": 1.5205, "step": 17160 }, { "epoch": 4.588455371459113, "grad_norm": 1.6735419034957886, "learning_rate": 0.00011289306545662206, "loss": 1.6031, "step": 17170 }, { "epoch": 4.591127739176911, "grad_norm": 1.6568113565444946, "learning_rate": 0.00011280980673494278, "loss": 1.5723, "step": 17180 }, { "epoch": 4.593800106894709, "grad_norm": 1.6171001195907593, "learning_rate": 0.00011272653898436578, "loss": 1.4823, "step": 17190 }, { "epoch": 4.596472474612507, "grad_norm": 1.6146831512451172, "learning_rate": 0.00011264326226358177, "loss": 1.5652, "step": 17200 }, { "epoch": 4.599144842330305, "grad_norm": 1.5950102806091309, "learning_rate": 0.00011255997663128774, "loss": 1.465, "step": 17210 }, { "epoch": 4.601817210048103, "grad_norm": 1.6530253887176514, "learning_rate": 0.00011247668214618693, "loss": 1.4929, "step": 17220 }, { "epoch": 4.604489577765901, "grad_norm": 1.644036889076233, "learning_rate": 0.00011239337886698887, "loss": 1.4702, "step": 17230 }, { "epoch": 4.607161945483698, "grad_norm": 1.6774314641952515, "learning_rate": 0.00011231006685240923, "loss": 1.5021, "step": 17240 }, { "epoch": 4.609834313201496, "grad_norm": 1.683488130569458, "learning_rate": 0.0001122267461611699, "loss": 1.5395, "step": 17250 }, { "epoch": 4.612506680919294, "grad_norm": 1.850111722946167, "learning_rate": 0.00011214341685199883, "loss": 1.5249, "step": 17260 }, { "epoch": 4.615179048637092, "grad_norm": 1.778827428817749, "learning_rate": 0.00011206007898363007, "loss": 1.4886, "step": 17270 }, { "epoch": 4.61785141635489, "grad_norm": 1.686880350112915, "learning_rate": 0.0001119767326148037, "loss": 1.4967, "step": 17280 }, { "epoch": 4.620523784072688, "grad_norm": 1.7504363059997559, "learning_rate": 0.00011189337780426583, "loss": 1.5259, "step": 17290 }, { "epoch": 4.623196151790486, "grad_norm": 1.7304437160491943, "learning_rate": 0.00011181001461076843, "loss": 1.6043, "step": 17300 }, { "epoch": 4.625868519508284, "grad_norm": 1.5700219869613647, "learning_rate": 0.00011172664309306943, "loss": 1.4881, "step": 17310 }, { "epoch": 4.628540887226082, "grad_norm": 1.7662694454193115, "learning_rate": 0.00011164326330993268, "loss": 1.5357, "step": 17320 }, { "epoch": 4.63121325494388, "grad_norm": 1.661639928817749, "learning_rate": 0.00011155987532012779, "loss": 1.5531, "step": 17330 }, { "epoch": 4.633885622661678, "grad_norm": 1.7782138586044312, "learning_rate": 0.00011147647918243013, "loss": 1.5453, "step": 17340 }, { "epoch": 4.636557990379476, "grad_norm": 1.8270082473754883, "learning_rate": 0.0001113930749556209, "loss": 1.5348, "step": 17350 }, { "epoch": 4.639230358097274, "grad_norm": 1.7289249897003174, "learning_rate": 0.00011130966269848691, "loss": 1.5346, "step": 17360 }, { "epoch": 4.641902725815072, "grad_norm": 1.5998317003250122, "learning_rate": 0.00011122624246982072, "loss": 1.522, "step": 17370 }, { "epoch": 4.64457509353287, "grad_norm": 1.8454827070236206, "learning_rate": 0.0001111428143284204, "loss": 1.5138, "step": 17380 }, { "epoch": 4.647247461250668, "grad_norm": 1.699512243270874, "learning_rate": 0.00011105937833308968, "loss": 1.5008, "step": 17390 }, { "epoch": 4.649919828968466, "grad_norm": 1.6202223300933838, "learning_rate": 0.00011097593454263784, "loss": 1.4387, "step": 17400 }, { "epoch": 4.652592196686264, "grad_norm": 1.6622673273086548, "learning_rate": 0.00011089248301587962, "loss": 1.4613, "step": 17410 }, { "epoch": 4.655264564404062, "grad_norm": 1.6387486457824707, "learning_rate": 0.00011080902381163519, "loss": 1.5228, "step": 17420 }, { "epoch": 4.65793693212186, "grad_norm": 1.6574199199676514, "learning_rate": 0.00011072555698873013, "loss": 1.5268, "step": 17430 }, { "epoch": 4.660609299839658, "grad_norm": 1.704759955406189, "learning_rate": 0.00011064208260599542, "loss": 1.5739, "step": 17440 }, { "epoch": 4.663281667557456, "grad_norm": 2.1619670391082764, "learning_rate": 0.00011055860072226743, "loss": 1.5523, "step": 17450 }, { "epoch": 4.665954035275254, "grad_norm": 1.734423041343689, "learning_rate": 0.00011047511139638767, "loss": 1.5457, "step": 17460 }, { "epoch": 4.668626402993052, "grad_norm": 1.6960095167160034, "learning_rate": 0.000110391614687203, "loss": 1.5086, "step": 17470 }, { "epoch": 4.67129877071085, "grad_norm": 1.6343101263046265, "learning_rate": 0.00011030811065356548, "loss": 1.5039, "step": 17480 }, { "epoch": 4.673971138428648, "grad_norm": 1.7738159894943237, "learning_rate": 0.00011022459935433231, "loss": 1.5295, "step": 17490 }, { "epoch": 4.676643506146446, "grad_norm": 1.6277084350585938, "learning_rate": 0.00011014108084836576, "loss": 1.5171, "step": 17500 }, { "epoch": 4.679315873864244, "grad_norm": 1.579516887664795, "learning_rate": 0.00011005755519453328, "loss": 1.5303, "step": 17510 }, { "epoch": 4.681988241582042, "grad_norm": 1.6907621622085571, "learning_rate": 0.00010997402245170734, "loss": 1.4913, "step": 17520 }, { "epoch": 4.68466060929984, "grad_norm": 1.5866539478302002, "learning_rate": 0.00010989048267876529, "loss": 1.5551, "step": 17530 }, { "epoch": 4.687332977017638, "grad_norm": 1.681728482246399, "learning_rate": 0.00010980693593458962, "loss": 1.502, "step": 17540 }, { "epoch": 4.690005344735436, "grad_norm": 1.8035271167755127, "learning_rate": 0.00010972338227806754, "loss": 1.4714, "step": 17550 }, { "epoch": 4.692677712453234, "grad_norm": 1.6702238321304321, "learning_rate": 0.00010963982176809126, "loss": 1.5588, "step": 17560 }, { "epoch": 4.695350080171032, "grad_norm": 1.6105695962905884, "learning_rate": 0.00010955625446355784, "loss": 1.6007, "step": 17570 }, { "epoch": 4.69802244788883, "grad_norm": 1.6845314502716064, "learning_rate": 0.000109472680423369, "loss": 1.5524, "step": 17580 }, { "epoch": 4.700694815606628, "grad_norm": 1.7203104496002197, "learning_rate": 0.00010938909970643134, "loss": 1.5257, "step": 17590 }, { "epoch": 4.703367183324426, "grad_norm": 1.7428655624389648, "learning_rate": 0.00010930551237165609, "loss": 1.5282, "step": 17600 }, { "epoch": 4.706039551042223, "grad_norm": 1.62808358669281, "learning_rate": 0.00010922191847795919, "loss": 1.5423, "step": 17610 }, { "epoch": 4.708711918760021, "grad_norm": 1.7725077867507935, "learning_rate": 0.00010913831808426113, "loss": 1.4918, "step": 17620 }, { "epoch": 4.711384286477819, "grad_norm": 1.71577787399292, "learning_rate": 0.00010905471124948707, "loss": 1.4809, "step": 17630 }, { "epoch": 4.714056654195617, "grad_norm": 1.6842659711837769, "learning_rate": 0.00010897109803256667, "loss": 1.5723, "step": 17640 }, { "epoch": 4.716729021913415, "grad_norm": 1.6058441400527954, "learning_rate": 0.00010888747849243406, "loss": 1.4779, "step": 17650 }, { "epoch": 4.719401389631213, "grad_norm": 1.6452794075012207, "learning_rate": 0.00010880385268802789, "loss": 1.5023, "step": 17660 }, { "epoch": 4.722073757349011, "grad_norm": 1.5528175830841064, "learning_rate": 0.00010872022067829115, "loss": 1.5177, "step": 17670 }, { "epoch": 4.724746125066809, "grad_norm": 1.8851854801177979, "learning_rate": 0.00010863658252217125, "loss": 1.5116, "step": 17680 }, { "epoch": 4.727418492784607, "grad_norm": 1.567244052886963, "learning_rate": 0.00010855293827862002, "loss": 1.5115, "step": 17690 }, { "epoch": 4.730090860502405, "grad_norm": 1.7209495306015015, "learning_rate": 0.00010846928800659333, "loss": 1.5098, "step": 17700 }, { "epoch": 4.732763228220203, "grad_norm": 1.6979482173919678, "learning_rate": 0.00010838563176505158, "loss": 1.5321, "step": 17710 }, { "epoch": 4.735435595938001, "grad_norm": 1.5735869407653809, "learning_rate": 0.0001083019696129592, "loss": 1.5287, "step": 17720 }, { "epoch": 4.738107963655799, "grad_norm": 1.691196084022522, "learning_rate": 0.00010821830160928486, "loss": 1.5122, "step": 17730 }, { "epoch": 4.740780331373597, "grad_norm": 1.6603938341140747, "learning_rate": 0.00010813462781300133, "loss": 1.4964, "step": 17740 }, { "epoch": 4.743452699091395, "grad_norm": 1.6050552129745483, "learning_rate": 0.00010805094828308544, "loss": 1.5483, "step": 17750 }, { "epoch": 4.746125066809193, "grad_norm": 1.6820952892303467, "learning_rate": 0.0001079672630785181, "loss": 1.5085, "step": 17760 }, { "epoch": 4.748797434526991, "grad_norm": 1.7059487104415894, "learning_rate": 0.00010788357225828427, "loss": 1.6139, "step": 17770 }, { "epoch": 4.751469802244789, "grad_norm": 1.7276294231414795, "learning_rate": 0.00010779987588137272, "loss": 1.4786, "step": 17780 }, { "epoch": 4.754142169962587, "grad_norm": 1.722737193107605, "learning_rate": 0.00010771617400677624, "loss": 1.5177, "step": 17790 }, { "epoch": 4.756814537680385, "grad_norm": 1.7700971364974976, "learning_rate": 0.00010763246669349154, "loss": 1.4759, "step": 17800 }, { "epoch": 4.759486905398183, "grad_norm": 1.8075792789459229, "learning_rate": 0.00010754875400051907, "loss": 1.4808, "step": 17810 }, { "epoch": 4.762159273115981, "grad_norm": 1.7821334600448608, "learning_rate": 0.00010746503598686305, "loss": 1.5287, "step": 17820 }, { "epoch": 4.764831640833779, "grad_norm": 1.6413185596466064, "learning_rate": 0.00010738131271153156, "loss": 1.4688, "step": 17830 }, { "epoch": 4.767504008551577, "grad_norm": 1.663130760192871, "learning_rate": 0.00010729758423353633, "loss": 1.5225, "step": 17840 }, { "epoch": 4.770176376269375, "grad_norm": 1.6460022926330566, "learning_rate": 0.00010721385061189275, "loss": 1.4966, "step": 17850 }, { "epoch": 4.772848743987173, "grad_norm": 1.723634123802185, "learning_rate": 0.00010713011190561983, "loss": 1.4607, "step": 17860 }, { "epoch": 4.775521111704971, "grad_norm": 1.6349022388458252, "learning_rate": 0.00010704636817374018, "loss": 1.4627, "step": 17870 }, { "epoch": 4.778193479422769, "grad_norm": 1.7217038869857788, "learning_rate": 0.00010696261947527999, "loss": 1.5699, "step": 17880 }, { "epoch": 4.780865847140566, "grad_norm": 1.7527496814727783, "learning_rate": 0.00010687886586926889, "loss": 1.5585, "step": 17890 }, { "epoch": 4.783538214858364, "grad_norm": 1.7060827016830444, "learning_rate": 0.00010679510741473997, "loss": 1.524, "step": 17900 }, { "epoch": 4.786210582576162, "grad_norm": 1.7142846584320068, "learning_rate": 0.00010671134417072979, "loss": 1.5867, "step": 17910 }, { "epoch": 4.78888295029396, "grad_norm": 1.581526517868042, "learning_rate": 0.00010662757619627821, "loss": 1.5185, "step": 17920 }, { "epoch": 4.791555318011758, "grad_norm": 1.7608598470687866, "learning_rate": 0.00010654380355042852, "loss": 1.5467, "step": 17930 }, { "epoch": 4.794227685729556, "grad_norm": 1.634638786315918, "learning_rate": 0.0001064600262922272, "loss": 1.5749, "step": 17940 }, { "epoch": 4.796900053447354, "grad_norm": 1.8929290771484375, "learning_rate": 0.00010637624448072408, "loss": 1.5064, "step": 17950 }, { "epoch": 4.799572421165152, "grad_norm": 1.6078649759292603, "learning_rate": 0.00010629245817497212, "loss": 1.5267, "step": 17960 }, { "epoch": 4.80224478888295, "grad_norm": 1.4439537525177002, "learning_rate": 0.00010620866743402749, "loss": 1.5473, "step": 17970 }, { "epoch": 4.804917156600748, "grad_norm": 1.642608642578125, "learning_rate": 0.00010612487231694946, "loss": 1.5439, "step": 17980 }, { "epoch": 4.807589524318546, "grad_norm": 1.7744070291519165, "learning_rate": 0.0001060410728828004, "loss": 1.4915, "step": 17990 }, { "epoch": 4.810261892036344, "grad_norm": 1.6814850568771362, "learning_rate": 0.00010595726919064574, "loss": 1.4888, "step": 18000 }, { "epoch": 4.812934259754142, "grad_norm": 1.6025012731552124, "learning_rate": 0.00010587346129955393, "loss": 1.5233, "step": 18010 }, { "epoch": 4.81560662747194, "grad_norm": 1.5257656574249268, "learning_rate": 0.00010578964926859627, "loss": 1.5285, "step": 18020 }, { "epoch": 4.818278995189738, "grad_norm": 1.627409815788269, "learning_rate": 0.00010570583315684708, "loss": 1.4537, "step": 18030 }, { "epoch": 4.820951362907536, "grad_norm": 1.6643608808517456, "learning_rate": 0.0001056220130233835, "loss": 1.6133, "step": 18040 }, { "epoch": 4.823623730625334, "grad_norm": 1.6544005870819092, "learning_rate": 0.00010553818892728562, "loss": 1.4923, "step": 18050 }, { "epoch": 4.826296098343132, "grad_norm": 1.677706003189087, "learning_rate": 0.00010545436092763614, "loss": 1.4986, "step": 18060 }, { "epoch": 4.82896846606093, "grad_norm": 1.5974152088165283, "learning_rate": 0.00010537052908352065, "loss": 1.5083, "step": 18070 }, { "epoch": 4.831640833778728, "grad_norm": 1.6557848453521729, "learning_rate": 0.00010528669345402742, "loss": 1.5287, "step": 18080 }, { "epoch": 4.834313201496526, "grad_norm": 1.7581212520599365, "learning_rate": 0.00010520285409824733, "loss": 1.5609, "step": 18090 }, { "epoch": 4.836985569214324, "grad_norm": 1.649861216545105, "learning_rate": 0.00010511901107527398, "loss": 1.5856, "step": 18100 }, { "epoch": 4.839657936932122, "grad_norm": 1.6427505016326904, "learning_rate": 0.00010503516444420345, "loss": 1.5213, "step": 18110 }, { "epoch": 4.84233030464992, "grad_norm": 1.7283744812011719, "learning_rate": 0.00010495131426413444, "loss": 1.5068, "step": 18120 }, { "epoch": 4.845002672367718, "grad_norm": 1.759529948234558, "learning_rate": 0.00010486746059416816, "loss": 1.4968, "step": 18130 }, { "epoch": 4.847675040085516, "grad_norm": 1.6878550052642822, "learning_rate": 0.00010478360349340823, "loss": 1.5685, "step": 18140 }, { "epoch": 4.850347407803314, "grad_norm": 1.6848292350769043, "learning_rate": 0.00010469974302096068, "loss": 1.5831, "step": 18150 }, { "epoch": 4.853019775521112, "grad_norm": 1.613651156425476, "learning_rate": 0.00010461587923593399, "loss": 1.5108, "step": 18160 }, { "epoch": 4.85569214323891, "grad_norm": 1.599210500717163, "learning_rate": 0.00010453201219743894, "loss": 1.4874, "step": 18170 }, { "epoch": 4.858364510956708, "grad_norm": 1.4573466777801514, "learning_rate": 0.0001044481419645885, "loss": 1.4947, "step": 18180 }, { "epoch": 4.861036878674506, "grad_norm": 1.7144567966461182, "learning_rate": 0.00010436426859649808, "loss": 1.5119, "step": 18190 }, { "epoch": 4.863709246392304, "grad_norm": 1.716574788093567, "learning_rate": 0.00010428039215228516, "loss": 1.5, "step": 18200 }, { "epoch": 4.866381614110102, "grad_norm": 1.664913535118103, "learning_rate": 0.00010419651269106947, "loss": 1.4667, "step": 18210 }, { "epoch": 4.8690539818279, "grad_norm": 1.7342499494552612, "learning_rate": 0.0001041126302719728, "loss": 1.5026, "step": 18220 }, { "epoch": 4.871726349545698, "grad_norm": 1.7548737525939941, "learning_rate": 0.00010402874495411902, "loss": 1.5029, "step": 18230 }, { "epoch": 4.874398717263496, "grad_norm": 1.5733733177185059, "learning_rate": 0.00010394485679663412, "loss": 1.5932, "step": 18240 }, { "epoch": 4.877071084981293, "grad_norm": 1.4801501035690308, "learning_rate": 0.00010386096585864609, "loss": 1.5043, "step": 18250 }, { "epoch": 4.879743452699091, "grad_norm": 1.5585309267044067, "learning_rate": 0.00010377707219928474, "loss": 1.5304, "step": 18260 }, { "epoch": 4.882415820416889, "grad_norm": 1.6687904596328735, "learning_rate": 0.00010369317587768199, "loss": 1.5446, "step": 18270 }, { "epoch": 4.885088188134687, "grad_norm": 1.7294179201126099, "learning_rate": 0.00010360927695297146, "loss": 1.5587, "step": 18280 }, { "epoch": 4.887760555852485, "grad_norm": 1.885548710823059, "learning_rate": 0.00010352537548428876, "loss": 1.498, "step": 18290 }, { "epoch": 4.890432923570283, "grad_norm": 1.6851317882537842, "learning_rate": 0.00010344147153077115, "loss": 1.5331, "step": 18300 }, { "epoch": 4.893105291288081, "grad_norm": 1.45623779296875, "learning_rate": 0.00010335756515155775, "loss": 1.499, "step": 18310 }, { "epoch": 4.895777659005879, "grad_norm": 1.7638206481933594, "learning_rate": 0.00010327365640578937, "loss": 1.514, "step": 18320 }, { "epoch": 4.898450026723677, "grad_norm": 1.7740068435668945, "learning_rate": 0.00010318974535260844, "loss": 1.4728, "step": 18330 }, { "epoch": 4.901122394441475, "grad_norm": 1.7067680358886719, "learning_rate": 0.00010310583205115905, "loss": 1.5582, "step": 18340 }, { "epoch": 4.903794762159273, "grad_norm": 1.7819015979766846, "learning_rate": 0.00010302191656058686, "loss": 1.538, "step": 18350 }, { "epoch": 4.906467129877071, "grad_norm": 1.7383277416229248, "learning_rate": 0.00010293799894003912, "loss": 1.559, "step": 18360 }, { "epoch": 4.909139497594869, "grad_norm": 1.5966684818267822, "learning_rate": 0.00010285407924866454, "loss": 1.5746, "step": 18370 }, { "epoch": 4.911811865312667, "grad_norm": 1.6552889347076416, "learning_rate": 0.00010277015754561328, "loss": 1.539, "step": 18380 }, { "epoch": 4.914484233030465, "grad_norm": 1.4528568983078003, "learning_rate": 0.00010268623389003691, "loss": 1.5546, "step": 18390 }, { "epoch": 4.917156600748263, "grad_norm": 1.4890824556350708, "learning_rate": 0.00010260230834108841, "loss": 1.5143, "step": 18400 }, { "epoch": 4.919828968466061, "grad_norm": 1.7056405544281006, "learning_rate": 0.00010251838095792215, "loss": 1.4425, "step": 18410 }, { "epoch": 4.922501336183859, "grad_norm": 1.6442804336547852, "learning_rate": 0.00010243445179969362, "loss": 1.6035, "step": 18420 }, { "epoch": 4.925173703901657, "grad_norm": 1.7363308668136597, "learning_rate": 0.00010235052092555972, "loss": 1.55, "step": 18430 }, { "epoch": 4.927846071619455, "grad_norm": 1.6665538549423218, "learning_rate": 0.00010226658839467855, "loss": 1.5404, "step": 18440 }, { "epoch": 4.930518439337253, "grad_norm": 1.6387377977371216, "learning_rate": 0.00010218265426620925, "loss": 1.497, "step": 18450 }, { "epoch": 4.933190807055051, "grad_norm": 1.5948083400726318, "learning_rate": 0.00010209871859931224, "loss": 1.4706, "step": 18460 }, { "epoch": 4.935863174772849, "grad_norm": 1.6902433633804321, "learning_rate": 0.00010201478145314888, "loss": 1.5247, "step": 18470 }, { "epoch": 4.938535542490647, "grad_norm": 1.7885191440582275, "learning_rate": 0.00010193084288688173, "loss": 1.5034, "step": 18480 }, { "epoch": 4.941207910208445, "grad_norm": 1.6652768850326538, "learning_rate": 0.00010184690295967422, "loss": 1.5281, "step": 18490 }, { "epoch": 4.943880277926243, "grad_norm": 1.7208900451660156, "learning_rate": 0.00010176296173069079, "loss": 1.4823, "step": 18500 }, { "epoch": 4.946552645644041, "grad_norm": 1.4993844032287598, "learning_rate": 0.00010167901925909681, "loss": 1.5828, "step": 18510 }, { "epoch": 4.949225013361839, "grad_norm": 1.6090344190597534, "learning_rate": 0.0001015950756040585, "loss": 1.5345, "step": 18520 }, { "epoch": 4.951897381079636, "grad_norm": 1.9685152769088745, "learning_rate": 0.00010151113082474295, "loss": 1.5182, "step": 18530 }, { "epoch": 4.954569748797434, "grad_norm": 1.5380126237869263, "learning_rate": 0.00010142718498031799, "loss": 1.5448, "step": 18540 }, { "epoch": 4.957242116515232, "grad_norm": 1.6140483617782593, "learning_rate": 0.00010134323812995221, "loss": 1.5863, "step": 18550 }, { "epoch": 4.95991448423303, "grad_norm": 1.865526556968689, "learning_rate": 0.00010125929033281499, "loss": 1.5463, "step": 18560 }, { "epoch": 4.962586851950828, "grad_norm": 1.67505943775177, "learning_rate": 0.00010117534164807628, "loss": 1.4375, "step": 18570 }, { "epoch": 4.965259219668626, "grad_norm": 1.5862981081008911, "learning_rate": 0.00010109139213490666, "loss": 1.5158, "step": 18580 }, { "epoch": 4.967931587386424, "grad_norm": 1.5448644161224365, "learning_rate": 0.00010100744185247737, "loss": 1.5918, "step": 18590 }, { "epoch": 4.970603955104222, "grad_norm": 1.7706352472305298, "learning_rate": 0.00010092349085996011, "loss": 1.4799, "step": 18600 }, { "epoch": 4.97327632282202, "grad_norm": 1.5707390308380127, "learning_rate": 0.00010083953921652718, "loss": 1.5097, "step": 18610 }, { "epoch": 4.975948690539818, "grad_norm": 1.6571247577667236, "learning_rate": 0.00010075558698135121, "loss": 1.5722, "step": 18620 }, { "epoch": 4.978621058257616, "grad_norm": 1.6878763437271118, "learning_rate": 0.00010067163421360535, "loss": 1.4935, "step": 18630 }, { "epoch": 4.981293425975414, "grad_norm": 1.627952218055725, "learning_rate": 0.00010058768097246307, "loss": 1.4999, "step": 18640 }, { "epoch": 4.983965793693212, "grad_norm": 1.6279401779174805, "learning_rate": 0.0001005037273170982, "loss": 1.496, "step": 18650 }, { "epoch": 4.98663816141101, "grad_norm": 1.705061674118042, "learning_rate": 0.00010041977330668484, "loss": 1.438, "step": 18660 }, { "epoch": 4.989310529128808, "grad_norm": 1.5573621988296509, "learning_rate": 0.00010033581900039735, "loss": 1.4353, "step": 18670 }, { "epoch": 4.991982896846606, "grad_norm": 1.6259018182754517, "learning_rate": 0.00010025186445741033, "loss": 1.5293, "step": 18680 }, { "epoch": 4.994655264564404, "grad_norm": 1.663500189781189, "learning_rate": 0.00010016790973689853, "loss": 1.5599, "step": 18690 }, { "epoch": 4.997327632282202, "grad_norm": 1.6640194654464722, "learning_rate": 0.00010008395489803677, "loss": 1.5134, "step": 18700 }, { "epoch": 5.0, "grad_norm": 1.5554745197296143, "learning_rate": 0.0001, "loss": 1.5918, "step": 18710 }, { "epoch": 5.002672367717798, "grad_norm": 1.7414125204086304, "learning_rate": 9.991604510196329e-05, "loss": 1.3742, "step": 18720 }, { "epoch": 5.005344735435596, "grad_norm": 1.72783625125885, "learning_rate": 9.983209026310149e-05, "loss": 1.3763, "step": 18730 }, { "epoch": 5.008017103153394, "grad_norm": 2.173156261444092, "learning_rate": 9.974813554258969e-05, "loss": 1.4874, "step": 18740 }, { "epoch": 5.010689470871192, "grad_norm": 1.8801006078720093, "learning_rate": 9.966418099960263e-05, "loss": 1.3718, "step": 18750 }, { "epoch": 5.01336183858899, "grad_norm": 1.846749186515808, "learning_rate": 9.95802266933152e-05, "loss": 1.4114, "step": 18760 }, { "epoch": 5.016034206306788, "grad_norm": 1.8073856830596924, "learning_rate": 9.949627268290185e-05, "loss": 1.3464, "step": 18770 }, { "epoch": 5.018706574024586, "grad_norm": 1.755150318145752, "learning_rate": 9.941231902753696e-05, "loss": 1.3483, "step": 18780 }, { "epoch": 5.021378941742384, "grad_norm": 1.779171347618103, "learning_rate": 9.932836578639466e-05, "loss": 1.3527, "step": 18790 }, { "epoch": 5.024051309460182, "grad_norm": 1.8565480709075928, "learning_rate": 9.924441301864881e-05, "loss": 1.3768, "step": 18800 }, { "epoch": 5.02672367717798, "grad_norm": 1.936449408531189, "learning_rate": 9.916046078347283e-05, "loss": 1.4007, "step": 18810 }, { "epoch": 5.029396044895778, "grad_norm": 1.9122281074523926, "learning_rate": 9.90765091400399e-05, "loss": 1.3241, "step": 18820 }, { "epoch": 5.032068412613576, "grad_norm": 1.670769214630127, "learning_rate": 9.899255814752263e-05, "loss": 1.3464, "step": 18830 }, { "epoch": 5.034740780331374, "grad_norm": 1.7659333944320679, "learning_rate": 9.890860786509337e-05, "loss": 1.3799, "step": 18840 }, { "epoch": 5.037413148049172, "grad_norm": 1.6679108142852783, "learning_rate": 9.882465835192377e-05, "loss": 1.365, "step": 18850 }, { "epoch": 5.04008551576697, "grad_norm": 1.6594696044921875, "learning_rate": 9.874070966718502e-05, "loss": 1.3707, "step": 18860 }, { "epoch": 5.042757883484768, "grad_norm": 1.8254822492599487, "learning_rate": 9.86567618700478e-05, "loss": 1.3905, "step": 18870 }, { "epoch": 5.045430251202565, "grad_norm": 1.815039038658142, "learning_rate": 9.857281501968206e-05, "loss": 1.4385, "step": 18880 }, { "epoch": 5.048102618920363, "grad_norm": 1.7348051071166992, "learning_rate": 9.848886917525707e-05, "loss": 1.3823, "step": 18890 }, { "epoch": 5.050774986638161, "grad_norm": 1.7569689750671387, "learning_rate": 9.840492439594152e-05, "loss": 1.3298, "step": 18900 }, { "epoch": 5.053447354355959, "grad_norm": 1.709765911102295, "learning_rate": 9.83209807409032e-05, "loss": 1.3331, "step": 18910 }, { "epoch": 5.056119722073757, "grad_norm": 1.85061514377594, "learning_rate": 9.823703826930922e-05, "loss": 1.4231, "step": 18920 }, { "epoch": 5.058792089791555, "grad_norm": 1.7782377004623413, "learning_rate": 9.815309704032583e-05, "loss": 1.3423, "step": 18930 }, { "epoch": 5.061464457509353, "grad_norm": 1.7693212032318115, "learning_rate": 9.806915711311828e-05, "loss": 1.3175, "step": 18940 }, { "epoch": 5.064136825227151, "grad_norm": 1.6982011795043945, "learning_rate": 9.798521854685113e-05, "loss": 1.3129, "step": 18950 }, { "epoch": 5.066809192944949, "grad_norm": 1.6515928506851196, "learning_rate": 9.790128140068782e-05, "loss": 1.3634, "step": 18960 }, { "epoch": 5.069481560662747, "grad_norm": 1.8829503059387207, "learning_rate": 9.781734573379077e-05, "loss": 1.3536, "step": 18970 }, { "epoch": 5.072153928380545, "grad_norm": 1.8078536987304688, "learning_rate": 9.773341160532148e-05, "loss": 1.2813, "step": 18980 }, { "epoch": 5.074826296098343, "grad_norm": 1.822912335395813, "learning_rate": 9.764947907444027e-05, "loss": 1.3712, "step": 18990 }, { "epoch": 5.077498663816141, "grad_norm": 1.8065963983535767, "learning_rate": 9.75655482003064e-05, "loss": 1.3676, "step": 19000 }, { "epoch": 5.080171031533939, "grad_norm": 1.9224086999893188, "learning_rate": 9.74816190420779e-05, "loss": 1.3197, "step": 19010 }, { "epoch": 5.082843399251737, "grad_norm": 1.7556531429290771, "learning_rate": 9.739769165891158e-05, "loss": 1.332, "step": 19020 }, { "epoch": 5.085515766969535, "grad_norm": 2.011889696121216, "learning_rate": 9.731376610996313e-05, "loss": 1.3945, "step": 19030 }, { "epoch": 5.088188134687333, "grad_norm": 1.82138192653656, "learning_rate": 9.722984245438678e-05, "loss": 1.4567, "step": 19040 }, { "epoch": 5.090860502405131, "grad_norm": 1.7994951009750366, "learning_rate": 9.714592075133548e-05, "loss": 1.3876, "step": 19050 }, { "epoch": 5.093532870122929, "grad_norm": 1.8251056671142578, "learning_rate": 9.706200105996089e-05, "loss": 1.3479, "step": 19060 }, { "epoch": 5.096205237840727, "grad_norm": 1.8530081510543823, "learning_rate": 9.697808343941313e-05, "loss": 1.4082, "step": 19070 }, { "epoch": 5.098877605558525, "grad_norm": 1.7855250835418701, "learning_rate": 9.689416794884098e-05, "loss": 1.3922, "step": 19080 }, { "epoch": 5.101549973276323, "grad_norm": 1.8199368715286255, "learning_rate": 9.681025464739161e-05, "loss": 1.4061, "step": 19090 }, { "epoch": 5.104222340994121, "grad_norm": 1.7470518350601196, "learning_rate": 9.672634359421064e-05, "loss": 1.3625, "step": 19100 }, { "epoch": 5.106894708711919, "grad_norm": 1.8709263801574707, "learning_rate": 9.664243484844226e-05, "loss": 1.4037, "step": 19110 }, { "epoch": 5.109567076429717, "grad_norm": 1.6874449253082275, "learning_rate": 9.655852846922889e-05, "loss": 1.3718, "step": 19120 }, { "epoch": 5.112239444147515, "grad_norm": 1.8321990966796875, "learning_rate": 9.647462451571127e-05, "loss": 1.3687, "step": 19130 }, { "epoch": 5.114911811865313, "grad_norm": 1.8551712036132812, "learning_rate": 9.639072304702856e-05, "loss": 1.461, "step": 19140 }, { "epoch": 5.117584179583111, "grad_norm": 1.9754506349563599, "learning_rate": 9.630682412231803e-05, "loss": 1.3965, "step": 19150 }, { "epoch": 5.120256547300909, "grad_norm": 1.7540966272354126, "learning_rate": 9.622292780071527e-05, "loss": 1.38, "step": 19160 }, { "epoch": 5.122928915018707, "grad_norm": 1.7249715328216553, "learning_rate": 9.613903414135394e-05, "loss": 1.3296, "step": 19170 }, { "epoch": 5.125601282736505, "grad_norm": 1.878151774406433, "learning_rate": 9.605514320336586e-05, "loss": 1.353, "step": 19180 }, { "epoch": 5.128273650454302, "grad_norm": 2.0917043685913086, "learning_rate": 9.597125504588099e-05, "loss": 1.4847, "step": 19190 }, { "epoch": 5.1309460181721, "grad_norm": 1.8457001447677612, "learning_rate": 9.588736972802726e-05, "loss": 1.4028, "step": 19200 }, { "epoch": 5.133618385889898, "grad_norm": 1.8538477420806885, "learning_rate": 9.580348730893054e-05, "loss": 1.4034, "step": 19210 }, { "epoch": 5.136290753607696, "grad_norm": 1.855852484703064, "learning_rate": 9.571960784771485e-05, "loss": 1.3972, "step": 19220 }, { "epoch": 5.138963121325494, "grad_norm": 1.7284225225448608, "learning_rate": 9.563573140350192e-05, "loss": 1.4122, "step": 19230 }, { "epoch": 5.141635489043292, "grad_norm": 1.8227636814117432, "learning_rate": 9.555185803541151e-05, "loss": 1.4259, "step": 19240 }, { "epoch": 5.14430785676109, "grad_norm": 1.6609549522399902, "learning_rate": 9.546798780256111e-05, "loss": 1.321, "step": 19250 }, { "epoch": 5.146980224478888, "grad_norm": 1.7828965187072754, "learning_rate": 9.5384120764066e-05, "loss": 1.4045, "step": 19260 }, { "epoch": 5.149652592196686, "grad_norm": 1.7603487968444824, "learning_rate": 9.530025697903934e-05, "loss": 1.3366, "step": 19270 }, { "epoch": 5.152324959914484, "grad_norm": 1.9478005170822144, "learning_rate": 9.521639650659182e-05, "loss": 1.3052, "step": 19280 }, { "epoch": 5.154997327632282, "grad_norm": 1.7383671998977661, "learning_rate": 9.513253940583186e-05, "loss": 1.48, "step": 19290 }, { "epoch": 5.15766969535008, "grad_norm": 1.86366868019104, "learning_rate": 9.504868573586558e-05, "loss": 1.4074, "step": 19300 }, { "epoch": 5.160342063067878, "grad_norm": 1.9816826581954956, "learning_rate": 9.496483555579658e-05, "loss": 1.3471, "step": 19310 }, { "epoch": 5.163014430785676, "grad_norm": 1.709796667098999, "learning_rate": 9.488098892472606e-05, "loss": 1.3413, "step": 19320 }, { "epoch": 5.165686798503474, "grad_norm": 1.8086800575256348, "learning_rate": 9.47971459017527e-05, "loss": 1.3767, "step": 19330 }, { "epoch": 5.168359166221272, "grad_norm": 1.8949642181396484, "learning_rate": 9.47133065459726e-05, "loss": 1.4077, "step": 19340 }, { "epoch": 5.17103153393907, "grad_norm": 1.7108567953109741, "learning_rate": 9.462947091647937e-05, "loss": 1.345, "step": 19350 }, { "epoch": 5.173703901656868, "grad_norm": 1.874477744102478, "learning_rate": 9.454563907236387e-05, "loss": 1.4406, "step": 19360 }, { "epoch": 5.176376269374666, "grad_norm": 2.145576238632202, "learning_rate": 9.44618110727144e-05, "loss": 1.373, "step": 19370 }, { "epoch": 5.179048637092464, "grad_norm": 1.8650093078613281, "learning_rate": 9.43779869766165e-05, "loss": 1.3812, "step": 19380 }, { "epoch": 5.181721004810262, "grad_norm": 1.8273286819458008, "learning_rate": 9.429416684315293e-05, "loss": 1.3712, "step": 19390 }, { "epoch": 5.18439337252806, "grad_norm": 1.8486456871032715, "learning_rate": 9.421035073140377e-05, "loss": 1.3746, "step": 19400 }, { "epoch": 5.187065740245858, "grad_norm": 1.9003373384475708, "learning_rate": 9.412653870044612e-05, "loss": 1.3877, "step": 19410 }, { "epoch": 5.189738107963656, "grad_norm": 2.0229334831237793, "learning_rate": 9.404273080935425e-05, "loss": 1.3323, "step": 19420 }, { "epoch": 5.192410475681454, "grad_norm": 1.6970276832580566, "learning_rate": 9.395892711719962e-05, "loss": 1.4055, "step": 19430 }, { "epoch": 5.195082843399252, "grad_norm": 1.801120400428772, "learning_rate": 9.387512768305059e-05, "loss": 1.376, "step": 19440 }, { "epoch": 5.19775521111705, "grad_norm": 1.8643522262573242, "learning_rate": 9.379133256597255e-05, "loss": 1.4128, "step": 19450 }, { "epoch": 5.200427578834848, "grad_norm": 1.7856494188308716, "learning_rate": 9.370754182502792e-05, "loss": 1.3659, "step": 19460 }, { "epoch": 5.203099946552646, "grad_norm": 1.9588515758514404, "learning_rate": 9.362375551927593e-05, "loss": 1.3671, "step": 19470 }, { "epoch": 5.205772314270444, "grad_norm": 1.8798905611038208, "learning_rate": 9.353997370777282e-05, "loss": 1.3511, "step": 19480 }, { "epoch": 5.208444681988242, "grad_norm": 1.9828431606292725, "learning_rate": 9.345619644957152e-05, "loss": 1.388, "step": 19490 }, { "epoch": 5.21111704970604, "grad_norm": 1.9904956817626953, "learning_rate": 9.337242380372181e-05, "loss": 1.3294, "step": 19500 }, { "epoch": 5.213789417423838, "grad_norm": 1.6589508056640625, "learning_rate": 9.328865582927024e-05, "loss": 1.3557, "step": 19510 }, { "epoch": 5.216461785141636, "grad_norm": 1.7825688123703003, "learning_rate": 9.320489258526007e-05, "loss": 1.3411, "step": 19520 }, { "epoch": 5.219134152859433, "grad_norm": 1.8600260019302368, "learning_rate": 9.312113413073113e-05, "loss": 1.3333, "step": 19530 }, { "epoch": 5.221806520577231, "grad_norm": 1.7367174625396729, "learning_rate": 9.303738052472003e-05, "loss": 1.3826, "step": 19540 }, { "epoch": 5.224478888295029, "grad_norm": 1.841676950454712, "learning_rate": 9.29536318262598e-05, "loss": 1.4269, "step": 19550 }, { "epoch": 5.227151256012827, "grad_norm": 1.6773523092269897, "learning_rate": 9.286988809438019e-05, "loss": 1.3821, "step": 19560 }, { "epoch": 5.229823623730625, "grad_norm": 1.700252890586853, "learning_rate": 9.278614938810729e-05, "loss": 1.37, "step": 19570 }, { "epoch": 5.232495991448423, "grad_norm": 2.0488553047180176, "learning_rate": 9.270241576646368e-05, "loss": 1.3832, "step": 19580 }, { "epoch": 5.235168359166221, "grad_norm": 1.926261305809021, "learning_rate": 9.261868728846845e-05, "loss": 1.3946, "step": 19590 }, { "epoch": 5.237840726884019, "grad_norm": 1.8437085151672363, "learning_rate": 9.2534964013137e-05, "loss": 1.3816, "step": 19600 }, { "epoch": 5.240513094601817, "grad_norm": 1.9436142444610596, "learning_rate": 9.245124599948096e-05, "loss": 1.3795, "step": 19610 }, { "epoch": 5.243185462319615, "grad_norm": 1.788064956665039, "learning_rate": 9.236753330650848e-05, "loss": 1.4214, "step": 19620 }, { "epoch": 5.245857830037413, "grad_norm": 1.8409327268600464, "learning_rate": 9.228382599322374e-05, "loss": 1.4142, "step": 19630 }, { "epoch": 5.248530197755211, "grad_norm": 1.763501763343811, "learning_rate": 9.22001241186273e-05, "loss": 1.3932, "step": 19640 }, { "epoch": 5.251202565473009, "grad_norm": 1.7721229791641235, "learning_rate": 9.211642774171578e-05, "loss": 1.317, "step": 19650 }, { "epoch": 5.253874933190807, "grad_norm": 1.8915736675262451, "learning_rate": 9.203273692148189e-05, "loss": 1.3491, "step": 19660 }, { "epoch": 5.256547300908605, "grad_norm": 1.7938752174377441, "learning_rate": 9.194905171691459e-05, "loss": 1.3978, "step": 19670 }, { "epoch": 5.259219668626403, "grad_norm": 1.881883144378662, "learning_rate": 9.186537218699872e-05, "loss": 1.4447, "step": 19680 }, { "epoch": 5.261892036344201, "grad_norm": 1.88962721824646, "learning_rate": 9.178169839071515e-05, "loss": 1.3895, "step": 19690 }, { "epoch": 5.264564404061999, "grad_norm": 1.8478615283966064, "learning_rate": 9.169803038704083e-05, "loss": 1.4221, "step": 19700 }, { "epoch": 5.267236771779797, "grad_norm": 1.848584771156311, "learning_rate": 9.161436823494843e-05, "loss": 1.4194, "step": 19710 }, { "epoch": 5.269909139497595, "grad_norm": 1.8469613790512085, "learning_rate": 9.15307119934067e-05, "loss": 1.346, "step": 19720 }, { "epoch": 5.272581507215393, "grad_norm": 1.7804778814315796, "learning_rate": 9.144706172138002e-05, "loss": 1.4627, "step": 19730 }, { "epoch": 5.275253874933191, "grad_norm": 1.8468170166015625, "learning_rate": 9.136341747782874e-05, "loss": 1.3987, "step": 19740 }, { "epoch": 5.277926242650989, "grad_norm": 1.8156429529190063, "learning_rate": 9.127977932170888e-05, "loss": 1.4457, "step": 19750 }, { "epoch": 5.280598610368787, "grad_norm": 1.8687725067138672, "learning_rate": 9.119614731197216e-05, "loss": 1.3922, "step": 19760 }, { "epoch": 5.283270978086585, "grad_norm": 1.7553884983062744, "learning_rate": 9.111252150756595e-05, "loss": 1.471, "step": 19770 }, { "epoch": 5.285943345804383, "grad_norm": 1.8026105165481567, "learning_rate": 9.102890196743337e-05, "loss": 1.4102, "step": 19780 }, { "epoch": 5.288615713522181, "grad_norm": 1.931124210357666, "learning_rate": 9.094528875051292e-05, "loss": 1.3855, "step": 19790 }, { "epoch": 5.291288081239979, "grad_norm": 1.6505416631698608, "learning_rate": 9.086168191573888e-05, "loss": 1.4462, "step": 19800 }, { "epoch": 5.293960448957777, "grad_norm": 1.8449018001556396, "learning_rate": 9.077808152204084e-05, "loss": 1.4887, "step": 19810 }, { "epoch": 5.296632816675575, "grad_norm": 1.9283699989318848, "learning_rate": 9.069448762834391e-05, "loss": 1.4667, "step": 19820 }, { "epoch": 5.299305184393372, "grad_norm": 1.7096954584121704, "learning_rate": 9.061090029356867e-05, "loss": 1.37, "step": 19830 }, { "epoch": 5.30197755211117, "grad_norm": 1.9460853338241577, "learning_rate": 9.052731957663104e-05, "loss": 1.4034, "step": 19840 }, { "epoch": 5.304649919828968, "grad_norm": 1.6662943363189697, "learning_rate": 9.044374553644218e-05, "loss": 1.4375, "step": 19850 }, { "epoch": 5.307322287546766, "grad_norm": 1.8387529850006104, "learning_rate": 9.036017823190876e-05, "loss": 1.4105, "step": 19860 }, { "epoch": 5.309994655264564, "grad_norm": 1.7618277072906494, "learning_rate": 9.027661772193249e-05, "loss": 1.4752, "step": 19870 }, { "epoch": 5.312667022982362, "grad_norm": 1.6710059642791748, "learning_rate": 9.019306406541042e-05, "loss": 1.3728, "step": 19880 }, { "epoch": 5.31533939070016, "grad_norm": 1.8021734952926636, "learning_rate": 9.010951732123473e-05, "loss": 1.37, "step": 19890 }, { "epoch": 5.318011758417958, "grad_norm": 1.6048088073730469, "learning_rate": 9.002597754829269e-05, "loss": 1.3938, "step": 19900 }, { "epoch": 5.320684126135756, "grad_norm": 1.799424409866333, "learning_rate": 8.994244480546672e-05, "loss": 1.4425, "step": 19910 }, { "epoch": 5.323356493853554, "grad_norm": 1.723467469215393, "learning_rate": 8.985891915163425e-05, "loss": 1.4524, "step": 19920 }, { "epoch": 5.326028861571352, "grad_norm": 1.785679817199707, "learning_rate": 8.977540064566771e-05, "loss": 1.4493, "step": 19930 }, { "epoch": 5.32870122928915, "grad_norm": 1.9639917612075806, "learning_rate": 8.969188934643454e-05, "loss": 1.3669, "step": 19940 }, { "epoch": 5.331373597006948, "grad_norm": 1.80316960811615, "learning_rate": 8.960838531279698e-05, "loss": 1.4181, "step": 19950 }, { "epoch": 5.334045964724746, "grad_norm": 1.7789998054504395, "learning_rate": 8.952488860361234e-05, "loss": 1.3567, "step": 19960 }, { "epoch": 5.336718332442544, "grad_norm": 1.8293192386627197, "learning_rate": 8.944139927773261e-05, "loss": 1.4086, "step": 19970 }, { "epoch": 5.339390700160342, "grad_norm": 1.614897608757019, "learning_rate": 8.935791739400457e-05, "loss": 1.3897, "step": 19980 }, { "epoch": 5.34206306787814, "grad_norm": 1.9090148210525513, "learning_rate": 8.92744430112699e-05, "loss": 1.4291, "step": 19990 }, { "epoch": 5.344735435595938, "grad_norm": 1.67301607131958, "learning_rate": 8.919097618836488e-05, "loss": 1.3847, "step": 20000 }, { "epoch": 5.347407803313736, "grad_norm": 1.8355300426483154, "learning_rate": 8.91075169841204e-05, "loss": 1.454, "step": 20010 }, { "epoch": 5.350080171031534, "grad_norm": 1.7994592189788818, "learning_rate": 8.902406545736217e-05, "loss": 1.3414, "step": 20020 }, { "epoch": 5.352752538749332, "grad_norm": 1.747182846069336, "learning_rate": 8.89406216669103e-05, "loss": 1.4433, "step": 20030 }, { "epoch": 5.35542490646713, "grad_norm": 1.8212345838546753, "learning_rate": 8.885718567157964e-05, "loss": 1.3703, "step": 20040 }, { "epoch": 5.358097274184928, "grad_norm": 1.795236349105835, "learning_rate": 8.877375753017933e-05, "loss": 1.3588, "step": 20050 }, { "epoch": 5.360769641902726, "grad_norm": 2.3064260482788086, "learning_rate": 8.869033730151312e-05, "loss": 1.4676, "step": 20060 }, { "epoch": 5.363442009620524, "grad_norm": 1.7423838376998901, "learning_rate": 8.860692504437912e-05, "loss": 1.4386, "step": 20070 }, { "epoch": 5.366114377338322, "grad_norm": 1.7394424676895142, "learning_rate": 8.852352081756989e-05, "loss": 1.3764, "step": 20080 }, { "epoch": 5.36878674505612, "grad_norm": 1.929689645767212, "learning_rate": 8.844012467987222e-05, "loss": 1.4234, "step": 20090 }, { "epoch": 5.371459112773918, "grad_norm": 1.8902708292007446, "learning_rate": 8.835673669006733e-05, "loss": 1.4606, "step": 20100 }, { "epoch": 5.374131480491716, "grad_norm": 1.847443699836731, "learning_rate": 8.827335690693055e-05, "loss": 1.4488, "step": 20110 }, { "epoch": 5.376803848209514, "grad_norm": 1.915730357170105, "learning_rate": 8.81899853892316e-05, "loss": 1.4001, "step": 20120 }, { "epoch": 5.379476215927312, "grad_norm": 1.954857587814331, "learning_rate": 8.810662219573421e-05, "loss": 1.3996, "step": 20130 }, { "epoch": 5.38214858364511, "grad_norm": 1.909435749053955, "learning_rate": 8.80232673851963e-05, "loss": 1.4051, "step": 20140 }, { "epoch": 5.384820951362908, "grad_norm": 1.8227602243423462, "learning_rate": 8.793992101636995e-05, "loss": 1.3539, "step": 20150 }, { "epoch": 5.387493319080706, "grad_norm": 1.9692318439483643, "learning_rate": 8.785658314800121e-05, "loss": 1.3321, "step": 20160 }, { "epoch": 5.390165686798504, "grad_norm": 2.007525682449341, "learning_rate": 8.777325383883011e-05, "loss": 1.4246, "step": 20170 }, { "epoch": 5.392838054516301, "grad_norm": 1.845348596572876, "learning_rate": 8.76899331475908e-05, "loss": 1.3951, "step": 20180 }, { "epoch": 5.395510422234099, "grad_norm": 1.9953432083129883, "learning_rate": 8.760662113301113e-05, "loss": 1.4315, "step": 20190 }, { "epoch": 5.398182789951897, "grad_norm": 1.837344765663147, "learning_rate": 8.752331785381308e-05, "loss": 1.4457, "step": 20200 }, { "epoch": 5.400855157669695, "grad_norm": 1.7662973403930664, "learning_rate": 8.74400233687123e-05, "loss": 1.4232, "step": 20210 }, { "epoch": 5.403527525387493, "grad_norm": 1.7298915386199951, "learning_rate": 8.735673773641824e-05, "loss": 1.3671, "step": 20220 }, { "epoch": 5.406199893105291, "grad_norm": 1.8851754665374756, "learning_rate": 8.727346101563425e-05, "loss": 1.4451, "step": 20230 }, { "epoch": 5.408872260823089, "grad_norm": 1.8406022787094116, "learning_rate": 8.719019326505729e-05, "loss": 1.3841, "step": 20240 }, { "epoch": 5.411544628540887, "grad_norm": 1.753535270690918, "learning_rate": 8.710693454337796e-05, "loss": 1.3557, "step": 20250 }, { "epoch": 5.414216996258685, "grad_norm": 1.8125604391098022, "learning_rate": 8.70236849092806e-05, "loss": 1.4351, "step": 20260 }, { "epoch": 5.416889363976483, "grad_norm": 1.7749202251434326, "learning_rate": 8.694044442144312e-05, "loss": 1.4501, "step": 20270 }, { "epoch": 5.419561731694281, "grad_norm": 1.9773153066635132, "learning_rate": 8.685721313853693e-05, "loss": 1.4406, "step": 20280 }, { "epoch": 5.422234099412079, "grad_norm": 1.7915995121002197, "learning_rate": 8.677399111922702e-05, "loss": 1.4535, "step": 20290 }, { "epoch": 5.424906467129877, "grad_norm": 1.9801911115646362, "learning_rate": 8.669077842217173e-05, "loss": 1.3725, "step": 20300 }, { "epoch": 5.427578834847675, "grad_norm": 1.9179128408432007, "learning_rate": 8.660757510602302e-05, "loss": 1.4339, "step": 20310 }, { "epoch": 5.430251202565473, "grad_norm": 1.912291169166565, "learning_rate": 8.652438122942609e-05, "loss": 1.4284, "step": 20320 }, { "epoch": 5.432923570283271, "grad_norm": 1.7995072603225708, "learning_rate": 8.64411968510195e-05, "loss": 1.4228, "step": 20330 }, { "epoch": 5.435595938001069, "grad_norm": 1.821509838104248, "learning_rate": 8.63580220294352e-05, "loss": 1.5033, "step": 20340 }, { "epoch": 5.438268305718867, "grad_norm": 1.7498754262924194, "learning_rate": 8.627485682329827e-05, "loss": 1.4062, "step": 20350 }, { "epoch": 5.440940673436665, "grad_norm": 1.8918187618255615, "learning_rate": 8.619170129122721e-05, "loss": 1.5136, "step": 20360 }, { "epoch": 5.443613041154463, "grad_norm": 1.8670575618743896, "learning_rate": 8.61085554918335e-05, "loss": 1.3779, "step": 20370 }, { "epoch": 5.446285408872261, "grad_norm": 1.6510086059570312, "learning_rate": 8.602541948372182e-05, "loss": 1.4161, "step": 20380 }, { "epoch": 5.448957776590059, "grad_norm": 1.811910629272461, "learning_rate": 8.594229332549006e-05, "loss": 1.443, "step": 20390 }, { "epoch": 5.451630144307857, "grad_norm": 1.8337417840957642, "learning_rate": 8.585917707572903e-05, "loss": 1.4084, "step": 20400 }, { "epoch": 5.454302512025655, "grad_norm": 1.7589662075042725, "learning_rate": 8.577607079302254e-05, "loss": 1.4467, "step": 20410 }, { "epoch": 5.456974879743453, "grad_norm": 1.909666895866394, "learning_rate": 8.569297453594755e-05, "loss": 1.3712, "step": 20420 }, { "epoch": 5.459647247461251, "grad_norm": 1.9083164930343628, "learning_rate": 8.56098883630738e-05, "loss": 1.4333, "step": 20430 }, { "epoch": 5.462319615179049, "grad_norm": 1.8496124744415283, "learning_rate": 8.552681233296395e-05, "loss": 1.4009, "step": 20440 }, { "epoch": 5.464991982896847, "grad_norm": 1.6915220022201538, "learning_rate": 8.544374650417354e-05, "loss": 1.4183, "step": 20450 }, { "epoch": 5.467664350614645, "grad_norm": 1.8739166259765625, "learning_rate": 8.536069093525089e-05, "loss": 1.4258, "step": 20460 }, { "epoch": 5.470336718332443, "grad_norm": 1.8223084211349487, "learning_rate": 8.527764568473714e-05, "loss": 1.4589, "step": 20470 }, { "epoch": 5.47300908605024, "grad_norm": 1.802821159362793, "learning_rate": 8.51946108111661e-05, "loss": 1.4588, "step": 20480 }, { "epoch": 5.475681453768038, "grad_norm": 1.7672052383422852, "learning_rate": 8.511158637306425e-05, "loss": 1.4324, "step": 20490 }, { "epoch": 5.478353821485836, "grad_norm": 2.055225372314453, "learning_rate": 8.502857242895082e-05, "loss": 1.3586, "step": 20500 }, { "epoch": 5.481026189203634, "grad_norm": 1.851271152496338, "learning_rate": 8.494556903733751e-05, "loss": 1.4696, "step": 20510 }, { "epoch": 5.483698556921432, "grad_norm": 1.7848930358886719, "learning_rate": 8.486257625672872e-05, "loss": 1.4534, "step": 20520 }, { "epoch": 5.48637092463923, "grad_norm": 1.7438911199569702, "learning_rate": 8.477959414562126e-05, "loss": 1.4173, "step": 20530 }, { "epoch": 5.489043292357028, "grad_norm": 1.926193118095398, "learning_rate": 8.469662276250442e-05, "loss": 1.4543, "step": 20540 }, { "epoch": 5.491715660074826, "grad_norm": 1.714095115661621, "learning_rate": 8.461366216586005e-05, "loss": 1.4284, "step": 20550 }, { "epoch": 5.494388027792624, "grad_norm": 1.7335968017578125, "learning_rate": 8.453071241416225e-05, "loss": 1.3828, "step": 20560 }, { "epoch": 5.497060395510422, "grad_norm": 1.8304640054702759, "learning_rate": 8.444777356587755e-05, "loss": 1.3795, "step": 20570 }, { "epoch": 5.49973276322822, "grad_norm": 1.7875304222106934, "learning_rate": 8.43648456794648e-05, "loss": 1.4115, "step": 20580 }, { "epoch": 5.502405130946018, "grad_norm": 1.8970807790756226, "learning_rate": 8.42819288133751e-05, "loss": 1.4616, "step": 20590 }, { "epoch": 5.505077498663816, "grad_norm": 1.857749581336975, "learning_rate": 8.41990230260518e-05, "loss": 1.3808, "step": 20600 }, { "epoch": 5.507749866381614, "grad_norm": 1.6583001613616943, "learning_rate": 8.411612837593043e-05, "loss": 1.3328, "step": 20610 }, { "epoch": 5.510422234099412, "grad_norm": 1.8332334756851196, "learning_rate": 8.403324492143865e-05, "loss": 1.4356, "step": 20620 }, { "epoch": 5.51309460181721, "grad_norm": 1.8376007080078125, "learning_rate": 8.395037272099629e-05, "loss": 1.4134, "step": 20630 }, { "epoch": 5.515766969535008, "grad_norm": 1.9075250625610352, "learning_rate": 8.386751183301515e-05, "loss": 1.4126, "step": 20640 }, { "epoch": 5.518439337252806, "grad_norm": 1.7450436353683472, "learning_rate": 8.378466231589917e-05, "loss": 1.3828, "step": 20650 }, { "epoch": 5.521111704970604, "grad_norm": 1.9347198009490967, "learning_rate": 8.37018242280442e-05, "loss": 1.4048, "step": 20660 }, { "epoch": 5.523784072688402, "grad_norm": 1.8395016193389893, "learning_rate": 8.361899762783803e-05, "loss": 1.4849, "step": 20670 }, { "epoch": 5.5264564404062, "grad_norm": 1.7988110780715942, "learning_rate": 8.353618257366044e-05, "loss": 1.3643, "step": 20680 }, { "epoch": 5.529128808123998, "grad_norm": 1.9901680946350098, "learning_rate": 8.345337912388297e-05, "loss": 1.442, "step": 20690 }, { "epoch": 5.531801175841796, "grad_norm": 1.6740586757659912, "learning_rate": 8.337058733686898e-05, "loss": 1.4137, "step": 20700 }, { "epoch": 5.534473543559594, "grad_norm": 1.8861411809921265, "learning_rate": 8.328780727097373e-05, "loss": 1.4486, "step": 20710 }, { "epoch": 5.537145911277392, "grad_norm": 1.8824735879898071, "learning_rate": 8.320503898454411e-05, "loss": 1.4204, "step": 20720 }, { "epoch": 5.53981827899519, "grad_norm": 1.8825196027755737, "learning_rate": 8.312228253591867e-05, "loss": 1.4858, "step": 20730 }, { "epoch": 5.542490646712988, "grad_norm": 1.7106484174728394, "learning_rate": 8.303953798342779e-05, "loss": 1.4151, "step": 20740 }, { "epoch": 5.545163014430786, "grad_norm": 1.8421587944030762, "learning_rate": 8.295680538539326e-05, "loss": 1.4265, "step": 20750 }, { "epoch": 5.547835382148584, "grad_norm": 2.018005609512329, "learning_rate": 8.287408480012861e-05, "loss": 1.4118, "step": 20760 }, { "epoch": 5.550507749866382, "grad_norm": 1.9154627323150635, "learning_rate": 8.279137628593882e-05, "loss": 1.3656, "step": 20770 }, { "epoch": 5.55318011758418, "grad_norm": 1.7975690364837646, "learning_rate": 8.270867990112034e-05, "loss": 1.4457, "step": 20780 }, { "epoch": 5.555852485301978, "grad_norm": 1.9669737815856934, "learning_rate": 8.262599570396115e-05, "loss": 1.4032, "step": 20790 }, { "epoch": 5.558524853019776, "grad_norm": 1.916625738143921, "learning_rate": 8.254332375274056e-05, "loss": 1.4549, "step": 20800 }, { "epoch": 5.561197220737574, "grad_norm": 1.8926728963851929, "learning_rate": 8.24606641057293e-05, "loss": 1.3828, "step": 20810 }, { "epoch": 5.563869588455372, "grad_norm": 1.8983536958694458, "learning_rate": 8.237801682118942e-05, "loss": 1.3588, "step": 20820 }, { "epoch": 5.566541956173169, "grad_norm": 2.161370038986206, "learning_rate": 8.22953819573742e-05, "loss": 1.2994, "step": 20830 }, { "epoch": 5.569214323890967, "grad_norm": 1.795060396194458, "learning_rate": 8.22127595725283e-05, "loss": 1.4147, "step": 20840 }, { "epoch": 5.571886691608765, "grad_norm": 1.8414568901062012, "learning_rate": 8.213014972488745e-05, "loss": 1.4071, "step": 20850 }, { "epoch": 5.574559059326563, "grad_norm": 1.9023696184158325, "learning_rate": 8.204755247267855e-05, "loss": 1.4326, "step": 20860 }, { "epoch": 5.577231427044361, "grad_norm": 1.855142593383789, "learning_rate": 8.196496787411975e-05, "loss": 1.378, "step": 20870 }, { "epoch": 5.579903794762159, "grad_norm": 1.7736339569091797, "learning_rate": 8.188239598742014e-05, "loss": 1.4636, "step": 20880 }, { "epoch": 5.582576162479957, "grad_norm": 1.7798281908035278, "learning_rate": 8.179983687077986e-05, "loss": 1.4085, "step": 20890 }, { "epoch": 5.585248530197755, "grad_norm": 1.8400295972824097, "learning_rate": 8.17172905823902e-05, "loss": 1.4173, "step": 20900 }, { "epoch": 5.587920897915553, "grad_norm": 1.8477122783660889, "learning_rate": 8.16347571804332e-05, "loss": 1.3912, "step": 20910 }, { "epoch": 5.590593265633351, "grad_norm": 1.8249187469482422, "learning_rate": 8.155223672308204e-05, "loss": 1.3408, "step": 20920 }, { "epoch": 5.593265633351149, "grad_norm": 1.8549121618270874, "learning_rate": 8.146972926850054e-05, "loss": 1.4286, "step": 20930 }, { "epoch": 5.595938001068947, "grad_norm": 2.0361337661743164, "learning_rate": 8.13872348748435e-05, "loss": 1.4204, "step": 20940 }, { "epoch": 5.598610368786745, "grad_norm": 1.8047282695770264, "learning_rate": 8.130475360025653e-05, "loss": 1.4394, "step": 20950 }, { "epoch": 5.601282736504543, "grad_norm": 1.8704631328582764, "learning_rate": 8.122228550287591e-05, "loss": 1.4417, "step": 20960 }, { "epoch": 5.603955104222341, "grad_norm": 1.8467167615890503, "learning_rate": 8.113983064082866e-05, "loss": 1.3722, "step": 20970 }, { "epoch": 5.606627471940139, "grad_norm": 1.8335866928100586, "learning_rate": 8.105738907223249e-05, "loss": 1.414, "step": 20980 }, { "epoch": 5.609299839657937, "grad_norm": 1.8786503076553345, "learning_rate": 8.097496085519573e-05, "loss": 1.3668, "step": 20990 }, { "epoch": 5.611972207375735, "grad_norm": 1.8728291988372803, "learning_rate": 8.089254604781732e-05, "loss": 1.3863, "step": 21000 }, { "epoch": 5.614644575093533, "grad_norm": 1.7736185789108276, "learning_rate": 8.081014470818672e-05, "loss": 1.4979, "step": 21010 }, { "epoch": 5.617316942811331, "grad_norm": 1.698287844657898, "learning_rate": 8.072775689438385e-05, "loss": 1.4372, "step": 21020 }, { "epoch": 5.619989310529129, "grad_norm": 1.6462870836257935, "learning_rate": 8.064538266447926e-05, "loss": 1.4553, "step": 21030 }, { "epoch": 5.622661678246927, "grad_norm": 1.702731728553772, "learning_rate": 8.056302207653375e-05, "loss": 1.3848, "step": 21040 }, { "epoch": 5.625334045964725, "grad_norm": 1.9015578031539917, "learning_rate": 8.048067518859853e-05, "loss": 1.4118, "step": 21050 }, { "epoch": 5.628006413682523, "grad_norm": 2.0413286685943604, "learning_rate": 8.03983420587153e-05, "loss": 1.3706, "step": 21060 }, { "epoch": 5.630678781400321, "grad_norm": 2.0313758850097656, "learning_rate": 8.031602274491585e-05, "loss": 1.4304, "step": 21070 }, { "epoch": 5.633351149118119, "grad_norm": 1.720688819885254, "learning_rate": 8.023371730522244e-05, "loss": 1.4069, "step": 21080 }, { "epoch": 5.636023516835917, "grad_norm": 1.7646926641464233, "learning_rate": 8.015142579764741e-05, "loss": 1.4524, "step": 21090 }, { "epoch": 5.638695884553715, "grad_norm": 1.887972354888916, "learning_rate": 8.006914828019327e-05, "loss": 1.3912, "step": 21100 }, { "epoch": 5.641368252271512, "grad_norm": 1.9093656539916992, "learning_rate": 7.998688481085283e-05, "loss": 1.3113, "step": 21110 }, { "epoch": 5.64404061998931, "grad_norm": 1.8109502792358398, "learning_rate": 7.99046354476088e-05, "loss": 1.4354, "step": 21120 }, { "epoch": 5.646712987707108, "grad_norm": 1.6826542615890503, "learning_rate": 7.982240024843404e-05, "loss": 1.3939, "step": 21130 }, { "epoch": 5.649385355424906, "grad_norm": 1.8867690563201904, "learning_rate": 7.974017927129148e-05, "loss": 1.4446, "step": 21140 }, { "epoch": 5.652057723142704, "grad_norm": 1.8560004234313965, "learning_rate": 7.965797257413391e-05, "loss": 1.4355, "step": 21150 }, { "epoch": 5.654730090860502, "grad_norm": 1.6699858903884888, "learning_rate": 7.957578021490415e-05, "loss": 1.4192, "step": 21160 }, { "epoch": 5.6574024585783, "grad_norm": 1.786773920059204, "learning_rate": 7.949360225153488e-05, "loss": 1.3611, "step": 21170 }, { "epoch": 5.660074826296098, "grad_norm": 1.9619256258010864, "learning_rate": 7.941143874194859e-05, "loss": 1.3829, "step": 21180 }, { "epoch": 5.662747194013896, "grad_norm": 1.8596181869506836, "learning_rate": 7.932928974405767e-05, "loss": 1.4219, "step": 21190 }, { "epoch": 5.665419561731694, "grad_norm": 1.9420338869094849, "learning_rate": 7.92471553157642e-05, "loss": 1.4303, "step": 21200 }, { "epoch": 5.668091929449492, "grad_norm": 1.885595440864563, "learning_rate": 7.916503551496002e-05, "loss": 1.4115, "step": 21210 }, { "epoch": 5.67076429716729, "grad_norm": 2.14754319190979, "learning_rate": 7.908293039952672e-05, "loss": 1.4569, "step": 21220 }, { "epoch": 5.673436664885088, "grad_norm": 1.840187907218933, "learning_rate": 7.900084002733541e-05, "loss": 1.3816, "step": 21230 }, { "epoch": 5.676109032602886, "grad_norm": 1.805789828300476, "learning_rate": 7.891876445624695e-05, "loss": 1.4556, "step": 21240 }, { "epoch": 5.678781400320684, "grad_norm": 1.8917306661605835, "learning_rate": 7.883670374411168e-05, "loss": 1.3914, "step": 21250 }, { "epoch": 5.681453768038482, "grad_norm": 1.7766830921173096, "learning_rate": 7.875465794876944e-05, "loss": 1.4106, "step": 21260 }, { "epoch": 5.68412613575628, "grad_norm": 1.8623842000961304, "learning_rate": 7.867262712804967e-05, "loss": 1.4727, "step": 21270 }, { "epoch": 5.686798503474078, "grad_norm": 1.8976304531097412, "learning_rate": 7.859061133977116e-05, "loss": 1.3634, "step": 21280 }, { "epoch": 5.689470871191876, "grad_norm": 1.7975999116897583, "learning_rate": 7.850861064174209e-05, "loss": 1.4084, "step": 21290 }, { "epoch": 5.692143238909674, "grad_norm": 1.9109054803848267, "learning_rate": 7.842662509176013e-05, "loss": 1.3689, "step": 21300 }, { "epoch": 5.694815606627472, "grad_norm": 2.0148255825042725, "learning_rate": 7.834465474761212e-05, "loss": 1.3981, "step": 21310 }, { "epoch": 5.69748797434527, "grad_norm": 1.7822149991989136, "learning_rate": 7.82626996670743e-05, "loss": 1.4759, "step": 21320 }, { "epoch": 5.700160342063068, "grad_norm": 1.698251485824585, "learning_rate": 7.818075990791208e-05, "loss": 1.4854, "step": 21330 }, { "epoch": 5.702832709780866, "grad_norm": 1.8046317100524902, "learning_rate": 7.809883552788009e-05, "loss": 1.4243, "step": 21340 }, { "epoch": 5.705505077498664, "grad_norm": 1.8524879217147827, "learning_rate": 7.801692658472214e-05, "loss": 1.3466, "step": 21350 }, { "epoch": 5.708177445216462, "grad_norm": 1.8356361389160156, "learning_rate": 7.793503313617113e-05, "loss": 1.4532, "step": 21360 }, { "epoch": 5.71084981293426, "grad_norm": 1.8731732368469238, "learning_rate": 7.785315523994903e-05, "loss": 1.4338, "step": 21370 }, { "epoch": 5.713522180652058, "grad_norm": 1.968556523323059, "learning_rate": 7.777129295376693e-05, "loss": 1.4749, "step": 21380 }, { "epoch": 5.716194548369856, "grad_norm": 1.7161849737167358, "learning_rate": 7.768944633532477e-05, "loss": 1.4179, "step": 21390 }, { "epoch": 5.718866916087654, "grad_norm": 1.7846759557724, "learning_rate": 7.760761544231166e-05, "loss": 1.4557, "step": 21400 }, { "epoch": 5.721539283805452, "grad_norm": 1.7804423570632935, "learning_rate": 7.752580033240542e-05, "loss": 1.4131, "step": 21410 }, { "epoch": 5.72421165152325, "grad_norm": 1.953589677810669, "learning_rate": 7.744400106327279e-05, "loss": 1.3795, "step": 21420 }, { "epoch": 5.726884019241048, "grad_norm": 1.726546049118042, "learning_rate": 7.736221769256946e-05, "loss": 1.4868, "step": 21430 }, { "epoch": 5.729556386958846, "grad_norm": 1.8707987070083618, "learning_rate": 7.72804502779398e-05, "loss": 1.4808, "step": 21440 }, { "epoch": 5.732228754676644, "grad_norm": 1.7484774589538574, "learning_rate": 7.719869887701694e-05, "loss": 1.4293, "step": 21450 }, { "epoch": 5.734901122394442, "grad_norm": 1.8914085626602173, "learning_rate": 7.711696354742281e-05, "loss": 1.4495, "step": 21460 }, { "epoch": 5.73757349011224, "grad_norm": 1.8179551362991333, "learning_rate": 7.70352443467679e-05, "loss": 1.4383, "step": 21470 }, { "epoch": 5.740245857830037, "grad_norm": 1.9113681316375732, "learning_rate": 7.695354133265144e-05, "loss": 1.4195, "step": 21480 }, { "epoch": 5.742918225547835, "grad_norm": 1.7981265783309937, "learning_rate": 7.687185456266116e-05, "loss": 1.4797, "step": 21490 }, { "epoch": 5.745590593265633, "grad_norm": 1.8957091569900513, "learning_rate": 7.679018409437335e-05, "loss": 1.4402, "step": 21500 }, { "epoch": 5.748262960983431, "grad_norm": 1.728651762008667, "learning_rate": 7.67085299853529e-05, "loss": 1.4046, "step": 21510 }, { "epoch": 5.750935328701229, "grad_norm": 1.767903447151184, "learning_rate": 7.662689229315307e-05, "loss": 1.3694, "step": 21520 }, { "epoch": 5.753607696419027, "grad_norm": 1.8116743564605713, "learning_rate": 7.654527107531559e-05, "loss": 1.3906, "step": 21530 }, { "epoch": 5.756280064136825, "grad_norm": 1.7834914922714233, "learning_rate": 7.646366638937057e-05, "loss": 1.463, "step": 21540 }, { "epoch": 5.758952431854623, "grad_norm": 1.8045434951782227, "learning_rate": 7.638207829283645e-05, "loss": 1.4808, "step": 21550 }, { "epoch": 5.761624799572421, "grad_norm": 1.7416683435440063, "learning_rate": 7.630050684322005e-05, "loss": 1.3901, "step": 21560 }, { "epoch": 5.764297167290219, "grad_norm": 1.8306225538253784, "learning_rate": 7.621895209801636e-05, "loss": 1.4188, "step": 21570 }, { "epoch": 5.766969535008017, "grad_norm": 1.7175840139389038, "learning_rate": 7.613741411470862e-05, "loss": 1.3847, "step": 21580 }, { "epoch": 5.769641902725815, "grad_norm": 1.8680164813995361, "learning_rate": 7.605589295076834e-05, "loss": 1.4709, "step": 21590 }, { "epoch": 5.772314270443613, "grad_norm": 1.7645819187164307, "learning_rate": 7.59743886636551e-05, "loss": 1.4534, "step": 21600 }, { "epoch": 5.774986638161411, "grad_norm": 1.9910109043121338, "learning_rate": 7.58929013108165e-05, "loss": 1.4604, "step": 21610 }, { "epoch": 5.777659005879209, "grad_norm": 1.7553080320358276, "learning_rate": 7.581143094968845e-05, "loss": 1.4155, "step": 21620 }, { "epoch": 5.780331373597007, "grad_norm": 1.9407318830490112, "learning_rate": 7.572997763769459e-05, "loss": 1.3913, "step": 21630 }, { "epoch": 5.783003741314805, "grad_norm": 2.0086443424224854, "learning_rate": 7.564854143224679e-05, "loss": 1.3741, "step": 21640 }, { "epoch": 5.785676109032603, "grad_norm": 1.8153927326202393, "learning_rate": 7.556712239074473e-05, "loss": 1.4104, "step": 21650 }, { "epoch": 5.788348476750401, "grad_norm": 1.8651721477508545, "learning_rate": 7.548572057057594e-05, "loss": 1.4595, "step": 21660 }, { "epoch": 5.791020844468199, "grad_norm": 1.723351240158081, "learning_rate": 7.540433602911603e-05, "loss": 1.4139, "step": 21670 }, { "epoch": 5.793693212185997, "grad_norm": 1.7649847269058228, "learning_rate": 7.53229688237282e-05, "loss": 1.4655, "step": 21680 }, { "epoch": 5.796365579903795, "grad_norm": 1.9785157442092896, "learning_rate": 7.524161901176353e-05, "loss": 1.4393, "step": 21690 }, { "epoch": 5.799037947621593, "grad_norm": 1.8101013898849487, "learning_rate": 7.516028665056086e-05, "loss": 1.3584, "step": 21700 }, { "epoch": 5.801710315339391, "grad_norm": 1.8289343118667603, "learning_rate": 7.50789717974467e-05, "loss": 1.4208, "step": 21710 }, { "epoch": 5.804382683057189, "grad_norm": 1.940071702003479, "learning_rate": 7.499767450973523e-05, "loss": 1.4662, "step": 21720 }, { "epoch": 5.807055050774987, "grad_norm": 1.7949780225753784, "learning_rate": 7.491639484472819e-05, "loss": 1.4615, "step": 21730 }, { "epoch": 5.809727418492785, "grad_norm": 1.6472865343093872, "learning_rate": 7.483513285971502e-05, "loss": 1.4643, "step": 21740 }, { "epoch": 5.812399786210583, "grad_norm": 1.8350540399551392, "learning_rate": 7.475388861197261e-05, "loss": 1.4522, "step": 21750 }, { "epoch": 5.81507215392838, "grad_norm": 1.730674386024475, "learning_rate": 7.467266215876537e-05, "loss": 1.4097, "step": 21760 }, { "epoch": 5.817744521646178, "grad_norm": 1.818445086479187, "learning_rate": 7.459145355734509e-05, "loss": 1.4022, "step": 21770 }, { "epoch": 5.820416889363976, "grad_norm": 1.796509027481079, "learning_rate": 7.451026286495118e-05, "loss": 1.4705, "step": 21780 }, { "epoch": 5.823089257081774, "grad_norm": 1.96515691280365, "learning_rate": 7.442909013881018e-05, "loss": 1.3849, "step": 21790 }, { "epoch": 5.825761624799572, "grad_norm": 1.7800146341323853, "learning_rate": 7.434793543613621e-05, "loss": 1.4661, "step": 21800 }, { "epoch": 5.82843399251737, "grad_norm": 1.7168282270431519, "learning_rate": 7.42667988141305e-05, "loss": 1.3941, "step": 21810 }, { "epoch": 5.831106360235168, "grad_norm": 1.8376736640930176, "learning_rate": 7.418568032998153e-05, "loss": 1.4191, "step": 21820 }, { "epoch": 5.833778727952966, "grad_norm": 1.7231342792510986, "learning_rate": 7.410458004086519e-05, "loss": 1.4534, "step": 21830 }, { "epoch": 5.836451095670764, "grad_norm": 1.7365481853485107, "learning_rate": 7.402349800394436e-05, "loss": 1.4341, "step": 21840 }, { "epoch": 5.839123463388562, "grad_norm": 1.8495562076568604, "learning_rate": 7.394243427636906e-05, "loss": 1.444, "step": 21850 }, { "epoch": 5.84179583110636, "grad_norm": 1.8117125034332275, "learning_rate": 7.386138891527659e-05, "loss": 1.4369, "step": 21860 }, { "epoch": 5.844468198824158, "grad_norm": 2.011932611465454, "learning_rate": 7.378036197779105e-05, "loss": 1.4503, "step": 21870 }, { "epoch": 5.847140566541956, "grad_norm": 1.7977993488311768, "learning_rate": 7.369935352102373e-05, "loss": 1.4153, "step": 21880 }, { "epoch": 5.849812934259754, "grad_norm": 1.7696391344070435, "learning_rate": 7.361836360207287e-05, "loss": 1.3521, "step": 21890 }, { "epoch": 5.852485301977552, "grad_norm": 1.8803189992904663, "learning_rate": 7.353739227802357e-05, "loss": 1.3855, "step": 21900 }, { "epoch": 5.85515766969535, "grad_norm": 1.787385106086731, "learning_rate": 7.34564396059479e-05, "loss": 1.3908, "step": 21910 }, { "epoch": 5.857830037413148, "grad_norm": 1.916798710823059, "learning_rate": 7.337550564290473e-05, "loss": 1.4026, "step": 21920 }, { "epoch": 5.860502405130946, "grad_norm": 1.7575953006744385, "learning_rate": 7.329459044593976e-05, "loss": 1.4585, "step": 21930 }, { "epoch": 5.863174772848744, "grad_norm": 1.8872182369232178, "learning_rate": 7.321369407208551e-05, "loss": 1.3945, "step": 21940 }, { "epoch": 5.865847140566542, "grad_norm": 1.7646726369857788, "learning_rate": 7.313281657836113e-05, "loss": 1.3848, "step": 21950 }, { "epoch": 5.86851950828434, "grad_norm": 1.8913259506225586, "learning_rate": 7.30519580217726e-05, "loss": 1.4149, "step": 21960 }, { "epoch": 5.871191876002138, "grad_norm": 1.8760039806365967, "learning_rate": 7.297111845931243e-05, "loss": 1.4296, "step": 21970 }, { "epoch": 5.873864243719936, "grad_norm": 1.732088327407837, "learning_rate": 7.289029794795976e-05, "loss": 1.4447, "step": 21980 }, { "epoch": 5.876536611437734, "grad_norm": 1.9041616916656494, "learning_rate": 7.280949654468043e-05, "loss": 1.4114, "step": 21990 }, { "epoch": 5.879208979155532, "grad_norm": 1.7664214372634888, "learning_rate": 7.272871430642663e-05, "loss": 1.3673, "step": 22000 }, { "epoch": 5.88188134687333, "grad_norm": 1.9334638118743896, "learning_rate": 7.264795129013709e-05, "loss": 1.5063, "step": 22010 }, { "epoch": 5.884553714591128, "grad_norm": 1.80722177028656, "learning_rate": 7.256720755273713e-05, "loss": 1.4412, "step": 22020 }, { "epoch": 5.887226082308926, "grad_norm": 1.869354248046875, "learning_rate": 7.248648315113829e-05, "loss": 1.4711, "step": 22030 }, { "epoch": 5.889898450026724, "grad_norm": 1.9294512271881104, "learning_rate": 7.240577814223863e-05, "loss": 1.4012, "step": 22040 }, { "epoch": 5.892570817744522, "grad_norm": 1.8781780004501343, "learning_rate": 7.232509258292246e-05, "loss": 1.438, "step": 22050 }, { "epoch": 5.89524318546232, "grad_norm": 1.8945399522781372, "learning_rate": 7.224442653006038e-05, "loss": 1.3915, "step": 22060 }, { "epoch": 5.897915553180118, "grad_norm": 2.3173465728759766, "learning_rate": 7.216378004050928e-05, "loss": 1.4527, "step": 22070 }, { "epoch": 5.900587920897916, "grad_norm": 1.8504656553268433, "learning_rate": 7.208315317111224e-05, "loss": 1.4334, "step": 22080 }, { "epoch": 5.903260288615714, "grad_norm": 1.7004691362380981, "learning_rate": 7.20025459786985e-05, "loss": 1.3564, "step": 22090 }, { "epoch": 5.905932656333512, "grad_norm": 1.9632539749145508, "learning_rate": 7.192195852008347e-05, "loss": 1.4859, "step": 22100 }, { "epoch": 5.90860502405131, "grad_norm": 1.7879068851470947, "learning_rate": 7.184139085206858e-05, "loss": 1.4729, "step": 22110 }, { "epoch": 5.911277391769107, "grad_norm": 1.8004533052444458, "learning_rate": 7.176084303144141e-05, "loss": 1.4285, "step": 22120 }, { "epoch": 5.913949759486905, "grad_norm": 1.7685760259628296, "learning_rate": 7.16803151149755e-05, "loss": 1.3682, "step": 22130 }, { "epoch": 5.916622127204703, "grad_norm": 1.9934276342391968, "learning_rate": 7.159980715943027e-05, "loss": 1.411, "step": 22140 }, { "epoch": 5.919294494922501, "grad_norm": 1.8428826332092285, "learning_rate": 7.151931922155126e-05, "loss": 1.4323, "step": 22150 }, { "epoch": 5.921966862640299, "grad_norm": 1.8446909189224243, "learning_rate": 7.143885135806975e-05, "loss": 1.3668, "step": 22160 }, { "epoch": 5.924639230358097, "grad_norm": 1.707067847251892, "learning_rate": 7.13584036257029e-05, "loss": 1.4099, "step": 22170 }, { "epoch": 5.927311598075895, "grad_norm": 1.795223355293274, "learning_rate": 7.127797608115376e-05, "loss": 1.414, "step": 22180 }, { "epoch": 5.929983965793693, "grad_norm": 1.6973556280136108, "learning_rate": 7.119756878111104e-05, "loss": 1.3696, "step": 22190 }, { "epoch": 5.932656333511491, "grad_norm": 1.8875623941421509, "learning_rate": 7.111718178224925e-05, "loss": 1.4816, "step": 22200 }, { "epoch": 5.935328701229289, "grad_norm": 2.0357954502105713, "learning_rate": 7.10368151412286e-05, "loss": 1.4494, "step": 22210 }, { "epoch": 5.938001068947087, "grad_norm": 1.8732619285583496, "learning_rate": 7.095646891469482e-05, "loss": 1.4331, "step": 22220 }, { "epoch": 5.940673436664885, "grad_norm": 1.720133662223816, "learning_rate": 7.08761431592795e-05, "loss": 1.4076, "step": 22230 }, { "epoch": 5.943345804382683, "grad_norm": 1.7815426588058472, "learning_rate": 7.079583793159957e-05, "loss": 1.455, "step": 22240 }, { "epoch": 5.946018172100481, "grad_norm": 1.8538612127304077, "learning_rate": 7.071555328825758e-05, "loss": 1.406, "step": 22250 }, { "epoch": 5.948690539818279, "grad_norm": 1.8338390588760376, "learning_rate": 7.063528928584158e-05, "loss": 1.3684, "step": 22260 }, { "epoch": 5.951362907536077, "grad_norm": 1.7937397956848145, "learning_rate": 7.055504598092503e-05, "loss": 1.4068, "step": 22270 }, { "epoch": 5.954035275253875, "grad_norm": 1.9398363828659058, "learning_rate": 7.04748234300669e-05, "loss": 1.4004, "step": 22280 }, { "epoch": 5.956707642971673, "grad_norm": 1.991209626197815, "learning_rate": 7.039462168981144e-05, "loss": 1.3988, "step": 22290 }, { "epoch": 5.959380010689471, "grad_norm": 1.8283241987228394, "learning_rate": 7.031444081668817e-05, "loss": 1.4362, "step": 22300 }, { "epoch": 5.962052378407269, "grad_norm": 1.9161646366119385, "learning_rate": 7.023428086721209e-05, "loss": 1.437, "step": 22310 }, { "epoch": 5.964724746125067, "grad_norm": 1.9440922737121582, "learning_rate": 7.015414189788332e-05, "loss": 1.4421, "step": 22320 }, { "epoch": 5.967397113842865, "grad_norm": 1.8406633138656616, "learning_rate": 7.007402396518717e-05, "loss": 1.386, "step": 22330 }, { "epoch": 5.970069481560663, "grad_norm": 1.7498799562454224, "learning_rate": 6.999392712559425e-05, "loss": 1.5282, "step": 22340 }, { "epoch": 5.972741849278461, "grad_norm": 1.7741248607635498, "learning_rate": 6.991385143556015e-05, "loss": 1.4413, "step": 22350 }, { "epoch": 5.975414216996259, "grad_norm": 1.7782055139541626, "learning_rate": 6.983379695152571e-05, "loss": 1.4095, "step": 22360 }, { "epoch": 5.978086584714057, "grad_norm": 1.7061249017715454, "learning_rate": 6.97537637299167e-05, "loss": 1.4772, "step": 22370 }, { "epoch": 5.980758952431855, "grad_norm": 1.7291443347930908, "learning_rate": 6.967375182714391e-05, "loss": 1.4935, "step": 22380 }, { "epoch": 5.983431320149653, "grad_norm": 2.1783764362335205, "learning_rate": 6.95937612996032e-05, "loss": 1.4364, "step": 22390 }, { "epoch": 5.98610368786745, "grad_norm": 1.6925691366195679, "learning_rate": 6.951379220367528e-05, "loss": 1.4147, "step": 22400 }, { "epoch": 5.988776055585248, "grad_norm": 1.7771650552749634, "learning_rate": 6.943384459572575e-05, "loss": 1.4636, "step": 22410 }, { "epoch": 5.991448423303046, "grad_norm": 1.9734643697738647, "learning_rate": 6.935391853210515e-05, "loss": 1.4018, "step": 22420 }, { "epoch": 5.994120791020844, "grad_norm": 1.6722396612167358, "learning_rate": 6.927401406914872e-05, "loss": 1.407, "step": 22430 }, { "epoch": 5.996793158738642, "grad_norm": 1.7412807941436768, "learning_rate": 6.919413126317656e-05, "loss": 1.4692, "step": 22440 }, { "epoch": 5.99946552645644, "grad_norm": 1.8932757377624512, "learning_rate": 6.911427017049346e-05, "loss": 1.3839, "step": 22450 }, { "epoch": 6.002137894174238, "grad_norm": 1.9046579599380493, "learning_rate": 6.903443084738892e-05, "loss": 1.3296, "step": 22460 }, { "epoch": 6.004810261892036, "grad_norm": 1.9861325025558472, "learning_rate": 6.895461335013712e-05, "loss": 1.3221, "step": 22470 }, { "epoch": 6.007482629609834, "grad_norm": 1.8595702648162842, "learning_rate": 6.887481773499683e-05, "loss": 1.2499, "step": 22480 }, { "epoch": 6.010154997327632, "grad_norm": 2.0673961639404297, "learning_rate": 6.879504405821134e-05, "loss": 1.3126, "step": 22490 }, { "epoch": 6.01282736504543, "grad_norm": 1.9374237060546875, "learning_rate": 6.871529237600864e-05, "loss": 1.2919, "step": 22500 }, { "epoch": 6.015499732763228, "grad_norm": 1.9081655740737915, "learning_rate": 6.8635562744601e-05, "loss": 1.2276, "step": 22510 }, { "epoch": 6.018172100481026, "grad_norm": 2.1691958904266357, "learning_rate": 6.855585522018537e-05, "loss": 1.3396, "step": 22520 }, { "epoch": 6.020844468198824, "grad_norm": 2.029203414916992, "learning_rate": 6.847616985894296e-05, "loss": 1.3081, "step": 22530 }, { "epoch": 6.023516835916622, "grad_norm": 1.9637291431427002, "learning_rate": 6.839650671703937e-05, "loss": 1.2681, "step": 22540 }, { "epoch": 6.02618920363442, "grad_norm": 2.052896499633789, "learning_rate": 6.831686585062467e-05, "loss": 1.2409, "step": 22550 }, { "epoch": 6.028861571352218, "grad_norm": 1.8653088808059692, "learning_rate": 6.823724731583307e-05, "loss": 1.2823, "step": 22560 }, { "epoch": 6.031533939070016, "grad_norm": 1.7768802642822266, "learning_rate": 6.815765116878307e-05, "loss": 1.322, "step": 22570 }, { "epoch": 6.034206306787814, "grad_norm": 2.0534133911132812, "learning_rate": 6.807807746557754e-05, "loss": 1.2878, "step": 22580 }, { "epoch": 6.036878674505612, "grad_norm": 1.9299341440200806, "learning_rate": 6.799852626230332e-05, "loss": 1.2697, "step": 22590 }, { "epoch": 6.03955104222341, "grad_norm": 1.8582969903945923, "learning_rate": 6.791899761503153e-05, "loss": 1.3149, "step": 22600 }, { "epoch": 6.042223409941208, "grad_norm": 1.945414423942566, "learning_rate": 6.783949157981735e-05, "loss": 1.3075, "step": 22610 }, { "epoch": 6.044895777659006, "grad_norm": 1.971627950668335, "learning_rate": 6.776000821270001e-05, "loss": 1.2919, "step": 22620 }, { "epoch": 6.047568145376804, "grad_norm": 1.818709135055542, "learning_rate": 6.768054756970279e-05, "loss": 1.2471, "step": 22630 }, { "epoch": 6.050240513094602, "grad_norm": 1.9002695083618164, "learning_rate": 6.760110970683288e-05, "loss": 1.202, "step": 22640 }, { "epoch": 6.0529128808124, "grad_norm": 1.9322364330291748, "learning_rate": 6.752169468008153e-05, "loss": 1.2837, "step": 22650 }, { "epoch": 6.055585248530198, "grad_norm": 1.9187424182891846, "learning_rate": 6.744230254542386e-05, "loss": 1.2427, "step": 22660 }, { "epoch": 6.058257616247996, "grad_norm": 1.9976487159729004, "learning_rate": 6.736293335881874e-05, "loss": 1.2721, "step": 22670 }, { "epoch": 6.060929983965794, "grad_norm": 1.9472683668136597, "learning_rate": 6.728358717620903e-05, "loss": 1.3635, "step": 22680 }, { "epoch": 6.063602351683592, "grad_norm": 1.8906091451644897, "learning_rate": 6.720426405352129e-05, "loss": 1.2698, "step": 22690 }, { "epoch": 6.06627471940139, "grad_norm": 2.0350935459136963, "learning_rate": 6.71249640466658e-05, "loss": 1.2213, "step": 22700 }, { "epoch": 6.068947087119188, "grad_norm": 1.9788485765457153, "learning_rate": 6.704568721153662e-05, "loss": 1.2135, "step": 22710 }, { "epoch": 6.071619454836986, "grad_norm": 2.0295591354370117, "learning_rate": 6.696643360401144e-05, "loss": 1.2765, "step": 22720 }, { "epoch": 6.074291822554784, "grad_norm": 1.9607418775558472, "learning_rate": 6.688720327995153e-05, "loss": 1.2846, "step": 22730 }, { "epoch": 6.076964190272582, "grad_norm": 2.017274856567383, "learning_rate": 6.680799629520187e-05, "loss": 1.3168, "step": 22740 }, { "epoch": 6.079636557990379, "grad_norm": 1.886898398399353, "learning_rate": 6.672881270559086e-05, "loss": 1.3313, "step": 22750 }, { "epoch": 6.082308925708177, "grad_norm": 1.9333875179290771, "learning_rate": 6.664965256693052e-05, "loss": 1.2698, "step": 22760 }, { "epoch": 6.084981293425975, "grad_norm": 1.9045292139053345, "learning_rate": 6.657051593501629e-05, "loss": 1.2806, "step": 22770 }, { "epoch": 6.087653661143773, "grad_norm": 1.9250677824020386, "learning_rate": 6.649140286562696e-05, "loss": 1.2848, "step": 22780 }, { "epoch": 6.090326028861571, "grad_norm": 1.9216147661209106, "learning_rate": 6.641231341452487e-05, "loss": 1.2887, "step": 22790 }, { "epoch": 6.092998396579369, "grad_norm": 1.9174996614456177, "learning_rate": 6.633324763745563e-05, "loss": 1.3431, "step": 22800 }, { "epoch": 6.095670764297167, "grad_norm": 1.8849976062774658, "learning_rate": 6.625420559014812e-05, "loss": 1.315, "step": 22810 }, { "epoch": 6.098343132014965, "grad_norm": 1.927781343460083, "learning_rate": 6.61751873283146e-05, "loss": 1.2483, "step": 22820 }, { "epoch": 6.101015499732763, "grad_norm": 1.9553791284561157, "learning_rate": 6.609619290765045e-05, "loss": 1.1845, "step": 22830 }, { "epoch": 6.103687867450561, "grad_norm": 2.073179244995117, "learning_rate": 6.601722238383437e-05, "loss": 1.2956, "step": 22840 }, { "epoch": 6.106360235168359, "grad_norm": 1.7539079189300537, "learning_rate": 6.593827581252812e-05, "loss": 1.2073, "step": 22850 }, { "epoch": 6.109032602886157, "grad_norm": 2.009506940841675, "learning_rate": 6.585935324937656e-05, "loss": 1.3296, "step": 22860 }, { "epoch": 6.111704970603955, "grad_norm": 1.9820828437805176, "learning_rate": 6.578045475000778e-05, "loss": 1.2751, "step": 22870 }, { "epoch": 6.114377338321753, "grad_norm": 2.021517515182495, "learning_rate": 6.570158037003273e-05, "loss": 1.3532, "step": 22880 }, { "epoch": 6.117049706039551, "grad_norm": 2.0773496627807617, "learning_rate": 6.562273016504541e-05, "loss": 1.3231, "step": 22890 }, { "epoch": 6.119722073757349, "grad_norm": 2.197002410888672, "learning_rate": 6.554390419062288e-05, "loss": 1.2714, "step": 22900 }, { "epoch": 6.122394441475147, "grad_norm": 1.852329134941101, "learning_rate": 6.546510250232498e-05, "loss": 1.3469, "step": 22910 }, { "epoch": 6.125066809192945, "grad_norm": 1.8561760187149048, "learning_rate": 6.538632515569457e-05, "loss": 1.3371, "step": 22920 }, { "epoch": 6.127739176910743, "grad_norm": 1.9889413118362427, "learning_rate": 6.530757220625723e-05, "loss": 1.2938, "step": 22930 }, { "epoch": 6.130411544628541, "grad_norm": 1.8642148971557617, "learning_rate": 6.522884370952136e-05, "loss": 1.2297, "step": 22940 }, { "epoch": 6.133083912346339, "grad_norm": 2.2356655597686768, "learning_rate": 6.515013972097825e-05, "loss": 1.2885, "step": 22950 }, { "epoch": 6.135756280064137, "grad_norm": 1.9197638034820557, "learning_rate": 6.507146029610179e-05, "loss": 1.2557, "step": 22960 }, { "epoch": 6.138428647781935, "grad_norm": 2.064286231994629, "learning_rate": 6.499280549034855e-05, "loss": 1.3098, "step": 22970 }, { "epoch": 6.141101015499733, "grad_norm": 1.8423385620117188, "learning_rate": 6.491417535915783e-05, "loss": 1.2579, "step": 22980 }, { "epoch": 6.143773383217531, "grad_norm": 1.8803256750106812, "learning_rate": 6.483556995795147e-05, "loss": 1.3517, "step": 22990 }, { "epoch": 6.146445750935329, "grad_norm": 2.0200650691986084, "learning_rate": 6.475698934213395e-05, "loss": 1.2232, "step": 23000 }, { "epoch": 6.149118118653127, "grad_norm": 2.00144100189209, "learning_rate": 6.467843356709221e-05, "loss": 1.3506, "step": 23010 }, { "epoch": 6.151790486370925, "grad_norm": 1.9935312271118164, "learning_rate": 6.459990268819567e-05, "loss": 1.244, "step": 23020 }, { "epoch": 6.154462854088723, "grad_norm": 1.9040898084640503, "learning_rate": 6.452139676079631e-05, "loss": 1.3473, "step": 23030 }, { "epoch": 6.157135221806521, "grad_norm": 2.008814811706543, "learning_rate": 6.444291584022843e-05, "loss": 1.2708, "step": 23040 }, { "epoch": 6.159807589524319, "grad_norm": 1.8682596683502197, "learning_rate": 6.436445998180866e-05, "loss": 1.2746, "step": 23050 }, { "epoch": 6.162479957242116, "grad_norm": 1.8754842281341553, "learning_rate": 6.428602924083615e-05, "loss": 1.248, "step": 23060 }, { "epoch": 6.165152324959914, "grad_norm": 2.0143139362335205, "learning_rate": 6.420762367259213e-05, "loss": 1.3027, "step": 23070 }, { "epoch": 6.167824692677712, "grad_norm": 1.872719645500183, "learning_rate": 6.412924333234026e-05, "loss": 1.2795, "step": 23080 }, { "epoch": 6.17049706039551, "grad_norm": 2.1277658939361572, "learning_rate": 6.40508882753263e-05, "loss": 1.2405, "step": 23090 }, { "epoch": 6.173169428113308, "grad_norm": 1.7047022581100464, "learning_rate": 6.397255855677821e-05, "loss": 1.2908, "step": 23100 }, { "epoch": 6.175841795831106, "grad_norm": 1.8312227725982666, "learning_rate": 6.389425423190619e-05, "loss": 1.3096, "step": 23110 }, { "epoch": 6.178514163548904, "grad_norm": 2.1834187507629395, "learning_rate": 6.381597535590242e-05, "loss": 1.2477, "step": 23120 }, { "epoch": 6.181186531266702, "grad_norm": 2.458266496658325, "learning_rate": 6.373772198394114e-05, "loss": 1.2699, "step": 23130 }, { "epoch": 6.1838588989845, "grad_norm": 2.0120556354522705, "learning_rate": 6.365949417117876e-05, "loss": 1.2854, "step": 23140 }, { "epoch": 6.186531266702298, "grad_norm": 2.13199782371521, "learning_rate": 6.358129197275348e-05, "loss": 1.3501, "step": 23150 }, { "epoch": 6.189203634420096, "grad_norm": 2.2219533920288086, "learning_rate": 6.350311544378564e-05, "loss": 1.2776, "step": 23160 }, { "epoch": 6.191876002137894, "grad_norm": 2.0801823139190674, "learning_rate": 6.342496463937731e-05, "loss": 1.3194, "step": 23170 }, { "epoch": 6.194548369855692, "grad_norm": 1.946763515472412, "learning_rate": 6.334683961461252e-05, "loss": 1.3422, "step": 23180 }, { "epoch": 6.19722073757349, "grad_norm": 1.9578254222869873, "learning_rate": 6.326874042455715e-05, "loss": 1.3403, "step": 23190 }, { "epoch": 6.199893105291288, "grad_norm": 1.91990327835083, "learning_rate": 6.319066712425883e-05, "loss": 1.2727, "step": 23200 }, { "epoch": 6.202565473009086, "grad_norm": 1.915351152420044, "learning_rate": 6.311261976874689e-05, "loss": 1.2846, "step": 23210 }, { "epoch": 6.205237840726884, "grad_norm": 1.8709057569503784, "learning_rate": 6.303459841303253e-05, "loss": 1.2895, "step": 23220 }, { "epoch": 6.207910208444682, "grad_norm": 1.9091672897338867, "learning_rate": 6.295660311210843e-05, "loss": 1.295, "step": 23230 }, { "epoch": 6.21058257616248, "grad_norm": 1.864214539527893, "learning_rate": 6.287863392094908e-05, "loss": 1.3519, "step": 23240 }, { "epoch": 6.213254943880278, "grad_norm": 2.014496088027954, "learning_rate": 6.280069089451046e-05, "loss": 1.3719, "step": 23250 }, { "epoch": 6.215927311598076, "grad_norm": 2.009049892425537, "learning_rate": 6.272277408773009e-05, "loss": 1.2893, "step": 23260 }, { "epoch": 6.218599679315874, "grad_norm": 1.9774397611618042, "learning_rate": 6.264488355552714e-05, "loss": 1.2778, "step": 23270 }, { "epoch": 6.221272047033672, "grad_norm": 2.062709093093872, "learning_rate": 6.256701935280214e-05, "loss": 1.3393, "step": 23280 }, { "epoch": 6.22394441475147, "grad_norm": 2.0006885528564453, "learning_rate": 6.248918153443705e-05, "loss": 1.3484, "step": 23290 }, { "epoch": 6.226616782469268, "grad_norm": 2.1000020503997803, "learning_rate": 6.241137015529537e-05, "loss": 1.3144, "step": 23300 }, { "epoch": 6.229289150187066, "grad_norm": 1.9920177459716797, "learning_rate": 6.233358527022177e-05, "loss": 1.3562, "step": 23310 }, { "epoch": 6.231961517904864, "grad_norm": 2.069359540939331, "learning_rate": 6.225582693404249e-05, "loss": 1.3087, "step": 23320 }, { "epoch": 6.234633885622662, "grad_norm": 1.9647448062896729, "learning_rate": 6.217809520156481e-05, "loss": 1.2882, "step": 23330 }, { "epoch": 6.23730625334046, "grad_norm": 1.8878511190414429, "learning_rate": 6.210039012757738e-05, "loss": 1.3485, "step": 23340 }, { "epoch": 6.239978621058258, "grad_norm": 1.9968006610870361, "learning_rate": 6.20227117668501e-05, "loss": 1.3109, "step": 23350 }, { "epoch": 6.242650988776056, "grad_norm": 1.8300390243530273, "learning_rate": 6.194506017413392e-05, "loss": 1.3033, "step": 23360 }, { "epoch": 6.245323356493854, "grad_norm": 1.9156193733215332, "learning_rate": 6.186743540416104e-05, "loss": 1.252, "step": 23370 }, { "epoch": 6.247995724211652, "grad_norm": 2.013760805130005, "learning_rate": 6.178983751164471e-05, "loss": 1.3234, "step": 23380 }, { "epoch": 6.25066809192945, "grad_norm": 1.9752377271652222, "learning_rate": 6.171226655127916e-05, "loss": 1.333, "step": 23390 }, { "epoch": 6.253340459647247, "grad_norm": 1.9661108255386353, "learning_rate": 6.163472257773982e-05, "loss": 1.2996, "step": 23400 }, { "epoch": 6.256012827365045, "grad_norm": 1.981852412223816, "learning_rate": 6.155720564568293e-05, "loss": 1.3432, "step": 23410 }, { "epoch": 6.258685195082843, "grad_norm": 2.1153745651245117, "learning_rate": 6.147971580974568e-05, "loss": 1.3372, "step": 23420 }, { "epoch": 6.261357562800641, "grad_norm": 2.1223621368408203, "learning_rate": 6.140225312454629e-05, "loss": 1.319, "step": 23430 }, { "epoch": 6.264029930518439, "grad_norm": 1.8627023696899414, "learning_rate": 6.132481764468373e-05, "loss": 1.3023, "step": 23440 }, { "epoch": 6.266702298236237, "grad_norm": 1.9366716146469116, "learning_rate": 6.124740942473778e-05, "loss": 1.2827, "step": 23450 }, { "epoch": 6.269374665954035, "grad_norm": 2.0096912384033203, "learning_rate": 6.117002851926914e-05, "loss": 1.3612, "step": 23460 }, { "epoch": 6.272047033671833, "grad_norm": 2.025266170501709, "learning_rate": 6.109267498281909e-05, "loss": 1.3672, "step": 23470 }, { "epoch": 6.274719401389631, "grad_norm": 2.2905125617980957, "learning_rate": 6.101534886990976e-05, "loss": 1.303, "step": 23480 }, { "epoch": 6.277391769107429, "grad_norm": 1.912898063659668, "learning_rate": 6.0938050235043886e-05, "loss": 1.2745, "step": 23490 }, { "epoch": 6.280064136825227, "grad_norm": 1.9694336652755737, "learning_rate": 6.086077913270476e-05, "loss": 1.4124, "step": 23500 }, { "epoch": 6.282736504543025, "grad_norm": 2.0188982486724854, "learning_rate": 6.0783535617356434e-05, "loss": 1.3102, "step": 23510 }, { "epoch": 6.285408872260823, "grad_norm": 2.0003912448883057, "learning_rate": 6.070631974344343e-05, "loss": 1.307, "step": 23520 }, { "epoch": 6.288081239978621, "grad_norm": 1.8425689935684204, "learning_rate": 6.0629131565390715e-05, "loss": 1.2732, "step": 23530 }, { "epoch": 6.290753607696419, "grad_norm": 1.9907017946243286, "learning_rate": 6.055197113760385e-05, "loss": 1.3561, "step": 23540 }, { "epoch": 6.293425975414217, "grad_norm": 1.8586206436157227, "learning_rate": 6.04748385144688e-05, "loss": 1.3121, "step": 23550 }, { "epoch": 6.296098343132015, "grad_norm": 1.839750051498413, "learning_rate": 6.0397733750351925e-05, "loss": 1.3793, "step": 23560 }, { "epoch": 6.298770710849813, "grad_norm": 1.9381271600723267, "learning_rate": 6.0320656899599934e-05, "loss": 1.3002, "step": 23570 }, { "epoch": 6.301443078567611, "grad_norm": 1.955628514289856, "learning_rate": 6.024360801653985e-05, "loss": 1.2719, "step": 23580 }, { "epoch": 6.304115446285409, "grad_norm": 1.8711755275726318, "learning_rate": 6.016658715547909e-05, "loss": 1.3457, "step": 23590 }, { "epoch": 6.306787814003207, "grad_norm": 1.9570006132125854, "learning_rate": 6.008959437070518e-05, "loss": 1.3249, "step": 23600 }, { "epoch": 6.309460181721005, "grad_norm": 2.015333652496338, "learning_rate": 6.001262971648589e-05, "loss": 1.2877, "step": 23610 }, { "epoch": 6.312132549438803, "grad_norm": 2.026175022125244, "learning_rate": 5.993569324706925e-05, "loss": 1.29, "step": 23620 }, { "epoch": 6.314804917156601, "grad_norm": 1.9941869974136353, "learning_rate": 5.9858785016683306e-05, "loss": 1.3553, "step": 23630 }, { "epoch": 6.317477284874399, "grad_norm": 1.9505996704101562, "learning_rate": 5.9781905079536324e-05, "loss": 1.369, "step": 23640 }, { "epoch": 6.320149652592197, "grad_norm": 2.053548812866211, "learning_rate": 5.970505348981652e-05, "loss": 1.3175, "step": 23650 }, { "epoch": 6.322822020309995, "grad_norm": 1.9613093137741089, "learning_rate": 5.962823030169213e-05, "loss": 1.335, "step": 23660 }, { "epoch": 6.325494388027793, "grad_norm": 2.036151647567749, "learning_rate": 5.955143556931149e-05, "loss": 1.3233, "step": 23670 }, { "epoch": 6.328166755745591, "grad_norm": 2.0008156299591064, "learning_rate": 5.947466934680277e-05, "loss": 1.274, "step": 23680 }, { "epoch": 6.330839123463389, "grad_norm": 1.8678631782531738, "learning_rate": 5.9397931688274023e-05, "loss": 1.2367, "step": 23690 }, { "epoch": 6.333511491181187, "grad_norm": 1.9700605869293213, "learning_rate": 5.93212226478133e-05, "loss": 1.3093, "step": 23700 }, { "epoch": 6.336183858898984, "grad_norm": 1.8679239749908447, "learning_rate": 5.9244542279488346e-05, "loss": 1.326, "step": 23710 }, { "epoch": 6.338856226616782, "grad_norm": 1.9873723983764648, "learning_rate": 5.91678906373468e-05, "loss": 1.2827, "step": 23720 }, { "epoch": 6.34152859433458, "grad_norm": 1.9908934831619263, "learning_rate": 5.909126777541596e-05, "loss": 1.3772, "step": 23730 }, { "epoch": 6.344200962052378, "grad_norm": 1.9041279554367065, "learning_rate": 5.9014673747702886e-05, "loss": 1.3175, "step": 23740 }, { "epoch": 6.346873329770176, "grad_norm": 2.04715895652771, "learning_rate": 5.8938108608194355e-05, "loss": 1.3249, "step": 23750 }, { "epoch": 6.349545697487974, "grad_norm": 1.8260263204574585, "learning_rate": 5.886157241085672e-05, "loss": 1.3476, "step": 23760 }, { "epoch": 6.352218065205772, "grad_norm": 1.9012395143508911, "learning_rate": 5.8785065209635916e-05, "loss": 1.3144, "step": 23770 }, { "epoch": 6.35489043292357, "grad_norm": 1.9461390972137451, "learning_rate": 5.8708587058457545e-05, "loss": 1.2653, "step": 23780 }, { "epoch": 6.357562800641368, "grad_norm": 2.0778017044067383, "learning_rate": 5.863213801122659e-05, "loss": 1.287, "step": 23790 }, { "epoch": 6.360235168359166, "grad_norm": 1.932028889656067, "learning_rate": 5.855571812182769e-05, "loss": 1.2953, "step": 23800 }, { "epoch": 6.362907536076964, "grad_norm": 2.24345326423645, "learning_rate": 5.847932744412477e-05, "loss": 1.3015, "step": 23810 }, { "epoch": 6.365579903794762, "grad_norm": 1.9071317911148071, "learning_rate": 5.840296603196124e-05, "loss": 1.3571, "step": 23820 }, { "epoch": 6.36825227151256, "grad_norm": 1.827386736869812, "learning_rate": 5.832663393915991e-05, "loss": 1.2797, "step": 23830 }, { "epoch": 6.370924639230358, "grad_norm": 1.8914283514022827, "learning_rate": 5.8250331219522925e-05, "loss": 1.313, "step": 23840 }, { "epoch": 6.373597006948156, "grad_norm": 1.8569040298461914, "learning_rate": 5.817405792683155e-05, "loss": 1.3113, "step": 23850 }, { "epoch": 6.376269374665954, "grad_norm": 1.8708868026733398, "learning_rate": 5.809781411484662e-05, "loss": 1.2918, "step": 23860 }, { "epoch": 6.378941742383752, "grad_norm": 1.834140419960022, "learning_rate": 5.802159983730786e-05, "loss": 1.3398, "step": 23870 }, { "epoch": 6.38161411010155, "grad_norm": 1.9090319871902466, "learning_rate": 5.7945415147934525e-05, "loss": 1.336, "step": 23880 }, { "epoch": 6.384286477819348, "grad_norm": 2.0437538623809814, "learning_rate": 5.786926010042468e-05, "loss": 1.3584, "step": 23890 }, { "epoch": 6.386958845537146, "grad_norm": 2.0860626697540283, "learning_rate": 5.7793134748455714e-05, "loss": 1.3766, "step": 23900 }, { "epoch": 6.389631213254944, "grad_norm": 1.9386303424835205, "learning_rate": 5.7717039145684005e-05, "loss": 1.2482, "step": 23910 }, { "epoch": 6.392303580972742, "grad_norm": 2.092529773712158, "learning_rate": 5.764097334574503e-05, "loss": 1.336, "step": 23920 }, { "epoch": 6.39497594869054, "grad_norm": 2.0294992923736572, "learning_rate": 5.756493740225306e-05, "loss": 1.3181, "step": 23930 }, { "epoch": 6.397648316408338, "grad_norm": 1.9609752893447876, "learning_rate": 5.7488931368801644e-05, "loss": 1.336, "step": 23940 }, { "epoch": 6.400320684126136, "grad_norm": 2.144275426864624, "learning_rate": 5.741295529896295e-05, "loss": 1.3315, "step": 23950 }, { "epoch": 6.402993051843934, "grad_norm": 2.0646297931671143, "learning_rate": 5.733700924628821e-05, "loss": 1.3433, "step": 23960 }, { "epoch": 6.405665419561732, "grad_norm": 1.8610888719558716, "learning_rate": 5.7261093264307416e-05, "loss": 1.2768, "step": 23970 }, { "epoch": 6.40833778727953, "grad_norm": 1.857261061668396, "learning_rate": 5.7185207406529386e-05, "loss": 1.2985, "step": 23980 }, { "epoch": 6.411010154997328, "grad_norm": 1.9301776885986328, "learning_rate": 5.710935172644175e-05, "loss": 1.3038, "step": 23990 }, { "epoch": 6.413682522715126, "grad_norm": 2.1339030265808105, "learning_rate": 5.7033526277510755e-05, "loss": 1.2281, "step": 24000 }, { "epoch": 6.416354890432924, "grad_norm": 1.8557411432266235, "learning_rate": 5.6957731113181433e-05, "loss": 1.3243, "step": 24010 }, { "epoch": 6.419027258150722, "grad_norm": 1.8736084699630737, "learning_rate": 5.688196628687745e-05, "loss": 1.3724, "step": 24020 }, { "epoch": 6.42169962586852, "grad_norm": 2.0215272903442383, "learning_rate": 5.6806231852001104e-05, "loss": 1.2903, "step": 24030 }, { "epoch": 6.424371993586318, "grad_norm": 2.0285580158233643, "learning_rate": 5.673052786193323e-05, "loss": 1.4089, "step": 24040 }, { "epoch": 6.427044361304115, "grad_norm": 1.9792866706848145, "learning_rate": 5.665485437003326e-05, "loss": 1.3206, "step": 24050 }, { "epoch": 6.429716729021913, "grad_norm": 2.078568935394287, "learning_rate": 5.6579211429639e-05, "loss": 1.3477, "step": 24060 }, { "epoch": 6.432389096739711, "grad_norm": 1.9913173913955688, "learning_rate": 5.650359909406695e-05, "loss": 1.3042, "step": 24070 }, { "epoch": 6.435061464457509, "grad_norm": 1.9369879961013794, "learning_rate": 5.642801741661181e-05, "loss": 1.36, "step": 24080 }, { "epoch": 6.437733832175307, "grad_norm": 1.9501110315322876, "learning_rate": 5.6352466450546806e-05, "loss": 1.4001, "step": 24090 }, { "epoch": 6.440406199893105, "grad_norm": 1.8964717388153076, "learning_rate": 5.627694624912347e-05, "loss": 1.3047, "step": 24100 }, { "epoch": 6.443078567610903, "grad_norm": 2.044865846633911, "learning_rate": 5.620145686557165e-05, "loss": 1.3673, "step": 24110 }, { "epoch": 6.445750935328701, "grad_norm": 2.040405750274658, "learning_rate": 5.612599835309949e-05, "loss": 1.3436, "step": 24120 }, { "epoch": 6.448423303046499, "grad_norm": 1.9377208948135376, "learning_rate": 5.605057076489342e-05, "loss": 1.348, "step": 24130 }, { "epoch": 6.451095670764297, "grad_norm": 1.9589042663574219, "learning_rate": 5.597517415411792e-05, "loss": 1.2912, "step": 24140 }, { "epoch": 6.453768038482095, "grad_norm": 1.9442590475082397, "learning_rate": 5.589980857391577e-05, "loss": 1.2581, "step": 24150 }, { "epoch": 6.456440406199893, "grad_norm": 1.9758940935134888, "learning_rate": 5.582447407740785e-05, "loss": 1.3421, "step": 24160 }, { "epoch": 6.459112773917691, "grad_norm": 2.0502991676330566, "learning_rate": 5.574917071769313e-05, "loss": 1.2519, "step": 24170 }, { "epoch": 6.461785141635489, "grad_norm": 2.0679097175598145, "learning_rate": 5.5673898547848656e-05, "loss": 1.3707, "step": 24180 }, { "epoch": 6.464457509353287, "grad_norm": 2.0154318809509277, "learning_rate": 5.559865762092935e-05, "loss": 1.2755, "step": 24190 }, { "epoch": 6.467129877071085, "grad_norm": 2.023235559463501, "learning_rate": 5.552344798996838e-05, "loss": 1.3422, "step": 24200 }, { "epoch": 6.469802244788883, "grad_norm": 1.8802125453948975, "learning_rate": 5.544826970797658e-05, "loss": 1.2408, "step": 24210 }, { "epoch": 6.472474612506681, "grad_norm": 2.142982006072998, "learning_rate": 5.5373122827942844e-05, "loss": 1.3135, "step": 24220 }, { "epoch": 6.475146980224479, "grad_norm": 1.9517136812210083, "learning_rate": 5.5298007402833905e-05, "loss": 1.2831, "step": 24230 }, { "epoch": 6.477819347942277, "grad_norm": 1.7909530401229858, "learning_rate": 5.522292348559434e-05, "loss": 1.2253, "step": 24240 }, { "epoch": 6.480491715660075, "grad_norm": 1.970603585243225, "learning_rate": 5.5147871129146377e-05, "loss": 1.3396, "step": 24250 }, { "epoch": 6.483164083377873, "grad_norm": 1.9476258754730225, "learning_rate": 5.5072850386390283e-05, "loss": 1.3097, "step": 24260 }, { "epoch": 6.485836451095671, "grad_norm": 1.9174326658248901, "learning_rate": 5.49978613102037e-05, "loss": 1.2603, "step": 24270 }, { "epoch": 6.488508818813469, "grad_norm": 1.8014626502990723, "learning_rate": 5.492290395344232e-05, "loss": 1.3148, "step": 24280 }, { "epoch": 6.491181186531267, "grad_norm": 2.1735575199127197, "learning_rate": 5.4847978368939126e-05, "loss": 1.3427, "step": 24290 }, { "epoch": 6.493853554249065, "grad_norm": 1.9877837896347046, "learning_rate": 5.477308460950491e-05, "loss": 1.2973, "step": 24300 }, { "epoch": 6.496525921966863, "grad_norm": 1.9472131729125977, "learning_rate": 5.4698222727927997e-05, "loss": 1.353, "step": 24310 }, { "epoch": 6.499198289684661, "grad_norm": 1.8456213474273682, "learning_rate": 5.462339277697426e-05, "loss": 1.3024, "step": 24320 }, { "epoch": 6.501870657402459, "grad_norm": 1.9842456579208374, "learning_rate": 5.454859480938692e-05, "loss": 1.3093, "step": 24330 }, { "epoch": 6.504543025120256, "grad_norm": 1.9452012777328491, "learning_rate": 5.4473828877886925e-05, "loss": 1.2748, "step": 24340 }, { "epoch": 6.507215392838054, "grad_norm": 2.105241537094116, "learning_rate": 5.43990950351724e-05, "loss": 1.3571, "step": 24350 }, { "epoch": 6.509887760555852, "grad_norm": 2.0901341438293457, "learning_rate": 5.432439333391895e-05, "loss": 1.4147, "step": 24360 }, { "epoch": 6.51256012827365, "grad_norm": 2.008657693862915, "learning_rate": 5.4249723826779556e-05, "loss": 1.397, "step": 24370 }, { "epoch": 6.515232495991448, "grad_norm": 2.063403606414795, "learning_rate": 5.417508656638437e-05, "loss": 1.3559, "step": 24380 }, { "epoch": 6.517904863709246, "grad_norm": 2.0258562564849854, "learning_rate": 5.410048160534107e-05, "loss": 1.2944, "step": 24390 }, { "epoch": 6.520577231427044, "grad_norm": 1.9289439916610718, "learning_rate": 5.402590899623427e-05, "loss": 1.2823, "step": 24400 }, { "epoch": 6.523249599144842, "grad_norm": 2.064502716064453, "learning_rate": 5.395136879162599e-05, "loss": 1.3447, "step": 24410 }, { "epoch": 6.52592196686264, "grad_norm": 2.0392673015594482, "learning_rate": 5.3876861044055335e-05, "loss": 1.3871, "step": 24420 }, { "epoch": 6.528594334580438, "grad_norm": 1.8871740102767944, "learning_rate": 5.3802385806038535e-05, "loss": 1.2774, "step": 24430 }, { "epoch": 6.531266702298236, "grad_norm": 2.1257073879241943, "learning_rate": 5.3727943130068904e-05, "loss": 1.317, "step": 24440 }, { "epoch": 6.533939070016034, "grad_norm": 2.031369924545288, "learning_rate": 5.3653533068616866e-05, "loss": 1.2801, "step": 24450 }, { "epoch": 6.536611437733832, "grad_norm": 1.995954990386963, "learning_rate": 5.357915567412968e-05, "loss": 1.2943, "step": 24460 }, { "epoch": 6.53928380545163, "grad_norm": 1.9537169933319092, "learning_rate": 5.350481099903186e-05, "loss": 1.3021, "step": 24470 }, { "epoch": 6.541956173169428, "grad_norm": 1.887035846710205, "learning_rate": 5.3430499095724596e-05, "loss": 1.3474, "step": 24480 }, { "epoch": 6.544628540887226, "grad_norm": 1.9988093376159668, "learning_rate": 5.33562200165861e-05, "loss": 1.3242, "step": 24490 }, { "epoch": 6.547300908605024, "grad_norm": 1.9960969686508179, "learning_rate": 5.3281973813971444e-05, "loss": 1.3295, "step": 24500 }, { "epoch": 6.549973276322822, "grad_norm": 1.8658087253570557, "learning_rate": 5.320776054021253e-05, "loss": 1.3153, "step": 24510 }, { "epoch": 6.55264564404062, "grad_norm": 2.133042573928833, "learning_rate": 5.3133580247618e-05, "loss": 1.3661, "step": 24520 }, { "epoch": 6.555318011758418, "grad_norm": 1.898963451385498, "learning_rate": 5.305943298847336e-05, "loss": 1.381, "step": 24530 }, { "epoch": 6.557990379476216, "grad_norm": 2.0984890460968018, "learning_rate": 5.2985318815040653e-05, "loss": 1.3343, "step": 24540 }, { "epoch": 6.560662747194014, "grad_norm": 2.0512940883636475, "learning_rate": 5.2911237779558774e-05, "loss": 1.3343, "step": 24550 }, { "epoch": 6.563335114911812, "grad_norm": 2.725210428237915, "learning_rate": 5.283718993424317e-05, "loss": 1.3337, "step": 24560 }, { "epoch": 6.56600748262961, "grad_norm": 1.88321053981781, "learning_rate": 5.276317533128593e-05, "loss": 1.3841, "step": 24570 }, { "epoch": 6.568679850347408, "grad_norm": 2.130901336669922, "learning_rate": 5.268919402285573e-05, "loss": 1.3608, "step": 24580 }, { "epoch": 6.571352218065206, "grad_norm": 1.9648263454437256, "learning_rate": 5.261524606109764e-05, "loss": 1.316, "step": 24590 }, { "epoch": 6.574024585783004, "grad_norm": 1.9932940006256104, "learning_rate": 5.254133149813349e-05, "loss": 1.3343, "step": 24600 }, { "epoch": 6.576696953500802, "grad_norm": 2.0183959007263184, "learning_rate": 5.24674503860613e-05, "loss": 1.3807, "step": 24610 }, { "epoch": 6.5793693212186, "grad_norm": 1.8899110555648804, "learning_rate": 5.239360277695565e-05, "loss": 1.3112, "step": 24620 }, { "epoch": 6.582041688936398, "grad_norm": 2.061530113220215, "learning_rate": 5.23197887228675e-05, "loss": 1.3094, "step": 24630 }, { "epoch": 6.584714056654196, "grad_norm": 1.839147686958313, "learning_rate": 5.224600827582418e-05, "loss": 1.3192, "step": 24640 }, { "epoch": 6.587386424371994, "grad_norm": 1.986692190170288, "learning_rate": 5.2172261487829146e-05, "loss": 1.3544, "step": 24650 }, { "epoch": 6.590058792089792, "grad_norm": 1.9184010028839111, "learning_rate": 5.2098548410862466e-05, "loss": 1.3775, "step": 24660 }, { "epoch": 6.59273115980759, "grad_norm": 1.9746228456497192, "learning_rate": 5.202486909688009e-05, "loss": 1.3051, "step": 24670 }, { "epoch": 6.595403527525388, "grad_norm": 1.9751840829849243, "learning_rate": 5.19512235978145e-05, "loss": 1.3443, "step": 24680 }, { "epoch": 6.598075895243186, "grad_norm": 1.950573444366455, "learning_rate": 5.187761196557406e-05, "loss": 1.34, "step": 24690 }, { "epoch": 6.600748262960983, "grad_norm": 1.9397246837615967, "learning_rate": 5.180403425204342e-05, "loss": 1.2836, "step": 24700 }, { "epoch": 6.603420630678781, "grad_norm": 2.12785267829895, "learning_rate": 5.1730490509083296e-05, "loss": 1.3503, "step": 24710 }, { "epoch": 6.606092998396579, "grad_norm": 2.105128526687622, "learning_rate": 5.1656980788530484e-05, "loss": 1.3326, "step": 24720 }, { "epoch": 6.608765366114377, "grad_norm": 2.083244800567627, "learning_rate": 5.1583505142197694e-05, "loss": 1.3435, "step": 24730 }, { "epoch": 6.611437733832175, "grad_norm": 1.8611396551132202, "learning_rate": 5.151006362187374e-05, "loss": 1.38, "step": 24740 }, { "epoch": 6.614110101549973, "grad_norm": 2.061445951461792, "learning_rate": 5.143665627932331e-05, "loss": 1.2961, "step": 24750 }, { "epoch": 6.616782469267771, "grad_norm": 1.8973666429519653, "learning_rate": 5.136328316628706e-05, "loss": 1.3606, "step": 24760 }, { "epoch": 6.619454836985569, "grad_norm": 1.97553551197052, "learning_rate": 5.12899443344815e-05, "loss": 1.3811, "step": 24770 }, { "epoch": 6.622127204703367, "grad_norm": 1.9667669534683228, "learning_rate": 5.1216639835598856e-05, "loss": 1.3449, "step": 24780 }, { "epoch": 6.624799572421165, "grad_norm": 2.018714666366577, "learning_rate": 5.1143369721307424e-05, "loss": 1.2524, "step": 24790 }, { "epoch": 6.627471940138963, "grad_norm": 1.939206838607788, "learning_rate": 5.1070134043250983e-05, "loss": 1.3711, "step": 24800 }, { "epoch": 6.630144307856761, "grad_norm": 1.9365715980529785, "learning_rate": 5.0996932853049186e-05, "loss": 1.3017, "step": 24810 }, { "epoch": 6.632816675574559, "grad_norm": 2.0131990909576416, "learning_rate": 5.0923766202297376e-05, "loss": 1.3143, "step": 24820 }, { "epoch": 6.635489043292357, "grad_norm": 1.902485728263855, "learning_rate": 5.08506341425665e-05, "loss": 1.2985, "step": 24830 }, { "epoch": 6.638161411010155, "grad_norm": 2.0457568168640137, "learning_rate": 5.0777536725403175e-05, "loss": 1.2853, "step": 24840 }, { "epoch": 6.640833778727953, "grad_norm": 2.0161006450653076, "learning_rate": 5.070447400232958e-05, "loss": 1.2634, "step": 24850 }, { "epoch": 6.643506146445751, "grad_norm": 2.0461578369140625, "learning_rate": 5.0631446024843365e-05, "loss": 1.2905, "step": 24860 }, { "epoch": 6.646178514163549, "grad_norm": 1.9626697301864624, "learning_rate": 5.0558452844417895e-05, "loss": 1.3102, "step": 24870 }, { "epoch": 6.648850881881347, "grad_norm": 1.8565678596496582, "learning_rate": 5.048549451250173e-05, "loss": 1.2997, "step": 24880 }, { "epoch": 6.651523249599145, "grad_norm": 2.02064847946167, "learning_rate": 5.041257108051909e-05, "loss": 1.3787, "step": 24890 }, { "epoch": 6.654195617316943, "grad_norm": 2.0025739669799805, "learning_rate": 5.033968259986951e-05, "loss": 1.3616, "step": 24900 }, { "epoch": 6.656867985034741, "grad_norm": 1.908174991607666, "learning_rate": 5.026682912192788e-05, "loss": 1.3133, "step": 24910 }, { "epoch": 6.659540352752539, "grad_norm": 2.0004167556762695, "learning_rate": 5.019401069804448e-05, "loss": 1.3082, "step": 24920 }, { "epoch": 6.662212720470337, "grad_norm": 1.8949096202850342, "learning_rate": 5.0121227379544775e-05, "loss": 1.3418, "step": 24930 }, { "epoch": 6.664885088188135, "grad_norm": 2.18448543548584, "learning_rate": 5.004847921772957e-05, "loss": 1.3453, "step": 24940 }, { "epoch": 6.667557455905933, "grad_norm": 1.9995133876800537, "learning_rate": 4.997576626387489e-05, "loss": 1.3099, "step": 24950 }, { "epoch": 6.670229823623731, "grad_norm": 2.020721673965454, "learning_rate": 4.9903088569231904e-05, "loss": 1.3469, "step": 24960 }, { "epoch": 6.672902191341529, "grad_norm": 1.9147567749023438, "learning_rate": 4.983044618502696e-05, "loss": 1.3453, "step": 24970 }, { "epoch": 6.675574559059327, "grad_norm": 1.9366708993911743, "learning_rate": 4.975783916246154e-05, "loss": 1.2894, "step": 24980 }, { "epoch": 6.678246926777124, "grad_norm": 2.050410509109497, "learning_rate": 4.968526755271206e-05, "loss": 1.3283, "step": 24990 }, { "epoch": 6.680919294494922, "grad_norm": 1.9889848232269287, "learning_rate": 4.961273140693021e-05, "loss": 1.3503, "step": 25000 }, { "epoch": 6.68359166221272, "grad_norm": 2.047287940979004, "learning_rate": 4.9540230776242466e-05, "loss": 1.3312, "step": 25010 }, { "epoch": 6.686264029930518, "grad_norm": 1.980771541595459, "learning_rate": 4.9467765711750414e-05, "loss": 1.2966, "step": 25020 }, { "epoch": 6.688936397648316, "grad_norm": 1.8416842222213745, "learning_rate": 4.9395336264530486e-05, "loss": 1.3168, "step": 25030 }, { "epoch": 6.691608765366114, "grad_norm": 2.0462563037872314, "learning_rate": 4.932294248563406e-05, "loss": 1.3618, "step": 25040 }, { "epoch": 6.694281133083912, "grad_norm": 1.9609215259552002, "learning_rate": 4.9250584426087356e-05, "loss": 1.3455, "step": 25050 }, { "epoch": 6.69695350080171, "grad_norm": 2.044280767440796, "learning_rate": 4.9178262136891454e-05, "loss": 1.3548, "step": 25060 }, { "epoch": 6.699625868519508, "grad_norm": 1.970746636390686, "learning_rate": 4.910597566902211e-05, "loss": 1.3309, "step": 25070 }, { "epoch": 6.702298236237306, "grad_norm": 1.9606901407241821, "learning_rate": 4.903372507342995e-05, "loss": 1.2857, "step": 25080 }, { "epoch": 6.704970603955104, "grad_norm": 2.2049036026000977, "learning_rate": 4.8961510401040254e-05, "loss": 1.3342, "step": 25090 }, { "epoch": 6.707642971672902, "grad_norm": 1.8935282230377197, "learning_rate": 4.8889331702753025e-05, "loss": 1.3692, "step": 25100 }, { "epoch": 6.7103153393907, "grad_norm": 2.032672643661499, "learning_rate": 4.881718902944291e-05, "loss": 1.3425, "step": 25110 }, { "epoch": 6.712987707108498, "grad_norm": 1.9839675426483154, "learning_rate": 4.874508243195902e-05, "loss": 1.3862, "step": 25120 }, { "epoch": 6.715660074826296, "grad_norm": 1.9736772775650024, "learning_rate": 4.8673011961125334e-05, "loss": 1.3463, "step": 25130 }, { "epoch": 6.718332442544094, "grad_norm": 1.9014322757720947, "learning_rate": 4.8600977667740055e-05, "loss": 1.2962, "step": 25140 }, { "epoch": 6.721004810261892, "grad_norm": 2.1234326362609863, "learning_rate": 4.8528979602576066e-05, "loss": 1.3125, "step": 25150 }, { "epoch": 6.72367717797969, "grad_norm": 2.13374400138855, "learning_rate": 4.8457017816380655e-05, "loss": 1.4241, "step": 25160 }, { "epoch": 6.726349545697488, "grad_norm": 1.9276955127716064, "learning_rate": 4.8385092359875626e-05, "loss": 1.2602, "step": 25170 }, { "epoch": 6.729021913415286, "grad_norm": 1.8928720951080322, "learning_rate": 4.831320328375698e-05, "loss": 1.2977, "step": 25180 }, { "epoch": 6.731694281133084, "grad_norm": 1.9410635232925415, "learning_rate": 4.8241350638695346e-05, "loss": 1.3074, "step": 25190 }, { "epoch": 6.734366648850882, "grad_norm": 1.9313756227493286, "learning_rate": 4.8169534475335374e-05, "loss": 1.3694, "step": 25200 }, { "epoch": 6.73703901656868, "grad_norm": 2.0795538425445557, "learning_rate": 4.8097754844296326e-05, "loss": 1.3431, "step": 25210 }, { "epoch": 6.739711384286478, "grad_norm": 2.1617908477783203, "learning_rate": 4.8026011796171425e-05, "loss": 1.3635, "step": 25220 }, { "epoch": 6.742383752004276, "grad_norm": 2.0728282928466797, "learning_rate": 4.7954305381528254e-05, "loss": 1.285, "step": 25230 }, { "epoch": 6.745056119722074, "grad_norm": 1.8272680044174194, "learning_rate": 4.788263565090856e-05, "loss": 1.3446, "step": 25240 }, { "epoch": 6.747728487439872, "grad_norm": 1.9283318519592285, "learning_rate": 4.781100265482826e-05, "loss": 1.3083, "step": 25250 }, { "epoch": 6.75040085515767, "grad_norm": 1.8441859483718872, "learning_rate": 4.773940644377727e-05, "loss": 1.3354, "step": 25260 }, { "epoch": 6.753073222875468, "grad_norm": 1.9824219942092896, "learning_rate": 4.7667847068219674e-05, "loss": 1.2434, "step": 25270 }, { "epoch": 6.755745590593266, "grad_norm": 1.8814092874526978, "learning_rate": 4.7596324578593586e-05, "loss": 1.3441, "step": 25280 }, { "epoch": 6.758417958311064, "grad_norm": 1.9747974872589111, "learning_rate": 4.752483902531111e-05, "loss": 1.3623, "step": 25290 }, { "epoch": 6.761090326028862, "grad_norm": 1.963364601135254, "learning_rate": 4.745339045875832e-05, "loss": 1.2794, "step": 25300 }, { "epoch": 6.76376269374666, "grad_norm": 2.001263380050659, "learning_rate": 4.738197892929512e-05, "loss": 1.3283, "step": 25310 }, { "epoch": 6.766435061464458, "grad_norm": 1.976517677307129, "learning_rate": 4.731060448725554e-05, "loss": 1.344, "step": 25320 }, { "epoch": 6.769107429182256, "grad_norm": 2.0967068672180176, "learning_rate": 4.723926718294722e-05, "loss": 1.2342, "step": 25330 }, { "epoch": 6.771779796900054, "grad_norm": 2.0544328689575195, "learning_rate": 4.716796706665177e-05, "loss": 1.3306, "step": 25340 }, { "epoch": 6.774452164617851, "grad_norm": 1.9995408058166504, "learning_rate": 4.7096704188624564e-05, "loss": 1.2832, "step": 25350 }, { "epoch": 6.777124532335649, "grad_norm": 1.9461665153503418, "learning_rate": 4.7025478599094686e-05, "loss": 1.3502, "step": 25360 }, { "epoch": 6.779796900053447, "grad_norm": 2.1463234424591064, "learning_rate": 4.695429034826499e-05, "loss": 1.2909, "step": 25370 }, { "epoch": 6.782469267771245, "grad_norm": 1.9402949810028076, "learning_rate": 4.6883139486312e-05, "loss": 1.3051, "step": 25380 }, { "epoch": 6.785141635489043, "grad_norm": 1.8647191524505615, "learning_rate": 4.6812026063385796e-05, "loss": 1.2959, "step": 25390 }, { "epoch": 6.787814003206841, "grad_norm": 1.8827990293502808, "learning_rate": 4.674095012961027e-05, "loss": 1.2996, "step": 25400 }, { "epoch": 6.790486370924639, "grad_norm": 2.082275152206421, "learning_rate": 4.6669911735082685e-05, "loss": 1.3208, "step": 25410 }, { "epoch": 6.793158738642437, "grad_norm": 1.8540116548538208, "learning_rate": 4.659891092987395e-05, "loss": 1.3016, "step": 25420 }, { "epoch": 6.795831106360235, "grad_norm": 1.945229411125183, "learning_rate": 4.6527947764028456e-05, "loss": 1.2816, "step": 25430 }, { "epoch": 6.798503474078033, "grad_norm": 1.9631634950637817, "learning_rate": 4.6457022287564066e-05, "loss": 1.3821, "step": 25440 }, { "epoch": 6.801175841795831, "grad_norm": 2.0663106441497803, "learning_rate": 4.638613455047213e-05, "loss": 1.2343, "step": 25450 }, { "epoch": 6.803848209513629, "grad_norm": 2.1802597045898438, "learning_rate": 4.631528460271725e-05, "loss": 1.2804, "step": 25460 }, { "epoch": 6.806520577231427, "grad_norm": 2.0285112857818604, "learning_rate": 4.6244472494237536e-05, "loss": 1.3734, "step": 25470 }, { "epoch": 6.809192944949225, "grad_norm": 1.9220365285873413, "learning_rate": 4.617369827494439e-05, "loss": 1.302, "step": 25480 }, { "epoch": 6.811865312667023, "grad_norm": 2.2240166664123535, "learning_rate": 4.6102961994722515e-05, "loss": 1.3799, "step": 25490 }, { "epoch": 6.814537680384821, "grad_norm": 1.9817068576812744, "learning_rate": 4.603226370342977e-05, "loss": 1.3467, "step": 25500 }, { "epoch": 6.817210048102619, "grad_norm": 1.7919260263442993, "learning_rate": 4.5961603450897436e-05, "loss": 1.4048, "step": 25510 }, { "epoch": 6.819882415820417, "grad_norm": 1.9146183729171753, "learning_rate": 4.5890981286929754e-05, "loss": 1.2824, "step": 25520 }, { "epoch": 6.822554783538215, "grad_norm": 1.9657856225967407, "learning_rate": 4.582039726130437e-05, "loss": 1.3488, "step": 25530 }, { "epoch": 6.825227151256013, "grad_norm": 1.9412111043930054, "learning_rate": 4.57498514237718e-05, "loss": 1.3343, "step": 25540 }, { "epoch": 6.827899518973811, "grad_norm": 2.0994606018066406, "learning_rate": 4.567934382405581e-05, "loss": 1.3324, "step": 25550 }, { "epoch": 6.830571886691609, "grad_norm": 1.8755228519439697, "learning_rate": 4.560887451185314e-05, "loss": 1.2411, "step": 25560 }, { "epoch": 6.833244254409407, "grad_norm": 1.7629376649856567, "learning_rate": 4.553844353683361e-05, "loss": 1.2474, "step": 25570 }, { "epoch": 6.835916622127205, "grad_norm": 1.887208104133606, "learning_rate": 4.546805094863985e-05, "loss": 1.3752, "step": 25580 }, { "epoch": 6.838588989845003, "grad_norm": 2.057021141052246, "learning_rate": 4.5397696796887726e-05, "loss": 1.327, "step": 25590 }, { "epoch": 6.841261357562801, "grad_norm": 1.8888603448867798, "learning_rate": 4.532738113116571e-05, "loss": 1.3035, "step": 25600 }, { "epoch": 6.843933725280599, "grad_norm": 1.9359172582626343, "learning_rate": 4.5257104001035333e-05, "loss": 1.3673, "step": 25610 }, { "epoch": 6.846606092998397, "grad_norm": 2.076350450515747, "learning_rate": 4.5186865456030914e-05, "loss": 1.3122, "step": 25620 }, { "epoch": 6.849278460716194, "grad_norm": 1.9578807353973389, "learning_rate": 4.511666554565955e-05, "loss": 1.3545, "step": 25630 }, { "epoch": 6.851950828433992, "grad_norm": 2.000336170196533, "learning_rate": 4.5046504319401183e-05, "loss": 1.316, "step": 25640 }, { "epoch": 6.85462319615179, "grad_norm": 2.111220359802246, "learning_rate": 4.497638182670836e-05, "loss": 1.3444, "step": 25650 }, { "epoch": 6.857295563869588, "grad_norm": 2.0362842082977295, "learning_rate": 4.490629811700644e-05, "loss": 1.3057, "step": 25660 }, { "epoch": 6.859967931587386, "grad_norm": 2.01241135597229, "learning_rate": 4.483625323969339e-05, "loss": 1.3195, "step": 25670 }, { "epoch": 6.862640299305184, "grad_norm": 1.8604832887649536, "learning_rate": 4.476624724413986e-05, "loss": 1.3234, "step": 25680 }, { "epoch": 6.865312667022982, "grad_norm": 1.9640719890594482, "learning_rate": 4.4696280179689034e-05, "loss": 1.3018, "step": 25690 }, { "epoch": 6.86798503474078, "grad_norm": 1.862758994102478, "learning_rate": 4.462635209565671e-05, "loss": 1.3515, "step": 25700 }, { "epoch": 6.870657402458578, "grad_norm": 1.9486703872680664, "learning_rate": 4.4556463041331086e-05, "loss": 1.2896, "step": 25710 }, { "epoch": 6.873329770176376, "grad_norm": 1.8705099821090698, "learning_rate": 4.44866130659731e-05, "loss": 1.3557, "step": 25720 }, { "epoch": 6.876002137894174, "grad_norm": 1.9109236001968384, "learning_rate": 4.441680221881587e-05, "loss": 1.3565, "step": 25730 }, { "epoch": 6.878674505611972, "grad_norm": 1.9683568477630615, "learning_rate": 4.434703054906508e-05, "loss": 1.3075, "step": 25740 }, { "epoch": 6.88134687332977, "grad_norm": 1.9412583112716675, "learning_rate": 4.4277298105898815e-05, "loss": 1.3709, "step": 25750 }, { "epoch": 6.884019241047568, "grad_norm": 1.8737123012542725, "learning_rate": 4.420760493846743e-05, "loss": 1.3315, "step": 25760 }, { "epoch": 6.886691608765366, "grad_norm": 1.9197075366973877, "learning_rate": 4.413795109589366e-05, "loss": 1.282, "step": 25770 }, { "epoch": 6.889363976483164, "grad_norm": 2.134570837020874, "learning_rate": 4.406833662727253e-05, "loss": 1.3583, "step": 25780 }, { "epoch": 6.892036344200962, "grad_norm": 2.059075355529785, "learning_rate": 4.399876158167122e-05, "loss": 1.4106, "step": 25790 }, { "epoch": 6.89470871191876, "grad_norm": 1.904106616973877, "learning_rate": 4.392922600812922e-05, "loss": 1.2815, "step": 25800 }, { "epoch": 6.897381079636558, "grad_norm": 1.9103493690490723, "learning_rate": 4.385972995565817e-05, "loss": 1.3453, "step": 25810 }, { "epoch": 6.900053447354356, "grad_norm": 1.8709713220596313, "learning_rate": 4.379027347324184e-05, "loss": 1.3045, "step": 25820 }, { "epoch": 6.902725815072154, "grad_norm": 1.9322621822357178, "learning_rate": 4.372085660983616e-05, "loss": 1.3484, "step": 25830 }, { "epoch": 6.905398182789952, "grad_norm": 2.026193380355835, "learning_rate": 4.365147941436899e-05, "loss": 1.3053, "step": 25840 }, { "epoch": 6.90807055050775, "grad_norm": 1.9680495262145996, "learning_rate": 4.358214193574046e-05, "loss": 1.3385, "step": 25850 }, { "epoch": 6.910742918225548, "grad_norm": 1.9501479864120483, "learning_rate": 4.351284422282249e-05, "loss": 1.3452, "step": 25860 }, { "epoch": 6.913415285943346, "grad_norm": 1.7781267166137695, "learning_rate": 4.3443586324459095e-05, "loss": 1.3682, "step": 25870 }, { "epoch": 6.916087653661144, "grad_norm": 2.174943685531616, "learning_rate": 4.337436828946618e-05, "loss": 1.3665, "step": 25880 }, { "epoch": 6.918760021378942, "grad_norm": 1.9405536651611328, "learning_rate": 4.330519016663163e-05, "loss": 1.4252, "step": 25890 }, { "epoch": 6.92143238909674, "grad_norm": 2.011246681213379, "learning_rate": 4.323605200471499e-05, "loss": 1.359, "step": 25900 }, { "epoch": 6.924104756814538, "grad_norm": 1.920982837677002, "learning_rate": 4.3166953852447954e-05, "loss": 1.3207, "step": 25910 }, { "epoch": 6.926777124532336, "grad_norm": 2.0366029739379883, "learning_rate": 4.309789575853368e-05, "loss": 1.3285, "step": 25920 }, { "epoch": 6.929449492250134, "grad_norm": 1.91665518283844, "learning_rate": 4.302887777164742e-05, "loss": 1.3189, "step": 25930 }, { "epoch": 6.932121859967932, "grad_norm": 1.9528197050094604, "learning_rate": 4.2959899940435866e-05, "loss": 1.2788, "step": 25940 }, { "epoch": 6.93479422768573, "grad_norm": 2.000671625137329, "learning_rate": 4.289096231351759e-05, "loss": 1.3488, "step": 25950 }, { "epoch": 6.937466595403528, "grad_norm": 2.094839096069336, "learning_rate": 4.2822064939482744e-05, "loss": 1.3504, "step": 25960 }, { "epoch": 6.940138963121326, "grad_norm": 2.1478819847106934, "learning_rate": 4.275320786689319e-05, "loss": 1.3497, "step": 25970 }, { "epoch": 6.942811330839124, "grad_norm": 2.1537442207336426, "learning_rate": 4.268439114428223e-05, "loss": 1.3983, "step": 25980 }, { "epoch": 6.945483698556921, "grad_norm": 1.9925214052200317, "learning_rate": 4.2615614820154895e-05, "loss": 1.3543, "step": 25990 }, { "epoch": 6.948156066274719, "grad_norm": 1.9166977405548096, "learning_rate": 4.254687894298764e-05, "loss": 1.3626, "step": 26000 }, { "epoch": 6.950828433992517, "grad_norm": 2.103987216949463, "learning_rate": 4.2478183561228455e-05, "loss": 1.3837, "step": 26010 }, { "epoch": 6.953500801710315, "grad_norm": 2.0219504833221436, "learning_rate": 4.24095287232968e-05, "loss": 1.4031, "step": 26020 }, { "epoch": 6.956173169428113, "grad_norm": 1.9439438581466675, "learning_rate": 4.2340914477583425e-05, "loss": 1.2934, "step": 26030 }, { "epoch": 6.958845537145911, "grad_norm": 1.9404563903808594, "learning_rate": 4.227234087245072e-05, "loss": 1.3562, "step": 26040 }, { "epoch": 6.961517904863709, "grad_norm": 2.0004537105560303, "learning_rate": 4.220380795623218e-05, "loss": 1.3195, "step": 26050 }, { "epoch": 6.964190272581507, "grad_norm": 1.8503367900848389, "learning_rate": 4.213531577723274e-05, "loss": 1.3032, "step": 26060 }, { "epoch": 6.966862640299305, "grad_norm": 2.080974817276001, "learning_rate": 4.206686438372864e-05, "loss": 1.3404, "step": 26070 }, { "epoch": 6.969535008017103, "grad_norm": 1.9929003715515137, "learning_rate": 4.199845382396732e-05, "loss": 1.3262, "step": 26080 }, { "epoch": 6.972207375734901, "grad_norm": 2.0957374572753906, "learning_rate": 4.193008414616745e-05, "loss": 1.4116, "step": 26090 }, { "epoch": 6.974879743452699, "grad_norm": 2.079894542694092, "learning_rate": 4.186175539851894e-05, "loss": 1.3242, "step": 26100 }, { "epoch": 6.977552111170497, "grad_norm": 2.051224946975708, "learning_rate": 4.17934676291827e-05, "loss": 1.3347, "step": 26110 }, { "epoch": 6.980224478888295, "grad_norm": 1.7990056276321411, "learning_rate": 4.172522088629099e-05, "loss": 1.3434, "step": 26120 }, { "epoch": 6.982896846606093, "grad_norm": 1.9451942443847656, "learning_rate": 4.1657015217946936e-05, "loss": 1.3354, "step": 26130 }, { "epoch": 6.985569214323891, "grad_norm": 2.1389999389648438, "learning_rate": 4.158885067222481e-05, "loss": 1.2951, "step": 26140 }, { "epoch": 6.988241582041689, "grad_norm": 2.0456199645996094, "learning_rate": 4.1520727297169906e-05, "loss": 1.2959, "step": 26150 }, { "epoch": 6.990913949759487, "grad_norm": 2.0102691650390625, "learning_rate": 4.145264514079847e-05, "loss": 1.3544, "step": 26160 }, { "epoch": 6.993586317477285, "grad_norm": 1.9765325784683228, "learning_rate": 4.138460425109775e-05, "loss": 1.3281, "step": 26170 }, { "epoch": 6.996258685195083, "grad_norm": 1.9393184185028076, "learning_rate": 4.131660467602579e-05, "loss": 1.2823, "step": 26180 }, { "epoch": 6.998931052912881, "grad_norm": 2.0244810581207275, "learning_rate": 4.124864646351163e-05, "loss": 1.349, "step": 26190 }, { "epoch": 7.001603420630679, "grad_norm": 1.9014036655426025, "learning_rate": 4.1180729661455106e-05, "loss": 1.2706, "step": 26200 }, { "epoch": 7.004275788348477, "grad_norm": 1.9684710502624512, "learning_rate": 4.111285431772692e-05, "loss": 1.2202, "step": 26210 }, { "epoch": 7.006948156066275, "grad_norm": 2.188354969024658, "learning_rate": 4.1045020480168394e-05, "loss": 1.2392, "step": 26220 }, { "epoch": 7.009620523784073, "grad_norm": 1.9602842330932617, "learning_rate": 4.097722819659184e-05, "loss": 1.2321, "step": 26230 }, { "epoch": 7.012292891501871, "grad_norm": 2.304478406906128, "learning_rate": 4.090947751478003e-05, "loss": 1.1661, "step": 26240 }, { "epoch": 7.014965259219669, "grad_norm": 2.202989339828491, "learning_rate": 4.0841768482486655e-05, "loss": 1.1744, "step": 26250 }, { "epoch": 7.017637626937467, "grad_norm": 2.0549769401550293, "learning_rate": 4.077410114743582e-05, "loss": 1.2174, "step": 26260 }, { "epoch": 7.020309994655265, "grad_norm": 2.0904242992401123, "learning_rate": 4.070647555732239e-05, "loss": 1.2886, "step": 26270 }, { "epoch": 7.022982362373063, "grad_norm": 1.912893533706665, "learning_rate": 4.0638891759811746e-05, "loss": 1.1457, "step": 26280 }, { "epoch": 7.025654730090861, "grad_norm": 2.1165356636047363, "learning_rate": 4.0571349802539895e-05, "loss": 1.3009, "step": 26290 }, { "epoch": 7.028327097808658, "grad_norm": 2.0290679931640625, "learning_rate": 4.050384973311315e-05, "loss": 1.2149, "step": 26300 }, { "epoch": 7.030999465526456, "grad_norm": 2.025852680206299, "learning_rate": 4.0436391599108606e-05, "loss": 1.1835, "step": 26310 }, { "epoch": 7.033671833244254, "grad_norm": 2.161137342453003, "learning_rate": 4.03689754480735e-05, "loss": 1.2131, "step": 26320 }, { "epoch": 7.036344200962052, "grad_norm": 2.2476398944854736, "learning_rate": 4.030160132752565e-05, "loss": 1.1689, "step": 26330 }, { "epoch": 7.03901656867985, "grad_norm": 2.1565744876861572, "learning_rate": 4.023426928495322e-05, "loss": 1.2858, "step": 26340 }, { "epoch": 7.041688936397648, "grad_norm": 2.135911703109741, "learning_rate": 4.0166979367814685e-05, "loss": 1.1995, "step": 26350 }, { "epoch": 7.044361304115446, "grad_norm": 1.8626652956008911, "learning_rate": 4.009973162353888e-05, "loss": 1.1992, "step": 26360 }, { "epoch": 7.047033671833244, "grad_norm": 2.130089044570923, "learning_rate": 4.003252609952484e-05, "loss": 1.2277, "step": 26370 }, { "epoch": 7.049706039551042, "grad_norm": 2.1491618156433105, "learning_rate": 3.9965362843141886e-05, "loss": 1.2943, "step": 26380 }, { "epoch": 7.05237840726884, "grad_norm": 2.036555528640747, "learning_rate": 3.989824190172955e-05, "loss": 1.1872, "step": 26390 }, { "epoch": 7.055050774986638, "grad_norm": 2.0331013202667236, "learning_rate": 3.9831163322597544e-05, "loss": 1.1884, "step": 26400 }, { "epoch": 7.057723142704436, "grad_norm": 2.068228006362915, "learning_rate": 3.97641271530257e-05, "loss": 1.2011, "step": 26410 }, { "epoch": 7.060395510422234, "grad_norm": 2.1616146564483643, "learning_rate": 3.9697133440264e-05, "loss": 1.1771, "step": 26420 }, { "epoch": 7.063067878140032, "grad_norm": 2.004011392593384, "learning_rate": 3.9630182231532366e-05, "loss": 1.2046, "step": 26430 }, { "epoch": 7.06574024585783, "grad_norm": 2.036231279373169, "learning_rate": 3.956327357402101e-05, "loss": 1.2836, "step": 26440 }, { "epoch": 7.068412613575628, "grad_norm": 2.101543426513672, "learning_rate": 3.949640751488989e-05, "loss": 1.2117, "step": 26450 }, { "epoch": 7.071084981293426, "grad_norm": 1.9944366216659546, "learning_rate": 3.942958410126909e-05, "loss": 1.1968, "step": 26460 }, { "epoch": 7.073757349011224, "grad_norm": 1.9979811906814575, "learning_rate": 3.9362803380258606e-05, "loss": 1.1528, "step": 26470 }, { "epoch": 7.076429716729022, "grad_norm": 2.1518075466156006, "learning_rate": 3.929606539892833e-05, "loss": 1.2141, "step": 26480 }, { "epoch": 7.07910208444682, "grad_norm": 2.061533212661743, "learning_rate": 3.922937020431803e-05, "loss": 1.2192, "step": 26490 }, { "epoch": 7.081774452164618, "grad_norm": 2.0257511138916016, "learning_rate": 3.916271784343737e-05, "loss": 1.1701, "step": 26500 }, { "epoch": 7.084446819882416, "grad_norm": 2.084320545196533, "learning_rate": 3.9096108363265694e-05, "loss": 1.2416, "step": 26510 }, { "epoch": 7.087119187600214, "grad_norm": 2.0941121578216553, "learning_rate": 3.9029541810752244e-05, "loss": 1.208, "step": 26520 }, { "epoch": 7.089791555318012, "grad_norm": 2.087214708328247, "learning_rate": 3.896301823281595e-05, "loss": 1.1721, "step": 26530 }, { "epoch": 7.09246392303581, "grad_norm": 2.1198551654815674, "learning_rate": 3.889653767634549e-05, "loss": 1.2566, "step": 26540 }, { "epoch": 7.095136290753608, "grad_norm": 1.9783620834350586, "learning_rate": 3.88301001881992e-05, "loss": 1.2379, "step": 26550 }, { "epoch": 7.097808658471406, "grad_norm": 2.0319607257843018, "learning_rate": 3.8763705815204976e-05, "loss": 1.224, "step": 26560 }, { "epoch": 7.100481026189204, "grad_norm": 2.227588415145874, "learning_rate": 3.869735460416051e-05, "loss": 1.2774, "step": 26570 }, { "epoch": 7.103153393907002, "grad_norm": 2.153714895248413, "learning_rate": 3.8631046601832876e-05, "loss": 1.2179, "step": 26580 }, { "epoch": 7.1058257616248, "grad_norm": 2.2052743434906006, "learning_rate": 3.856478185495882e-05, "loss": 1.2417, "step": 26590 }, { "epoch": 7.108498129342598, "grad_norm": 2.112349271774292, "learning_rate": 3.8498560410244546e-05, "loss": 1.2785, "step": 26600 }, { "epoch": 7.111170497060396, "grad_norm": 2.0284695625305176, "learning_rate": 3.84323823143658e-05, "loss": 1.2482, "step": 26610 }, { "epoch": 7.113842864778193, "grad_norm": 2.106285810470581, "learning_rate": 3.83662476139676e-05, "loss": 1.274, "step": 26620 }, { "epoch": 7.116515232495991, "grad_norm": 2.0881786346435547, "learning_rate": 3.8300156355664665e-05, "loss": 1.2292, "step": 26630 }, { "epoch": 7.119187600213789, "grad_norm": 2.2239317893981934, "learning_rate": 3.823410858604076e-05, "loss": 1.212, "step": 26640 }, { "epoch": 7.121859967931587, "grad_norm": 2.2325310707092285, "learning_rate": 3.8168104351649335e-05, "loss": 1.2368, "step": 26650 }, { "epoch": 7.124532335649385, "grad_norm": 2.174346685409546, "learning_rate": 3.810214369901287e-05, "loss": 1.1748, "step": 26660 }, { "epoch": 7.127204703367183, "grad_norm": 2.0972397327423096, "learning_rate": 3.803622667462328e-05, "loss": 1.1974, "step": 26670 }, { "epoch": 7.129877071084981, "grad_norm": 2.162646770477295, "learning_rate": 3.797035332494169e-05, "loss": 1.1844, "step": 26680 }, { "epoch": 7.132549438802779, "grad_norm": 2.159243583679199, "learning_rate": 3.7904523696398484e-05, "loss": 1.202, "step": 26690 }, { "epoch": 7.135221806520577, "grad_norm": 2.0711281299591064, "learning_rate": 3.7838737835393124e-05, "loss": 1.2173, "step": 26700 }, { "epoch": 7.137894174238375, "grad_norm": 2.230268955230713, "learning_rate": 3.777299578829431e-05, "loss": 1.2559, "step": 26710 }, { "epoch": 7.140566541956173, "grad_norm": 1.9708648920059204, "learning_rate": 3.770729760143985e-05, "loss": 1.2635, "step": 26720 }, { "epoch": 7.143238909673971, "grad_norm": 2.09283447265625, "learning_rate": 3.7641643321136624e-05, "loss": 1.2281, "step": 26730 }, { "epoch": 7.145911277391769, "grad_norm": 2.0920164585113525, "learning_rate": 3.757603299366059e-05, "loss": 1.2649, "step": 26740 }, { "epoch": 7.148583645109567, "grad_norm": 2.066572666168213, "learning_rate": 3.7510466665256614e-05, "loss": 1.2286, "step": 26750 }, { "epoch": 7.151256012827365, "grad_norm": 1.872041940689087, "learning_rate": 3.744494438213877e-05, "loss": 1.2269, "step": 26760 }, { "epoch": 7.153928380545163, "grad_norm": 2.0074431896209717, "learning_rate": 3.737946619048985e-05, "loss": 1.2338, "step": 26770 }, { "epoch": 7.156600748262961, "grad_norm": 2.1136064529418945, "learning_rate": 3.731403213646171e-05, "loss": 1.1935, "step": 26780 }, { "epoch": 7.159273115980759, "grad_norm": 2.053072452545166, "learning_rate": 3.724864226617506e-05, "loss": 1.221, "step": 26790 }, { "epoch": 7.161945483698557, "grad_norm": 2.150113344192505, "learning_rate": 3.718329662571946e-05, "loss": 1.1816, "step": 26800 }, { "epoch": 7.164617851416355, "grad_norm": 2.078615427017212, "learning_rate": 3.711799526115333e-05, "loss": 1.2467, "step": 26810 }, { "epoch": 7.167290219134153, "grad_norm": 2.1886465549468994, "learning_rate": 3.705273821850385e-05, "loss": 1.2066, "step": 26820 }, { "epoch": 7.169962586851951, "grad_norm": 2.0835061073303223, "learning_rate": 3.698752554376689e-05, "loss": 1.2217, "step": 26830 }, { "epoch": 7.172634954569749, "grad_norm": 2.081408977508545, "learning_rate": 3.6922357282907236e-05, "loss": 1.2074, "step": 26840 }, { "epoch": 7.175307322287547, "grad_norm": 1.9500755071640015, "learning_rate": 3.685723348185818e-05, "loss": 1.2378, "step": 26850 }, { "epoch": 7.177979690005345, "grad_norm": 2.18097186088562, "learning_rate": 3.679215418652177e-05, "loss": 1.2477, "step": 26860 }, { "epoch": 7.180652057723143, "grad_norm": 2.0728347301483154, "learning_rate": 3.672711944276865e-05, "loss": 1.1835, "step": 26870 }, { "epoch": 7.183324425440941, "grad_norm": 2.0779342651367188, "learning_rate": 3.666212929643812e-05, "loss": 1.2418, "step": 26880 }, { "epoch": 7.185996793158739, "grad_norm": 2.126870632171631, "learning_rate": 3.659718379333801e-05, "loss": 1.1285, "step": 26890 }, { "epoch": 7.188669160876537, "grad_norm": 2.122117280960083, "learning_rate": 3.653228297924461e-05, "loss": 1.1866, "step": 26900 }, { "epoch": 7.191341528594335, "grad_norm": 2.13063645362854, "learning_rate": 3.6467426899902826e-05, "loss": 1.1705, "step": 26910 }, { "epoch": 7.194013896312133, "grad_norm": 2.143397092819214, "learning_rate": 3.640261560102598e-05, "loss": 1.249, "step": 26920 }, { "epoch": 7.196686264029931, "grad_norm": 2.073452949523926, "learning_rate": 3.633784912829586e-05, "loss": 1.2529, "step": 26930 }, { "epoch": 7.199358631747728, "grad_norm": 2.1552131175994873, "learning_rate": 3.627312752736256e-05, "loss": 1.2631, "step": 26940 }, { "epoch": 7.202030999465526, "grad_norm": 2.0443172454833984, "learning_rate": 3.620845084384474e-05, "loss": 1.2495, "step": 26950 }, { "epoch": 7.204703367183324, "grad_norm": 1.8864065408706665, "learning_rate": 3.614381912332916e-05, "loss": 1.2517, "step": 26960 }, { "epoch": 7.207375734901122, "grad_norm": 2.0248312950134277, "learning_rate": 3.6079232411371125e-05, "loss": 1.2414, "step": 26970 }, { "epoch": 7.21004810261892, "grad_norm": 2.1586263179779053, "learning_rate": 3.601469075349402e-05, "loss": 1.2887, "step": 26980 }, { "epoch": 7.212720470336718, "grad_norm": 2.0618584156036377, "learning_rate": 3.595019419518958e-05, "loss": 1.179, "step": 26990 }, { "epoch": 7.215392838054516, "grad_norm": 2.2867074012756348, "learning_rate": 3.588574278191773e-05, "loss": 1.1725, "step": 27000 }, { "epoch": 7.218065205772314, "grad_norm": 2.165614366531372, "learning_rate": 3.58213365591066e-05, "loss": 1.2002, "step": 27010 }, { "epoch": 7.220737573490112, "grad_norm": 2.111840009689331, "learning_rate": 3.575697557215236e-05, "loss": 1.2551, "step": 27020 }, { "epoch": 7.22340994120791, "grad_norm": 2.1707651615142822, "learning_rate": 3.5692659866419484e-05, "loss": 1.3149, "step": 27030 }, { "epoch": 7.226082308925708, "grad_norm": 2.069751501083374, "learning_rate": 3.5628389487240344e-05, "loss": 1.2253, "step": 27040 }, { "epoch": 7.228754676643506, "grad_norm": 2.1931421756744385, "learning_rate": 3.556416447991546e-05, "loss": 1.2033, "step": 27050 }, { "epoch": 7.231427044361304, "grad_norm": 2.147146463394165, "learning_rate": 3.5499984889713364e-05, "loss": 1.2643, "step": 27060 }, { "epoch": 7.234099412079102, "grad_norm": 2.0687849521636963, "learning_rate": 3.543585076187057e-05, "loss": 1.2699, "step": 27070 }, { "epoch": 7.2367717797969, "grad_norm": 2.0323586463928223, "learning_rate": 3.537176214159156e-05, "loss": 1.1776, "step": 27080 }, { "epoch": 7.239444147514698, "grad_norm": 2.0149714946746826, "learning_rate": 3.530771907404867e-05, "loss": 1.2115, "step": 27090 }, { "epoch": 7.242116515232496, "grad_norm": 2.117044448852539, "learning_rate": 3.524372160438222e-05, "loss": 1.2277, "step": 27100 }, { "epoch": 7.244788882950294, "grad_norm": 2.203836441040039, "learning_rate": 3.517976977770033e-05, "loss": 1.2374, "step": 27110 }, { "epoch": 7.247461250668092, "grad_norm": 2.0977160930633545, "learning_rate": 3.511586363907902e-05, "loss": 1.2262, "step": 27120 }, { "epoch": 7.25013361838589, "grad_norm": 2.137202501296997, "learning_rate": 3.5052003233562e-05, "loss": 1.2438, "step": 27130 }, { "epoch": 7.252805986103688, "grad_norm": 2.1474907398223877, "learning_rate": 3.49881886061609e-05, "loss": 1.2149, "step": 27140 }, { "epoch": 7.255478353821486, "grad_norm": 2.2300004959106445, "learning_rate": 3.492441980185484e-05, "loss": 1.227, "step": 27150 }, { "epoch": 7.258150721539284, "grad_norm": 2.0743207931518555, "learning_rate": 3.486069686559095e-05, "loss": 1.2462, "step": 27160 }, { "epoch": 7.260823089257082, "grad_norm": 2.0569257736206055, "learning_rate": 3.479701984228375e-05, "loss": 1.2546, "step": 27170 }, { "epoch": 7.26349545697488, "grad_norm": 2.0144920349121094, "learning_rate": 3.473338877681558e-05, "loss": 1.3128, "step": 27180 }, { "epoch": 7.266167824692678, "grad_norm": 2.3156790733337402, "learning_rate": 3.4669803714036306e-05, "loss": 1.2507, "step": 27190 }, { "epoch": 7.268840192410476, "grad_norm": 2.3155198097229004, "learning_rate": 3.4606264698763404e-05, "loss": 1.2753, "step": 27200 }, { "epoch": 7.271512560128274, "grad_norm": 2.1147971153259277, "learning_rate": 3.454277177578187e-05, "loss": 1.2602, "step": 27210 }, { "epoch": 7.274184927846072, "grad_norm": 2.0096116065979004, "learning_rate": 3.447932498984425e-05, "loss": 1.1753, "step": 27220 }, { "epoch": 7.27685729556387, "grad_norm": 2.1410534381866455, "learning_rate": 3.4415924385670495e-05, "loss": 1.2732, "step": 27230 }, { "epoch": 7.279529663281668, "grad_norm": 2.093339204788208, "learning_rate": 3.435257000794807e-05, "loss": 1.1837, "step": 27240 }, { "epoch": 7.282202030999466, "grad_norm": 2.1783902645111084, "learning_rate": 3.428926190133185e-05, "loss": 1.255, "step": 27250 }, { "epoch": 7.284874398717264, "grad_norm": 1.9138840436935425, "learning_rate": 3.4226000110444064e-05, "loss": 1.2411, "step": 27260 }, { "epoch": 7.287546766435061, "grad_norm": 1.9429867267608643, "learning_rate": 3.416278467987439e-05, "loss": 1.186, "step": 27270 }, { "epoch": 7.290219134152859, "grad_norm": 2.091928005218506, "learning_rate": 3.409961565417963e-05, "loss": 1.2449, "step": 27280 }, { "epoch": 7.292891501870657, "grad_norm": 1.927122712135315, "learning_rate": 3.403649307788414e-05, "loss": 1.2616, "step": 27290 }, { "epoch": 7.295563869588455, "grad_norm": 2.201514482498169, "learning_rate": 3.39734169954793e-05, "loss": 1.2214, "step": 27300 }, { "epoch": 7.298236237306253, "grad_norm": 2.097895622253418, "learning_rate": 3.3910387451423876e-05, "loss": 1.2455, "step": 27310 }, { "epoch": 7.300908605024051, "grad_norm": 2.2148094177246094, "learning_rate": 3.3847404490143755e-05, "loss": 1.2837, "step": 27320 }, { "epoch": 7.303580972741849, "grad_norm": 2.0491528511047363, "learning_rate": 3.378446815603205e-05, "loss": 1.3052, "step": 27330 }, { "epoch": 7.306253340459647, "grad_norm": 2.043707847595215, "learning_rate": 3.372157849344887e-05, "loss": 1.1766, "step": 27340 }, { "epoch": 7.308925708177445, "grad_norm": 2.0265252590179443, "learning_rate": 3.3658735546721664e-05, "loss": 1.3366, "step": 27350 }, { "epoch": 7.311598075895243, "grad_norm": 2.085498571395874, "learning_rate": 3.359593936014469e-05, "loss": 1.1958, "step": 27360 }, { "epoch": 7.314270443613041, "grad_norm": 2.19191837310791, "learning_rate": 3.353318997797951e-05, "loss": 1.2541, "step": 27370 }, { "epoch": 7.316942811330839, "grad_norm": 2.31546688079834, "learning_rate": 3.3470487444454446e-05, "loss": 1.2631, "step": 27380 }, { "epoch": 7.319615179048637, "grad_norm": 1.9966678619384766, "learning_rate": 3.340783180376498e-05, "loss": 1.2182, "step": 27390 }, { "epoch": 7.322287546766435, "grad_norm": 2.0418057441711426, "learning_rate": 3.334522310007345e-05, "loss": 1.1623, "step": 27400 }, { "epoch": 7.324959914484233, "grad_norm": 2.1081957817077637, "learning_rate": 3.328266137750918e-05, "loss": 1.2262, "step": 27410 }, { "epoch": 7.327632282202031, "grad_norm": 2.1674318313598633, "learning_rate": 3.3220146680168287e-05, "loss": 1.288, "step": 27420 }, { "epoch": 7.330304649919829, "grad_norm": 2.1051673889160156, "learning_rate": 3.31576790521138e-05, "loss": 1.2202, "step": 27430 }, { "epoch": 7.332977017637627, "grad_norm": 2.1748886108398438, "learning_rate": 3.309525853737557e-05, "loss": 1.3121, "step": 27440 }, { "epoch": 7.335649385355425, "grad_norm": 2.2035019397735596, "learning_rate": 3.303288517995026e-05, "loss": 1.1804, "step": 27450 }, { "epoch": 7.338321753073223, "grad_norm": 2.1828949451446533, "learning_rate": 3.297055902380129e-05, "loss": 1.2538, "step": 27460 }, { "epoch": 7.340994120791021, "grad_norm": 1.8600070476531982, "learning_rate": 3.290828011285868e-05, "loss": 1.2314, "step": 27470 }, { "epoch": 7.343666488508819, "grad_norm": 2.1104629039764404, "learning_rate": 3.28460484910194e-05, "loss": 1.2091, "step": 27480 }, { "epoch": 7.346338856226617, "grad_norm": 1.9043546915054321, "learning_rate": 3.278386420214685e-05, "loss": 1.1755, "step": 27490 }, { "epoch": 7.349011223944415, "grad_norm": 2.1234591007232666, "learning_rate": 3.272172729007121e-05, "loss": 1.2054, "step": 27500 }, { "epoch": 7.351683591662213, "grad_norm": 2.143435478210449, "learning_rate": 3.265963779858922e-05, "loss": 1.3058, "step": 27510 }, { "epoch": 7.354355959380011, "grad_norm": 2.012240171432495, "learning_rate": 3.2597595771464185e-05, "loss": 1.1978, "step": 27520 }, { "epoch": 7.357028327097809, "grad_norm": 2.1521317958831787, "learning_rate": 3.253560125242597e-05, "loss": 1.1987, "step": 27530 }, { "epoch": 7.359700694815607, "grad_norm": 2.0875651836395264, "learning_rate": 3.2473654285171004e-05, "loss": 1.2867, "step": 27540 }, { "epoch": 7.362373062533405, "grad_norm": 2.0559356212615967, "learning_rate": 3.241175491336206e-05, "loss": 1.2875, "step": 27550 }, { "epoch": 7.365045430251203, "grad_norm": 2.1649205684661865, "learning_rate": 3.234990318062855e-05, "loss": 1.2321, "step": 27560 }, { "epoch": 7.367717797969001, "grad_norm": 2.1024770736694336, "learning_rate": 3.228809913056613e-05, "loss": 1.2764, "step": 27570 }, { "epoch": 7.370390165686798, "grad_norm": 2.1040284633636475, "learning_rate": 3.222634280673696e-05, "loss": 1.2064, "step": 27580 }, { "epoch": 7.373062533404596, "grad_norm": 2.161027431488037, "learning_rate": 3.2164634252669535e-05, "loss": 1.2076, "step": 27590 }, { "epoch": 7.375734901122394, "grad_norm": 1.9908994436264038, "learning_rate": 3.210297351185867e-05, "loss": 1.2856, "step": 27600 }, { "epoch": 7.378407268840192, "grad_norm": 2.054513454437256, "learning_rate": 3.2041360627765504e-05, "loss": 1.2207, "step": 27610 }, { "epoch": 7.38107963655799, "grad_norm": 2.0925278663635254, "learning_rate": 3.197979564381738e-05, "loss": 1.2605, "step": 27620 }, { "epoch": 7.383752004275788, "grad_norm": 2.0387942790985107, "learning_rate": 3.191827860340794e-05, "loss": 1.1624, "step": 27630 }, { "epoch": 7.386424371993586, "grad_norm": 1.9733607769012451, "learning_rate": 3.1856809549897e-05, "loss": 1.2643, "step": 27640 }, { "epoch": 7.389096739711384, "grad_norm": 2.050025463104248, "learning_rate": 3.1795388526610633e-05, "loss": 1.2614, "step": 27650 }, { "epoch": 7.391769107429182, "grad_norm": 2.247288227081299, "learning_rate": 3.1734015576840895e-05, "loss": 1.16, "step": 27660 }, { "epoch": 7.39444147514698, "grad_norm": 2.1705844402313232, "learning_rate": 3.167269074384616e-05, "loss": 1.2709, "step": 27670 }, { "epoch": 7.397113842864778, "grad_norm": 2.0863277912139893, "learning_rate": 3.1611414070850666e-05, "loss": 1.2264, "step": 27680 }, { "epoch": 7.399786210582576, "grad_norm": 2.080504894256592, "learning_rate": 3.155018560104496e-05, "loss": 1.1547, "step": 27690 }, { "epoch": 7.402458578300374, "grad_norm": 2.1259548664093018, "learning_rate": 3.148900537758537e-05, "loss": 1.2138, "step": 27700 }, { "epoch": 7.405130946018172, "grad_norm": 2.1991636753082275, "learning_rate": 3.142787344359436e-05, "loss": 1.2567, "step": 27710 }, { "epoch": 7.40780331373597, "grad_norm": 2.322218418121338, "learning_rate": 3.136678984216033e-05, "loss": 1.323, "step": 27720 }, { "epoch": 7.410475681453768, "grad_norm": 2.108767032623291, "learning_rate": 3.1305754616337624e-05, "loss": 1.2391, "step": 27730 }, { "epoch": 7.413148049171566, "grad_norm": 2.124277353286743, "learning_rate": 3.124476780914638e-05, "loss": 1.2679, "step": 27740 }, { "epoch": 7.415820416889364, "grad_norm": 2.0178637504577637, "learning_rate": 3.118382946357282e-05, "loss": 1.2728, "step": 27750 }, { "epoch": 7.418492784607162, "grad_norm": 2.0202298164367676, "learning_rate": 3.1122939622568816e-05, "loss": 1.242, "step": 27760 }, { "epoch": 7.42116515232496, "grad_norm": 2.028144598007202, "learning_rate": 3.1062098329052124e-05, "loss": 1.2401, "step": 27770 }, { "epoch": 7.423837520042758, "grad_norm": 1.9863171577453613, "learning_rate": 3.100130562590631e-05, "loss": 1.2009, "step": 27780 }, { "epoch": 7.426509887760556, "grad_norm": 2.0276663303375244, "learning_rate": 3.094056155598063e-05, "loss": 1.2562, "step": 27790 }, { "epoch": 7.429182255478354, "grad_norm": 2.038090467453003, "learning_rate": 3.0879866162090145e-05, "loss": 1.2114, "step": 27800 }, { "epoch": 7.431854623196152, "grad_norm": 1.9752060174942017, "learning_rate": 3.081921948701548e-05, "loss": 1.2638, "step": 27810 }, { "epoch": 7.43452699091395, "grad_norm": 2.0179131031036377, "learning_rate": 3.075862157350304e-05, "loss": 1.2521, "step": 27820 }, { "epoch": 7.437199358631748, "grad_norm": 2.139413356781006, "learning_rate": 3.06980724642648e-05, "loss": 1.192, "step": 27830 }, { "epoch": 7.439871726349546, "grad_norm": 2.0572116374969482, "learning_rate": 3.0637572201978384e-05, "loss": 1.2547, "step": 27840 }, { "epoch": 7.442544094067344, "grad_norm": 2.1968374252319336, "learning_rate": 3.057712082928692e-05, "loss": 1.2545, "step": 27850 }, { "epoch": 7.445216461785142, "grad_norm": 2.1138153076171875, "learning_rate": 3.051671838879917e-05, "loss": 1.2324, "step": 27860 }, { "epoch": 7.44788882950294, "grad_norm": 1.9967312812805176, "learning_rate": 3.0456364923089232e-05, "loss": 1.224, "step": 27870 }, { "epoch": 7.450561197220738, "grad_norm": 2.2048964500427246, "learning_rate": 3.039606047469694e-05, "loss": 1.2165, "step": 27880 }, { "epoch": 7.453233564938536, "grad_norm": 2.059983730316162, "learning_rate": 3.0335805086127332e-05, "loss": 1.2961, "step": 27890 }, { "epoch": 7.455905932656334, "grad_norm": 1.978102445602417, "learning_rate": 3.0275598799851023e-05, "loss": 1.2161, "step": 27900 }, { "epoch": 7.458578300374132, "grad_norm": 2.2445929050445557, "learning_rate": 3.0215441658303955e-05, "loss": 1.3037, "step": 27910 }, { "epoch": 7.461250668091929, "grad_norm": 2.0911028385162354, "learning_rate": 3.0155333703887455e-05, "loss": 1.2744, "step": 27920 }, { "epoch": 7.463923035809727, "grad_norm": 2.2235841751098633, "learning_rate": 3.009527497896817e-05, "loss": 1.2155, "step": 27930 }, { "epoch": 7.466595403527525, "grad_norm": 2.1373989582061768, "learning_rate": 3.0035265525878065e-05, "loss": 1.1749, "step": 27940 }, { "epoch": 7.469267771245323, "grad_norm": 2.1481776237487793, "learning_rate": 2.997530538691431e-05, "loss": 1.2451, "step": 27950 }, { "epoch": 7.471940138963121, "grad_norm": 2.2615137100219727, "learning_rate": 2.99153946043394e-05, "loss": 1.2551, "step": 27960 }, { "epoch": 7.474612506680919, "grad_norm": 2.2867887020111084, "learning_rate": 2.9855533220380994e-05, "loss": 1.2871, "step": 27970 }, { "epoch": 7.477284874398717, "grad_norm": 2.131208658218384, "learning_rate": 2.9795721277231957e-05, "loss": 1.2874, "step": 27980 }, { "epoch": 7.479957242116515, "grad_norm": 2.4127533435821533, "learning_rate": 2.9735958817050335e-05, "loss": 1.2257, "step": 27990 }, { "epoch": 7.482629609834313, "grad_norm": 2.0967767238616943, "learning_rate": 2.967624588195914e-05, "loss": 1.2808, "step": 28000 }, { "epoch": 7.485301977552111, "grad_norm": 2.086289405822754, "learning_rate": 2.9616582514046743e-05, "loss": 1.1906, "step": 28010 }, { "epoch": 7.487974345269909, "grad_norm": 1.8830486536026, "learning_rate": 2.955696875536631e-05, "loss": 1.2248, "step": 28020 }, { "epoch": 7.490646712987707, "grad_norm": 2.2826850414276123, "learning_rate": 2.949740464793621e-05, "loss": 1.2467, "step": 28030 }, { "epoch": 7.493319080705505, "grad_norm": 2.0717697143554688, "learning_rate": 2.9437890233739762e-05, "loss": 1.2972, "step": 28040 }, { "epoch": 7.495991448423303, "grad_norm": 2.160871744155884, "learning_rate": 2.9378425554725274e-05, "loss": 1.1881, "step": 28050 }, { "epoch": 7.498663816141101, "grad_norm": 2.1282119750976562, "learning_rate": 2.9319010652805913e-05, "loss": 1.2222, "step": 28060 }, { "epoch": 7.501336183858899, "grad_norm": 2.094926118850708, "learning_rate": 2.925964556985995e-05, "loss": 1.2999, "step": 28070 }, { "epoch": 7.504008551576697, "grad_norm": 2.021768093109131, "learning_rate": 2.920033034773031e-05, "loss": 1.2802, "step": 28080 }, { "epoch": 7.506680919294495, "grad_norm": 2.03518009185791, "learning_rate": 2.914106502822499e-05, "loss": 1.2134, "step": 28090 }, { "epoch": 7.509353287012293, "grad_norm": 2.202364206314087, "learning_rate": 2.9081849653116644e-05, "loss": 1.2066, "step": 28100 }, { "epoch": 7.512025654730091, "grad_norm": 2.1216659545898438, "learning_rate": 2.902268426414282e-05, "loss": 1.1717, "step": 28110 }, { "epoch": 7.514698022447889, "grad_norm": 2.0889415740966797, "learning_rate": 2.896356890300579e-05, "loss": 1.2003, "step": 28120 }, { "epoch": 7.517370390165687, "grad_norm": 2.193819284439087, "learning_rate": 2.8904503611372612e-05, "loss": 1.2026, "step": 28130 }, { "epoch": 7.520042757883485, "grad_norm": 2.163248062133789, "learning_rate": 2.8845488430874944e-05, "loss": 1.2555, "step": 28140 }, { "epoch": 7.522715125601283, "grad_norm": 2.0415689945220947, "learning_rate": 2.878652340310932e-05, "loss": 1.2142, "step": 28150 }, { "epoch": 7.525387493319081, "grad_norm": 1.9740504026412964, "learning_rate": 2.87276085696367e-05, "loss": 1.23, "step": 28160 }, { "epoch": 7.528059861036879, "grad_norm": 2.0317816734313965, "learning_rate": 2.8668743971982816e-05, "loss": 1.2271, "step": 28170 }, { "epoch": 7.530732228754677, "grad_norm": 2.0579888820648193, "learning_rate": 2.8609929651637925e-05, "loss": 1.2266, "step": 28180 }, { "epoch": 7.533404596472475, "grad_norm": 2.053981065750122, "learning_rate": 2.855116565005689e-05, "loss": 1.1658, "step": 28190 }, { "epoch": 7.536076964190273, "grad_norm": 2.0381927490234375, "learning_rate": 2.8492452008659086e-05, "loss": 1.2244, "step": 28200 }, { "epoch": 7.53874933190807, "grad_norm": 2.3309037685394287, "learning_rate": 2.8433788768828363e-05, "loss": 1.2272, "step": 28210 }, { "epoch": 7.541421699625868, "grad_norm": 1.9979252815246582, "learning_rate": 2.8375175971913072e-05, "loss": 1.1987, "step": 28220 }, { "epoch": 7.544094067343666, "grad_norm": 2.2014684677124023, "learning_rate": 2.8316613659226022e-05, "loss": 1.202, "step": 28230 }, { "epoch": 7.546766435061464, "grad_norm": 2.075988531112671, "learning_rate": 2.825810187204443e-05, "loss": 1.239, "step": 28240 }, { "epoch": 7.549438802779262, "grad_norm": 2.1374521255493164, "learning_rate": 2.8199640651609883e-05, "loss": 1.2426, "step": 28250 }, { "epoch": 7.55211117049706, "grad_norm": 2.334420680999756, "learning_rate": 2.8141230039128395e-05, "loss": 1.2005, "step": 28260 }, { "epoch": 7.554783538214858, "grad_norm": 2.030236005783081, "learning_rate": 2.808287007577014e-05, "loss": 1.2058, "step": 28270 }, { "epoch": 7.557455905932656, "grad_norm": 2.1864736080169678, "learning_rate": 2.802456080266984e-05, "loss": 1.2464, "step": 28280 }, { "epoch": 7.560128273650454, "grad_norm": 2.1882996559143066, "learning_rate": 2.796630226092627e-05, "loss": 1.272, "step": 28290 }, { "epoch": 7.562800641368252, "grad_norm": 2.258795976638794, "learning_rate": 2.7908094491602556e-05, "loss": 1.2588, "step": 28300 }, { "epoch": 7.56547300908605, "grad_norm": 2.1145286560058594, "learning_rate": 2.784993753572601e-05, "loss": 1.2809, "step": 28310 }, { "epoch": 7.568145376803848, "grad_norm": 2.2491462230682373, "learning_rate": 2.779183143428814e-05, "loss": 1.2408, "step": 28320 }, { "epoch": 7.570817744521646, "grad_norm": 2.005479097366333, "learning_rate": 2.773377622824459e-05, "loss": 1.2191, "step": 28330 }, { "epoch": 7.573490112239444, "grad_norm": 2.2879302501678467, "learning_rate": 2.7675771958515196e-05, "loss": 1.284, "step": 28340 }, { "epoch": 7.576162479957242, "grad_norm": 2.1377029418945312, "learning_rate": 2.7617818665983762e-05, "loss": 1.1812, "step": 28350 }, { "epoch": 7.57883484767504, "grad_norm": 2.1379306316375732, "learning_rate": 2.7559916391498276e-05, "loss": 1.3124, "step": 28360 }, { "epoch": 7.581507215392838, "grad_norm": 2.143080472946167, "learning_rate": 2.7502065175870718e-05, "loss": 1.184, "step": 28370 }, { "epoch": 7.584179583110636, "grad_norm": 2.1253340244293213, "learning_rate": 2.7444265059877106e-05, "loss": 1.1638, "step": 28380 }, { "epoch": 7.586851950828434, "grad_norm": 2.110478162765503, "learning_rate": 2.738651608425744e-05, "loss": 1.183, "step": 28390 }, { "epoch": 7.589524318546232, "grad_norm": 1.9841487407684326, "learning_rate": 2.7328818289715576e-05, "loss": 1.2086, "step": 28400 }, { "epoch": 7.59219668626403, "grad_norm": 2.1448872089385986, "learning_rate": 2.7271171716919496e-05, "loss": 1.3087, "step": 28410 }, { "epoch": 7.594869053981828, "grad_norm": 2.0087194442749023, "learning_rate": 2.7213576406500873e-05, "loss": 1.2426, "step": 28420 }, { "epoch": 7.597541421699626, "grad_norm": 2.205631971359253, "learning_rate": 2.7156032399055375e-05, "loss": 1.2845, "step": 28430 }, { "epoch": 7.600213789417424, "grad_norm": 2.080989122390747, "learning_rate": 2.7098539735142448e-05, "loss": 1.2443, "step": 28440 }, { "epoch": 7.602886157135222, "grad_norm": 2.1031556129455566, "learning_rate": 2.704109845528542e-05, "loss": 1.2825, "step": 28450 }, { "epoch": 7.60555852485302, "grad_norm": 2.1243724822998047, "learning_rate": 2.6983708599971257e-05, "loss": 1.2246, "step": 28460 }, { "epoch": 7.608230892570818, "grad_norm": 2.1016321182250977, "learning_rate": 2.6926370209650898e-05, "loss": 1.2626, "step": 28470 }, { "epoch": 7.610903260288616, "grad_norm": 2.1604385375976562, "learning_rate": 2.6869083324738764e-05, "loss": 1.2269, "step": 28480 }, { "epoch": 7.613575628006414, "grad_norm": 2.0251622200012207, "learning_rate": 2.6811847985613204e-05, "loss": 1.1951, "step": 28490 }, { "epoch": 7.616247995724212, "grad_norm": 2.0509791374206543, "learning_rate": 2.6754664232616045e-05, "loss": 1.2121, "step": 28500 }, { "epoch": 7.61892036344201, "grad_norm": 2.0376598834991455, "learning_rate": 2.669753210605286e-05, "loss": 1.1605, "step": 28510 }, { "epoch": 7.621592731159808, "grad_norm": 2.168583631515503, "learning_rate": 2.66404516461928e-05, "loss": 1.2685, "step": 28520 }, { "epoch": 7.624265098877606, "grad_norm": 2.124511480331421, "learning_rate": 2.6583422893268638e-05, "loss": 1.2421, "step": 28530 }, { "epoch": 7.626937466595404, "grad_norm": 2.171947479248047, "learning_rate": 2.6526445887476613e-05, "loss": 1.288, "step": 28540 }, { "epoch": 7.629609834313202, "grad_norm": 2.1840152740478516, "learning_rate": 2.646952066897658e-05, "loss": 1.2568, "step": 28550 }, { "epoch": 7.632282202031, "grad_norm": 2.0040221214294434, "learning_rate": 2.6412647277891846e-05, "loss": 1.2027, "step": 28560 }, { "epoch": 7.634954569748797, "grad_norm": 2.1620066165924072, "learning_rate": 2.6355825754309217e-05, "loss": 1.2359, "step": 28570 }, { "epoch": 7.637626937466595, "grad_norm": 2.356335401535034, "learning_rate": 2.6299056138278945e-05, "loss": 1.2326, "step": 28580 }, { "epoch": 7.640299305184393, "grad_norm": 2.1007184982299805, "learning_rate": 2.6242338469814575e-05, "loss": 1.2403, "step": 28590 }, { "epoch": 7.642971672902191, "grad_norm": 2.003970146179199, "learning_rate": 2.618567278889328e-05, "loss": 1.3214, "step": 28600 }, { "epoch": 7.645644040619989, "grad_norm": 2.0494182109832764, "learning_rate": 2.6129059135455335e-05, "loss": 1.2185, "step": 28610 }, { "epoch": 7.648316408337787, "grad_norm": 2.0207862854003906, "learning_rate": 2.6072497549404506e-05, "loss": 1.244, "step": 28620 }, { "epoch": 7.650988776055585, "grad_norm": 1.914623737335205, "learning_rate": 2.601598807060779e-05, "loss": 1.2497, "step": 28630 }, { "epoch": 7.653661143773383, "grad_norm": 2.183497190475464, "learning_rate": 2.5959530738895486e-05, "loss": 1.1827, "step": 28640 }, { "epoch": 7.656333511491181, "grad_norm": 2.0640687942504883, "learning_rate": 2.5903125594061128e-05, "loss": 1.3111, "step": 28650 }, { "epoch": 7.659005879208979, "grad_norm": 2.200634241104126, "learning_rate": 2.5846772675861485e-05, "loss": 1.2258, "step": 28660 }, { "epoch": 7.661678246926777, "grad_norm": 2.038461685180664, "learning_rate": 2.579047202401642e-05, "loss": 1.289, "step": 28670 }, { "epoch": 7.664350614644575, "grad_norm": 1.9681406021118164, "learning_rate": 2.5734223678209135e-05, "loss": 1.3086, "step": 28680 }, { "epoch": 7.667022982362373, "grad_norm": 2.1332638263702393, "learning_rate": 2.5678027678085793e-05, "loss": 1.3003, "step": 28690 }, { "epoch": 7.669695350080171, "grad_norm": 2.178931713104248, "learning_rate": 2.5621884063255742e-05, "loss": 1.2336, "step": 28700 }, { "epoch": 7.672367717797969, "grad_norm": 2.1359126567840576, "learning_rate": 2.5565792873291395e-05, "loss": 1.2421, "step": 28710 }, { "epoch": 7.675040085515767, "grad_norm": 2.1914706230163574, "learning_rate": 2.5509754147728226e-05, "loss": 1.2467, "step": 28720 }, { "epoch": 7.677712453233565, "grad_norm": 2.121519088745117, "learning_rate": 2.545376792606473e-05, "loss": 1.2759, "step": 28730 }, { "epoch": 7.680384820951363, "grad_norm": 2.1237680912017822, "learning_rate": 2.539783424776233e-05, "loss": 1.2678, "step": 28740 }, { "epoch": 7.683057188669161, "grad_norm": 2.0890979766845703, "learning_rate": 2.5341953152245502e-05, "loss": 1.2993, "step": 28750 }, { "epoch": 7.685729556386959, "grad_norm": 2.0381410121917725, "learning_rate": 2.5286124678901624e-05, "loss": 1.2441, "step": 28760 }, { "epoch": 7.688401924104757, "grad_norm": 2.1491963863372803, "learning_rate": 2.5230348867081e-05, "loss": 1.2417, "step": 28770 }, { "epoch": 7.691074291822555, "grad_norm": 2.4194345474243164, "learning_rate": 2.5174625756096716e-05, "loss": 1.1738, "step": 28780 }, { "epoch": 7.693746659540353, "grad_norm": 2.290524959564209, "learning_rate": 2.5118955385224907e-05, "loss": 1.2388, "step": 28790 }, { "epoch": 7.696419027258151, "grad_norm": 1.9745029211044312, "learning_rate": 2.506333779370432e-05, "loss": 1.2206, "step": 28800 }, { "epoch": 7.699091394975949, "grad_norm": 2.3277316093444824, "learning_rate": 2.5007773020736712e-05, "loss": 1.2415, "step": 28810 }, { "epoch": 7.701763762693747, "grad_norm": 2.0364115238189697, "learning_rate": 2.495226110548642e-05, "loss": 1.1968, "step": 28820 }, { "epoch": 7.704436130411545, "grad_norm": 2.0834743976593018, "learning_rate": 2.489680208708063e-05, "loss": 1.2205, "step": 28830 }, { "epoch": 7.707108498129343, "grad_norm": 2.029390573501587, "learning_rate": 2.484139600460922e-05, "loss": 1.2304, "step": 28840 }, { "epoch": 7.709780865847141, "grad_norm": 2.1209352016448975, "learning_rate": 2.4786042897124807e-05, "loss": 1.2894, "step": 28850 }, { "epoch": 7.712453233564938, "grad_norm": 2.161252498626709, "learning_rate": 2.4730742803642504e-05, "loss": 1.2743, "step": 28860 }, { "epoch": 7.715125601282736, "grad_norm": 2.150007963180542, "learning_rate": 2.4675495763140298e-05, "loss": 1.2482, "step": 28870 }, { "epoch": 7.717797969000534, "grad_norm": 1.979568600654602, "learning_rate": 2.462030181455859e-05, "loss": 1.3048, "step": 28880 }, { "epoch": 7.720470336718332, "grad_norm": 2.0801188945770264, "learning_rate": 2.4565160996800428e-05, "loss": 1.2267, "step": 28890 }, { "epoch": 7.72314270443613, "grad_norm": 2.001863718032837, "learning_rate": 2.4510073348731434e-05, "loss": 1.2412, "step": 28900 }, { "epoch": 7.725815072153928, "grad_norm": 2.247753381729126, "learning_rate": 2.4455038909179717e-05, "loss": 1.2977, "step": 28910 }, { "epoch": 7.728487439871726, "grad_norm": 1.9314312934875488, "learning_rate": 2.440005771693593e-05, "loss": 1.1992, "step": 28920 }, { "epoch": 7.731159807589524, "grad_norm": 2.0058915615081787, "learning_rate": 2.4345129810753087e-05, "loss": 1.2119, "step": 28930 }, { "epoch": 7.733832175307322, "grad_norm": 2.0803093910217285, "learning_rate": 2.429025522934677e-05, "loss": 1.263, "step": 28940 }, { "epoch": 7.73650454302512, "grad_norm": 2.030503273010254, "learning_rate": 2.423543401139491e-05, "loss": 1.1688, "step": 28950 }, { "epoch": 7.739176910742918, "grad_norm": 2.104008913040161, "learning_rate": 2.4180666195537838e-05, "loss": 1.1846, "step": 28960 }, { "epoch": 7.741849278460716, "grad_norm": 2.021791696548462, "learning_rate": 2.4125951820378235e-05, "loss": 1.2346, "step": 28970 }, { "epoch": 7.744521646178514, "grad_norm": 2.2479772567749023, "learning_rate": 2.407129092448117e-05, "loss": 1.2751, "step": 28980 }, { "epoch": 7.747194013896312, "grad_norm": 2.1344785690307617, "learning_rate": 2.4016683546373886e-05, "loss": 1.2018, "step": 28990 }, { "epoch": 7.74986638161411, "grad_norm": 2.1255135536193848, "learning_rate": 2.396212972454609e-05, "loss": 1.2589, "step": 29000 }, { "epoch": 7.752538749331908, "grad_norm": 2.1533989906311035, "learning_rate": 2.390762949744956e-05, "loss": 1.2402, "step": 29010 }, { "epoch": 7.755211117049706, "grad_norm": 2.156620740890503, "learning_rate": 2.3853182903498404e-05, "loss": 1.2419, "step": 29020 }, { "epoch": 7.757883484767504, "grad_norm": 1.9978028535842896, "learning_rate": 2.3798789981068914e-05, "loss": 1.2721, "step": 29030 }, { "epoch": 7.760555852485302, "grad_norm": 2.0848798751831055, "learning_rate": 2.3744450768499538e-05, "loss": 1.2698, "step": 29040 }, { "epoch": 7.7632282202031, "grad_norm": 2.175245761871338, "learning_rate": 2.369016530409085e-05, "loss": 1.2468, "step": 29050 }, { "epoch": 7.765900587920898, "grad_norm": 2.1696841716766357, "learning_rate": 2.363593362610561e-05, "loss": 1.2172, "step": 29060 }, { "epoch": 7.768572955638696, "grad_norm": 1.9714295864105225, "learning_rate": 2.3581755772768543e-05, "loss": 1.3093, "step": 29070 }, { "epoch": 7.771245323356494, "grad_norm": 2.104121208190918, "learning_rate": 2.3527631782266544e-05, "loss": 1.2086, "step": 29080 }, { "epoch": 7.773917691074292, "grad_norm": 2.087700366973877, "learning_rate": 2.3473561692748513e-05, "loss": 1.2256, "step": 29090 }, { "epoch": 7.77659005879209, "grad_norm": 2.0084388256073, "learning_rate": 2.3419545542325338e-05, "loss": 1.2699, "step": 29100 }, { "epoch": 7.779262426509888, "grad_norm": 2.2062480449676514, "learning_rate": 2.3365583369069943e-05, "loss": 1.2715, "step": 29110 }, { "epoch": 7.781934794227686, "grad_norm": 2.1122920513153076, "learning_rate": 2.331167521101708e-05, "loss": 1.2422, "step": 29120 }, { "epoch": 7.784607161945484, "grad_norm": 2.2014482021331787, "learning_rate": 2.3257821106163634e-05, "loss": 1.2173, "step": 29130 }, { "epoch": 7.787279529663282, "grad_norm": 2.2499282360076904, "learning_rate": 2.3204021092468186e-05, "loss": 1.2048, "step": 29140 }, { "epoch": 7.78995189738108, "grad_norm": 2.075568199157715, "learning_rate": 2.3150275207851303e-05, "loss": 1.2639, "step": 29150 }, { "epoch": 7.792624265098878, "grad_norm": 2.0956461429595947, "learning_rate": 2.3096583490195377e-05, "loss": 1.2731, "step": 29160 }, { "epoch": 7.795296632816676, "grad_norm": 2.10376238822937, "learning_rate": 2.3042945977344642e-05, "loss": 1.2319, "step": 29170 }, { "epoch": 7.797969000534474, "grad_norm": 2.21205472946167, "learning_rate": 2.2989362707105032e-05, "loss": 1.3066, "step": 29180 }, { "epoch": 7.800641368252272, "grad_norm": 2.1282801628112793, "learning_rate": 2.2935833717244415e-05, "loss": 1.2209, "step": 29190 }, { "epoch": 7.80331373597007, "grad_norm": 2.08268666267395, "learning_rate": 2.2882359045492196e-05, "loss": 1.3034, "step": 29200 }, { "epoch": 7.805986103687868, "grad_norm": 2.090839385986328, "learning_rate": 2.2828938729539728e-05, "loss": 1.2736, "step": 29210 }, { "epoch": 7.808658471405665, "grad_norm": 2.1238749027252197, "learning_rate": 2.2775572807039825e-05, "loss": 1.2777, "step": 29220 }, { "epoch": 7.811330839123463, "grad_norm": 2.1232802867889404, "learning_rate": 2.2722261315607106e-05, "loss": 1.2004, "step": 29230 }, { "epoch": 7.814003206841261, "grad_norm": 2.057396650314331, "learning_rate": 2.2669004292817762e-05, "loss": 1.2156, "step": 29240 }, { "epoch": 7.816675574559059, "grad_norm": 2.0630247592926025, "learning_rate": 2.2615801776209667e-05, "loss": 1.1776, "step": 29250 }, { "epoch": 7.819347942276857, "grad_norm": 2.2614002227783203, "learning_rate": 2.2562653803282142e-05, "loss": 1.2957, "step": 29260 }, { "epoch": 7.822020309994655, "grad_norm": 2.082691192626953, "learning_rate": 2.2509560411496188e-05, "loss": 1.2261, "step": 29270 }, { "epoch": 7.824692677712453, "grad_norm": 1.936834692955017, "learning_rate": 2.2456521638274276e-05, "loss": 1.2929, "step": 29280 }, { "epoch": 7.827365045430251, "grad_norm": 2.020348310470581, "learning_rate": 2.2403537521000406e-05, "loss": 1.2206, "step": 29290 }, { "epoch": 7.830037413148049, "grad_norm": 2.078829288482666, "learning_rate": 2.2350608097020053e-05, "loss": 1.2926, "step": 29300 }, { "epoch": 7.832709780865847, "grad_norm": 2.0276589393615723, "learning_rate": 2.2297733403640075e-05, "loss": 1.2183, "step": 29310 }, { "epoch": 7.835382148583645, "grad_norm": 2.114861488342285, "learning_rate": 2.2244913478128892e-05, "loss": 1.2716, "step": 29320 }, { "epoch": 7.838054516301443, "grad_norm": 2.007420301437378, "learning_rate": 2.2192148357716168e-05, "loss": 1.195, "step": 29330 }, { "epoch": 7.840726884019241, "grad_norm": 2.0975234508514404, "learning_rate": 2.2139438079593034e-05, "loss": 1.2404, "step": 29340 }, { "epoch": 7.843399251737039, "grad_norm": 2.0706093311309814, "learning_rate": 2.2086782680911945e-05, "loss": 1.2111, "step": 29350 }, { "epoch": 7.846071619454837, "grad_norm": 2.136385679244995, "learning_rate": 2.2034182198786666e-05, "loss": 1.1672, "step": 29360 }, { "epoch": 7.848743987172635, "grad_norm": 2.143287420272827, "learning_rate": 2.198163667029226e-05, "loss": 1.3171, "step": 29370 }, { "epoch": 7.851416354890433, "grad_norm": 1.9991885423660278, "learning_rate": 2.1929146132465073e-05, "loss": 1.285, "step": 29380 }, { "epoch": 7.854088722608231, "grad_norm": 2.1113333702087402, "learning_rate": 2.1876710622302598e-05, "loss": 1.2908, "step": 29390 }, { "epoch": 7.856761090326029, "grad_norm": 2.063178539276123, "learning_rate": 2.182433017676374e-05, "loss": 1.2944, "step": 29400 }, { "epoch": 7.859433458043827, "grad_norm": 2.074934244155884, "learning_rate": 2.1772004832768365e-05, "loss": 1.2717, "step": 29410 }, { "epoch": 7.862105825761625, "grad_norm": 2.060929775238037, "learning_rate": 2.1719734627197643e-05, "loss": 1.2044, "step": 29420 }, { "epoch": 7.864778193479423, "grad_norm": 2.197664737701416, "learning_rate": 2.1667519596893836e-05, "loss": 1.3214, "step": 29430 }, { "epoch": 7.867450561197221, "grad_norm": 2.159766435623169, "learning_rate": 2.1615359778660327e-05, "loss": 1.2499, "step": 29440 }, { "epoch": 7.870122928915019, "grad_norm": 2.094172716140747, "learning_rate": 2.156325520926161e-05, "loss": 1.2652, "step": 29450 }, { "epoch": 7.872795296632817, "grad_norm": 2.1401398181915283, "learning_rate": 2.151120592542315e-05, "loss": 1.3186, "step": 29460 }, { "epoch": 7.875467664350615, "grad_norm": 2.1363375186920166, "learning_rate": 2.1459211963831526e-05, "loss": 1.2029, "step": 29470 }, { "epoch": 7.878140032068413, "grad_norm": 1.9351859092712402, "learning_rate": 2.1407273361134293e-05, "loss": 1.2713, "step": 29480 }, { "epoch": 7.880812399786211, "grad_norm": 2.3286333084106445, "learning_rate": 2.1355390153940023e-05, "loss": 1.2721, "step": 29490 }, { "epoch": 7.883484767504008, "grad_norm": 2.230107545852661, "learning_rate": 2.130356237881813e-05, "loss": 1.3185, "step": 29500 }, { "epoch": 7.886157135221806, "grad_norm": 2.3039634227752686, "learning_rate": 2.125179007229915e-05, "loss": 1.2812, "step": 29510 }, { "epoch": 7.888829502939604, "grad_norm": 2.1231534481048584, "learning_rate": 2.120007327087429e-05, "loss": 1.2437, "step": 29520 }, { "epoch": 7.891501870657402, "grad_norm": 2.1449410915374756, "learning_rate": 2.114841201099588e-05, "loss": 1.2607, "step": 29530 }, { "epoch": 7.8941742383752, "grad_norm": 2.2946348190307617, "learning_rate": 2.1096806329076892e-05, "loss": 1.3148, "step": 29540 }, { "epoch": 7.896846606092998, "grad_norm": 2.03314208984375, "learning_rate": 2.104525626149123e-05, "loss": 1.1725, "step": 29550 }, { "epoch": 7.899518973810796, "grad_norm": 2.103083848953247, "learning_rate": 2.099376184457358e-05, "loss": 1.2777, "step": 29560 }, { "epoch": 7.902191341528594, "grad_norm": 2.1012446880340576, "learning_rate": 2.0942323114619432e-05, "loss": 1.3446, "step": 29570 }, { "epoch": 7.904863709246392, "grad_norm": 2.1700327396392822, "learning_rate": 2.0890940107884916e-05, "loss": 1.2592, "step": 29580 }, { "epoch": 7.90753607696419, "grad_norm": 2.192653179168701, "learning_rate": 2.0839612860587076e-05, "loss": 1.2216, "step": 29590 }, { "epoch": 7.910208444681988, "grad_norm": 2.11545467376709, "learning_rate": 2.0788341408903445e-05, "loss": 1.173, "step": 29600 }, { "epoch": 7.912880812399786, "grad_norm": 2.073657512664795, "learning_rate": 2.0737125788972366e-05, "loss": 1.1833, "step": 29610 }, { "epoch": 7.915553180117584, "grad_norm": 2.1943564414978027, "learning_rate": 2.06859660368928e-05, "loss": 1.2882, "step": 29620 }, { "epoch": 7.918225547835382, "grad_norm": 2.3015801906585693, "learning_rate": 2.06348621887243e-05, "loss": 1.2154, "step": 29630 }, { "epoch": 7.92089791555318, "grad_norm": 2.050428867340088, "learning_rate": 2.058381428048708e-05, "loss": 1.1586, "step": 29640 }, { "epoch": 7.923570283270978, "grad_norm": 2.131366491317749, "learning_rate": 2.053282234816182e-05, "loss": 1.2523, "step": 29650 }, { "epoch": 7.926242650988776, "grad_norm": 2.0764682292938232, "learning_rate": 2.048188642768982e-05, "loss": 1.218, "step": 29660 }, { "epoch": 7.928915018706574, "grad_norm": 2.240628719329834, "learning_rate": 2.043100655497291e-05, "loss": 1.2841, "step": 29670 }, { "epoch": 7.931587386424372, "grad_norm": 2.1216185092926025, "learning_rate": 2.038018276587339e-05, "loss": 1.2165, "step": 29680 }, { "epoch": 7.93425975414217, "grad_norm": 2.0891330242156982, "learning_rate": 2.0329415096214022e-05, "loss": 1.2904, "step": 29690 }, { "epoch": 7.936932121859968, "grad_norm": 2.0506865978240967, "learning_rate": 2.0278703581778045e-05, "loss": 1.3114, "step": 29700 }, { "epoch": 7.939604489577766, "grad_norm": 2.0114119052886963, "learning_rate": 2.022804825830904e-05, "loss": 1.2361, "step": 29710 }, { "epoch": 7.942276857295564, "grad_norm": 2.024193286895752, "learning_rate": 2.017744916151112e-05, "loss": 1.1984, "step": 29720 }, { "epoch": 7.944949225013362, "grad_norm": 2.2009425163269043, "learning_rate": 2.012690632704861e-05, "loss": 1.2247, "step": 29730 }, { "epoch": 7.94762159273116, "grad_norm": 2.092966079711914, "learning_rate": 2.0076419790546297e-05, "loss": 1.2011, "step": 29740 }, { "epoch": 7.950293960448958, "grad_norm": 2.0474660396575928, "learning_rate": 2.0025989587589213e-05, "loss": 1.3308, "step": 29750 }, { "epoch": 7.952966328166756, "grad_norm": 2.188830852508545, "learning_rate": 1.9975615753722744e-05, "loss": 1.2081, "step": 29760 }, { "epoch": 7.955638695884554, "grad_norm": 2.28385066986084, "learning_rate": 1.992529832445249e-05, "loss": 1.2707, "step": 29770 }, { "epoch": 7.958311063602352, "grad_norm": 2.2341206073760986, "learning_rate": 1.987503733524437e-05, "loss": 1.2414, "step": 29780 }, { "epoch": 7.96098343132015, "grad_norm": 2.103367805480957, "learning_rate": 1.98248328215244e-05, "loss": 1.1736, "step": 29790 }, { "epoch": 7.963655799037948, "grad_norm": 2.0253405570983887, "learning_rate": 1.977468481867889e-05, "loss": 1.27, "step": 29800 }, { "epoch": 7.966328166755746, "grad_norm": 2.1806020736694336, "learning_rate": 1.97245933620543e-05, "loss": 1.2888, "step": 29810 }, { "epoch": 7.969000534473544, "grad_norm": 2.115906238555908, "learning_rate": 1.9674558486957207e-05, "loss": 1.2915, "step": 29820 }, { "epoch": 7.971672902191342, "grad_norm": 2.048633575439453, "learning_rate": 1.9624580228654366e-05, "loss": 1.2526, "step": 29830 }, { "epoch": 7.97434526990914, "grad_norm": 2.162379264831543, "learning_rate": 1.95746586223725e-05, "loss": 1.3142, "step": 29840 }, { "epoch": 7.977017637626938, "grad_norm": 1.999262809753418, "learning_rate": 1.9524793703298572e-05, "loss": 1.2922, "step": 29850 }, { "epoch": 7.979690005344735, "grad_norm": 2.1235110759735107, "learning_rate": 1.9474985506579446e-05, "loss": 1.2304, "step": 29860 }, { "epoch": 7.982362373062533, "grad_norm": 2.2172183990478516, "learning_rate": 1.9425234067322085e-05, "loss": 1.24, "step": 29870 }, { "epoch": 7.985034740780331, "grad_norm": 2.0238895416259766, "learning_rate": 1.93755394205934e-05, "loss": 1.2907, "step": 29880 }, { "epoch": 7.987707108498129, "grad_norm": 2.0927395820617676, "learning_rate": 1.932590160142036e-05, "loss": 1.2551, "step": 29890 }, { "epoch": 7.990379476215927, "grad_norm": 2.1852126121520996, "learning_rate": 1.92763206447897e-05, "loss": 1.2659, "step": 29900 }, { "epoch": 7.993051843933725, "grad_norm": 2.173623561859131, "learning_rate": 1.922679658564832e-05, "loss": 1.3331, "step": 29910 }, { "epoch": 7.995724211651523, "grad_norm": 2.218395471572876, "learning_rate": 1.9177329458902772e-05, "loss": 1.3252, "step": 29920 }, { "epoch": 7.998396579369321, "grad_norm": 2.0829696655273438, "learning_rate": 1.9127919299419705e-05, "loss": 1.206, "step": 29930 }, { "epoch": 8.00106894708712, "grad_norm": 1.966801404953003, "learning_rate": 1.9078566142025424e-05, "loss": 1.2192, "step": 29940 }, { "epoch": 8.003741314804918, "grad_norm": 1.886599063873291, "learning_rate": 1.9029270021506162e-05, "loss": 1.1609, "step": 29950 }, { "epoch": 8.006413682522716, "grad_norm": 2.034529685974121, "learning_rate": 1.8980030972607933e-05, "loss": 1.2364, "step": 29960 }, { "epoch": 8.009086050240514, "grad_norm": 1.9105424880981445, "learning_rate": 1.8930849030036536e-05, "loss": 1.1542, "step": 29970 }, { "epoch": 8.011758417958312, "grad_norm": 2.1083168983459473, "learning_rate": 1.8881724228457442e-05, "loss": 1.0939, "step": 29980 }, { "epoch": 8.01443078567611, "grad_norm": 2.141692876815796, "learning_rate": 1.8832656602495934e-05, "loss": 1.1055, "step": 29990 }, { "epoch": 8.017103153393908, "grad_norm": 2.1567461490631104, "learning_rate": 1.8783646186736982e-05, "loss": 1.1455, "step": 30000 }, { "epoch": 8.019775521111704, "grad_norm": 2.061882495880127, "learning_rate": 1.8734693015725203e-05, "loss": 1.1745, "step": 30010 }, { "epoch": 8.022447888829502, "grad_norm": 2.381556987762451, "learning_rate": 1.8685797123964886e-05, "loss": 1.2167, "step": 30020 }, { "epoch": 8.0251202565473, "grad_norm": 2.1993541717529297, "learning_rate": 1.8636958545919904e-05, "loss": 1.1297, "step": 30030 }, { "epoch": 8.027792624265098, "grad_norm": 2.132571220397949, "learning_rate": 1.8588177316013833e-05, "loss": 1.1577, "step": 30040 }, { "epoch": 8.030464991982896, "grad_norm": 2.038928508758545, "learning_rate": 1.8539453468629698e-05, "loss": 1.1506, "step": 30050 }, { "epoch": 8.033137359700694, "grad_norm": 2.096400737762451, "learning_rate": 1.8490787038110167e-05, "loss": 1.0896, "step": 30060 }, { "epoch": 8.035809727418492, "grad_norm": 2.067260980606079, "learning_rate": 1.844217805875743e-05, "loss": 1.1404, "step": 30070 }, { "epoch": 8.03848209513629, "grad_norm": 2.1161482334136963, "learning_rate": 1.8393626564833144e-05, "loss": 1.1602, "step": 30080 }, { "epoch": 8.041154462854088, "grad_norm": 2.1654980182647705, "learning_rate": 1.834513259055849e-05, "loss": 1.1428, "step": 30090 }, { "epoch": 8.043826830571886, "grad_norm": 2.059821605682373, "learning_rate": 1.8296696170114092e-05, "loss": 1.1078, "step": 30100 }, { "epoch": 8.046499198289684, "grad_norm": 2.2347376346588135, "learning_rate": 1.8248317337639943e-05, "loss": 1.098, "step": 30110 }, { "epoch": 8.049171566007482, "grad_norm": 2.23544979095459, "learning_rate": 1.819999612723561e-05, "loss": 1.1647, "step": 30120 }, { "epoch": 8.05184393372528, "grad_norm": 2.2311031818389893, "learning_rate": 1.8151732572959857e-05, "loss": 1.172, "step": 30130 }, { "epoch": 8.054516301443078, "grad_norm": 2.089353561401367, "learning_rate": 1.810352670883092e-05, "loss": 1.1726, "step": 30140 }, { "epoch": 8.057188669160876, "grad_norm": 2.0155904293060303, "learning_rate": 1.805537856882634e-05, "loss": 1.1595, "step": 30150 }, { "epoch": 8.059861036878674, "grad_norm": 2.144171953201294, "learning_rate": 1.8007288186882987e-05, "loss": 1.2483, "step": 30160 }, { "epoch": 8.062533404596472, "grad_norm": 1.969314455986023, "learning_rate": 1.795925559689704e-05, "loss": 1.13, "step": 30170 }, { "epoch": 8.06520577231427, "grad_norm": 2.1316988468170166, "learning_rate": 1.7911280832723865e-05, "loss": 1.1821, "step": 30180 }, { "epoch": 8.067878140032068, "grad_norm": 2.348546266555786, "learning_rate": 1.7863363928178122e-05, "loss": 1.1506, "step": 30190 }, { "epoch": 8.070550507749866, "grad_norm": 2.159191846847534, "learning_rate": 1.7815504917033722e-05, "loss": 1.1026, "step": 30200 }, { "epoch": 8.073222875467664, "grad_norm": 2.0628116130828857, "learning_rate": 1.776770383302374e-05, "loss": 1.2313, "step": 30210 }, { "epoch": 8.075895243185462, "grad_norm": 2.302281379699707, "learning_rate": 1.7719960709840342e-05, "loss": 1.1445, "step": 30220 }, { "epoch": 8.07856761090326, "grad_norm": 2.1654512882232666, "learning_rate": 1.7672275581135023e-05, "loss": 1.1616, "step": 30230 }, { "epoch": 8.081239978621058, "grad_norm": 2.1467254161834717, "learning_rate": 1.762464848051819e-05, "loss": 1.1584, "step": 30240 }, { "epoch": 8.083912346338856, "grad_norm": 2.3476336002349854, "learning_rate": 1.7577079441559553e-05, "loss": 1.2594, "step": 30250 }, { "epoch": 8.086584714056654, "grad_norm": 2.1335368156433105, "learning_rate": 1.752956849778772e-05, "loss": 1.212, "step": 30260 }, { "epoch": 8.089257081774452, "grad_norm": 2.0671567916870117, "learning_rate": 1.7482115682690457e-05, "loss": 1.2279, "step": 30270 }, { "epoch": 8.09192944949225, "grad_norm": 2.1876256465911865, "learning_rate": 1.743472102971454e-05, "loss": 1.1069, "step": 30280 }, { "epoch": 8.094601817210048, "grad_norm": 2.2041354179382324, "learning_rate": 1.738738457226574e-05, "loss": 1.1003, "step": 30290 }, { "epoch": 8.097274184927846, "grad_norm": 2.3087782859802246, "learning_rate": 1.734010634370876e-05, "loss": 1.2044, "step": 30300 }, { "epoch": 8.099946552645644, "grad_norm": 2.107633352279663, "learning_rate": 1.72928863773674e-05, "loss": 1.1731, "step": 30310 }, { "epoch": 8.102618920363442, "grad_norm": 2.0872414112091064, "learning_rate": 1.7245724706524234e-05, "loss": 1.1967, "step": 30320 }, { "epoch": 8.10529128808124, "grad_norm": 2.263470411300659, "learning_rate": 1.719862136442083e-05, "loss": 1.226, "step": 30330 }, { "epoch": 8.107963655799038, "grad_norm": 2.2201919555664062, "learning_rate": 1.715157638425765e-05, "loss": 1.1702, "step": 30340 }, { "epoch": 8.110636023516836, "grad_norm": 2.167698383331299, "learning_rate": 1.7104589799193982e-05, "loss": 1.2308, "step": 30350 }, { "epoch": 8.113308391234634, "grad_norm": 2.2280988693237305, "learning_rate": 1.705766164234801e-05, "loss": 1.1297, "step": 30360 }, { "epoch": 8.115980758952432, "grad_norm": 2.1165075302124023, "learning_rate": 1.7010791946796632e-05, "loss": 1.154, "step": 30370 }, { "epoch": 8.11865312667023, "grad_norm": 2.2603657245635986, "learning_rate": 1.6963980745575657e-05, "loss": 1.1188, "step": 30380 }, { "epoch": 8.121325494388028, "grad_norm": 2.1390349864959717, "learning_rate": 1.6917228071679602e-05, "loss": 1.1859, "step": 30390 }, { "epoch": 8.123997862105826, "grad_norm": 2.039499521255493, "learning_rate": 1.6870533958061752e-05, "loss": 1.1416, "step": 30400 }, { "epoch": 8.126670229823624, "grad_norm": 2.24080228805542, "learning_rate": 1.6823898437634088e-05, "loss": 1.1811, "step": 30410 }, { "epoch": 8.129342597541422, "grad_norm": 2.186419725418091, "learning_rate": 1.6777321543267356e-05, "loss": 1.1423, "step": 30420 }, { "epoch": 8.13201496525922, "grad_norm": 2.129504680633545, "learning_rate": 1.6730803307790842e-05, "loss": 1.1633, "step": 30430 }, { "epoch": 8.134687332977018, "grad_norm": 2.101668119430542, "learning_rate": 1.6684343763992703e-05, "loss": 1.2498, "step": 30440 }, { "epoch": 8.137359700694816, "grad_norm": 2.193866729736328, "learning_rate": 1.6637942944619522e-05, "loss": 1.1965, "step": 30450 }, { "epoch": 8.140032068412614, "grad_norm": 2.162787437438965, "learning_rate": 1.6591600882376602e-05, "loss": 1.207, "step": 30460 }, { "epoch": 8.142704436130412, "grad_norm": 2.0925650596618652, "learning_rate": 1.654531760992779e-05, "loss": 1.183, "step": 30470 }, { "epoch": 8.14537680384821, "grad_norm": 2.38498854637146, "learning_rate": 1.6499093159895518e-05, "loss": 1.2175, "step": 30480 }, { "epoch": 8.148049171566008, "grad_norm": 2.302579641342163, "learning_rate": 1.6452927564860743e-05, "loss": 1.1527, "step": 30490 }, { "epoch": 8.150721539283806, "grad_norm": 2.1436870098114014, "learning_rate": 1.640682085736298e-05, "loss": 1.2166, "step": 30500 }, { "epoch": 8.153393907001604, "grad_norm": 2.1942169666290283, "learning_rate": 1.6360773069900148e-05, "loss": 1.1716, "step": 30510 }, { "epoch": 8.156066274719402, "grad_norm": 2.1793153285980225, "learning_rate": 1.6314784234928702e-05, "loss": 1.1684, "step": 30520 }, { "epoch": 8.1587386424372, "grad_norm": 2.14441180229187, "learning_rate": 1.626885438486355e-05, "loss": 1.1195, "step": 30530 }, { "epoch": 8.161411010154998, "grad_norm": 2.1185004711151123, "learning_rate": 1.6222983552078007e-05, "loss": 1.2215, "step": 30540 }, { "epoch": 8.164083377872796, "grad_norm": 2.123196840286255, "learning_rate": 1.61771717689038e-05, "loss": 1.1832, "step": 30550 }, { "epoch": 8.166755745590594, "grad_norm": 2.186194896697998, "learning_rate": 1.613141906763096e-05, "loss": 1.2083, "step": 30560 }, { "epoch": 8.169428113308392, "grad_norm": 2.193206787109375, "learning_rate": 1.608572548050803e-05, "loss": 1.0979, "step": 30570 }, { "epoch": 8.17210048102619, "grad_norm": 2.2169249057769775, "learning_rate": 1.6040091039741744e-05, "loss": 1.12, "step": 30580 }, { "epoch": 8.174772848743988, "grad_norm": 2.133208990097046, "learning_rate": 1.5994515777497223e-05, "loss": 1.1334, "step": 30590 }, { "epoch": 8.177445216461786, "grad_norm": 2.180858850479126, "learning_rate": 1.594899972589783e-05, "loss": 1.1644, "step": 30600 }, { "epoch": 8.180117584179584, "grad_norm": 1.9578543901443481, "learning_rate": 1.590354291702526e-05, "loss": 1.1888, "step": 30610 }, { "epoch": 8.182789951897382, "grad_norm": 2.098051071166992, "learning_rate": 1.5858145382919333e-05, "loss": 1.1086, "step": 30620 }, { "epoch": 8.18546231961518, "grad_norm": 2.2391555309295654, "learning_rate": 1.581280715557827e-05, "loss": 1.1452, "step": 30630 }, { "epoch": 8.188134687332978, "grad_norm": 2.376694917678833, "learning_rate": 1.5767528266958277e-05, "loss": 1.1841, "step": 30640 }, { "epoch": 8.190807055050776, "grad_norm": 2.172715902328491, "learning_rate": 1.5722308748973947e-05, "loss": 1.1453, "step": 30650 }, { "epoch": 8.193479422768572, "grad_norm": 2.26023268699646, "learning_rate": 1.5677148633497853e-05, "loss": 1.2372, "step": 30660 }, { "epoch": 8.19615179048637, "grad_norm": 2.113327980041504, "learning_rate": 1.5632047952360783e-05, "loss": 1.2091, "step": 30670 }, { "epoch": 8.198824158204168, "grad_norm": 2.140690326690674, "learning_rate": 1.5587006737351628e-05, "loss": 1.1531, "step": 30680 }, { "epoch": 8.201496525921966, "grad_norm": 2.1371402740478516, "learning_rate": 1.5542025020217375e-05, "loss": 1.2126, "step": 30690 }, { "epoch": 8.204168893639764, "grad_norm": 2.1615569591522217, "learning_rate": 1.5497102832663023e-05, "loss": 1.2039, "step": 30700 }, { "epoch": 8.206841261357562, "grad_norm": 2.0711669921875, "learning_rate": 1.5452240206351643e-05, "loss": 1.2236, "step": 30710 }, { "epoch": 8.20951362907536, "grad_norm": 2.1842689514160156, "learning_rate": 1.5407437172904336e-05, "loss": 1.1873, "step": 30720 }, { "epoch": 8.212185996793158, "grad_norm": 2.288241147994995, "learning_rate": 1.5362693763900193e-05, "loss": 1.204, "step": 30730 }, { "epoch": 8.214858364510956, "grad_norm": 2.0649006366729736, "learning_rate": 1.5318010010876305e-05, "loss": 1.1229, "step": 30740 }, { "epoch": 8.217530732228754, "grad_norm": 2.0768380165100098, "learning_rate": 1.52733859453276e-05, "loss": 1.1974, "step": 30750 }, { "epoch": 8.220203099946552, "grad_norm": 1.9523011445999146, "learning_rate": 1.5228821598707122e-05, "loss": 1.1893, "step": 30760 }, { "epoch": 8.22287546766435, "grad_norm": 2.1099400520324707, "learning_rate": 1.5184317002425653e-05, "loss": 1.1913, "step": 30770 }, { "epoch": 8.225547835382148, "grad_norm": 2.0837039947509766, "learning_rate": 1.5139872187851944e-05, "loss": 1.1638, "step": 30780 }, { "epoch": 8.228220203099946, "grad_norm": 2.160473585128784, "learning_rate": 1.5095487186312607e-05, "loss": 1.1237, "step": 30790 }, { "epoch": 8.230892570817744, "grad_norm": 2.131002426147461, "learning_rate": 1.505116202909207e-05, "loss": 1.2002, "step": 30800 }, { "epoch": 8.233564938535542, "grad_norm": 2.072420835494995, "learning_rate": 1.5006896747432609e-05, "loss": 1.1823, "step": 30810 }, { "epoch": 8.23623730625334, "grad_norm": 2.480342149734497, "learning_rate": 1.4962691372534288e-05, "loss": 1.1554, "step": 30820 }, { "epoch": 8.238909673971138, "grad_norm": 2.637852668762207, "learning_rate": 1.4918545935554872e-05, "loss": 1.171, "step": 30830 }, { "epoch": 8.241582041688936, "grad_norm": 1.9761701822280884, "learning_rate": 1.4874460467610041e-05, "loss": 1.1365, "step": 30840 }, { "epoch": 8.244254409406734, "grad_norm": 2.2092714309692383, "learning_rate": 1.4830434999773046e-05, "loss": 1.2147, "step": 30850 }, { "epoch": 8.246926777124532, "grad_norm": 2.3081250190734863, "learning_rate": 1.4786469563074933e-05, "loss": 1.1673, "step": 30860 }, { "epoch": 8.24959914484233, "grad_norm": 2.143738031387329, "learning_rate": 1.4742564188504426e-05, "loss": 1.1376, "step": 30870 }, { "epoch": 8.252271512560128, "grad_norm": 2.171787977218628, "learning_rate": 1.469871890700788e-05, "loss": 1.1716, "step": 30880 }, { "epoch": 8.254943880277926, "grad_norm": 2.045050859451294, "learning_rate": 1.4654933749489363e-05, "loss": 1.1857, "step": 30890 }, { "epoch": 8.257616247995724, "grad_norm": 2.2319345474243164, "learning_rate": 1.4611208746810456e-05, "loss": 1.1791, "step": 30900 }, { "epoch": 8.260288615713522, "grad_norm": 2.127666711807251, "learning_rate": 1.456754392979044e-05, "loss": 1.1681, "step": 30910 }, { "epoch": 8.26296098343132, "grad_norm": 2.217803955078125, "learning_rate": 1.4523939329206138e-05, "loss": 1.2134, "step": 30920 }, { "epoch": 8.265633351149118, "grad_norm": 2.1986429691314697, "learning_rate": 1.4480394975791955e-05, "loss": 1.1816, "step": 30930 }, { "epoch": 8.268305718866916, "grad_norm": 2.1153335571289062, "learning_rate": 1.4436910900239731e-05, "loss": 1.1473, "step": 30940 }, { "epoch": 8.270978086584714, "grad_norm": 2.03336763381958, "learning_rate": 1.439348713319898e-05, "loss": 1.1628, "step": 30950 }, { "epoch": 8.273650454302512, "grad_norm": 2.1557438373565674, "learning_rate": 1.4350123705276552e-05, "loss": 1.1765, "step": 30960 }, { "epoch": 8.27632282202031, "grad_norm": 2.291888952255249, "learning_rate": 1.4306820647036923e-05, "loss": 1.1561, "step": 30970 }, { "epoch": 8.278995189738108, "grad_norm": 2.04992413520813, "learning_rate": 1.4263577989001863e-05, "loss": 1.1502, "step": 30980 }, { "epoch": 8.281667557455906, "grad_norm": 2.212059259414673, "learning_rate": 1.422039576165065e-05, "loss": 1.2022, "step": 30990 }, { "epoch": 8.284339925173704, "grad_norm": 2.0827481746673584, "learning_rate": 1.4177273995419982e-05, "loss": 1.2266, "step": 31000 }, { "epoch": 8.287012292891502, "grad_norm": 2.099534273147583, "learning_rate": 1.4134212720703931e-05, "loss": 1.2009, "step": 31010 }, { "epoch": 8.2896846606093, "grad_norm": 2.334784984588623, "learning_rate": 1.4091211967853845e-05, "loss": 1.1279, "step": 31020 }, { "epoch": 8.292357028327098, "grad_norm": 2.196622848510742, "learning_rate": 1.4048271767178579e-05, "loss": 1.1313, "step": 31030 }, { "epoch": 8.295029396044896, "grad_norm": 2.226259231567383, "learning_rate": 1.4005392148944152e-05, "loss": 1.1444, "step": 31040 }, { "epoch": 8.297701763762694, "grad_norm": 1.986608624458313, "learning_rate": 1.3962573143373969e-05, "loss": 1.1371, "step": 31050 }, { "epoch": 8.300374131480492, "grad_norm": 2.1444942951202393, "learning_rate": 1.3919814780648688e-05, "loss": 1.2145, "step": 31060 }, { "epoch": 8.30304649919829, "grad_norm": 2.1759116649627686, "learning_rate": 1.3877117090906222e-05, "loss": 1.222, "step": 31070 }, { "epoch": 8.305718866916088, "grad_norm": 2.197798252105713, "learning_rate": 1.3834480104241754e-05, "loss": 1.1764, "step": 31080 }, { "epoch": 8.308391234633886, "grad_norm": 2.1161584854125977, "learning_rate": 1.3791903850707589e-05, "loss": 1.1809, "step": 31090 }, { "epoch": 8.311063602351684, "grad_norm": 2.106163501739502, "learning_rate": 1.374938836031332e-05, "loss": 1.1629, "step": 31100 }, { "epoch": 8.313735970069482, "grad_norm": 2.179640293121338, "learning_rate": 1.3706933663025678e-05, "loss": 1.187, "step": 31110 }, { "epoch": 8.31640833778728, "grad_norm": 2.1718761920928955, "learning_rate": 1.3664539788768539e-05, "loss": 1.1902, "step": 31120 }, { "epoch": 8.319080705505078, "grad_norm": 2.2546775341033936, "learning_rate": 1.3622206767422907e-05, "loss": 1.171, "step": 31130 }, { "epoch": 8.321753073222876, "grad_norm": 2.2284255027770996, "learning_rate": 1.357993462882693e-05, "loss": 1.1958, "step": 31140 }, { "epoch": 8.324425440940674, "grad_norm": 2.2016327381134033, "learning_rate": 1.3537723402775737e-05, "loss": 1.1807, "step": 31150 }, { "epoch": 8.327097808658472, "grad_norm": 2.0302164554595947, "learning_rate": 1.3495573119021698e-05, "loss": 1.1936, "step": 31160 }, { "epoch": 8.32977017637627, "grad_norm": 2.3190088272094727, "learning_rate": 1.3453483807274048e-05, "loss": 1.1601, "step": 31170 }, { "epoch": 8.332442544094068, "grad_norm": 2.2674684524536133, "learning_rate": 1.3411455497199165e-05, "loss": 1.2164, "step": 31180 }, { "epoch": 8.335114911811866, "grad_norm": 2.414410352706909, "learning_rate": 1.336948821842039e-05, "loss": 1.1185, "step": 31190 }, { "epoch": 8.337787279529664, "grad_norm": 1.936843991279602, "learning_rate": 1.3327582000518035e-05, "loss": 1.1742, "step": 31200 }, { "epoch": 8.340459647247462, "grad_norm": 2.2939600944519043, "learning_rate": 1.3285736873029409e-05, "loss": 1.1519, "step": 31210 }, { "epoch": 8.34313201496526, "grad_norm": 2.153366804122925, "learning_rate": 1.324395286544876e-05, "loss": 1.2037, "step": 31220 }, { "epoch": 8.345804382683058, "grad_norm": 2.1847779750823975, "learning_rate": 1.3202230007227179e-05, "loss": 1.189, "step": 31230 }, { "epoch": 8.348476750400856, "grad_norm": 2.138488531112671, "learning_rate": 1.3160568327772738e-05, "loss": 1.2171, "step": 31240 }, { "epoch": 8.351149118118654, "grad_norm": 2.1546854972839355, "learning_rate": 1.3118967856450382e-05, "loss": 1.2126, "step": 31250 }, { "epoch": 8.353821485836452, "grad_norm": 2.1969501972198486, "learning_rate": 1.3077428622581877e-05, "loss": 1.1706, "step": 31260 }, { "epoch": 8.35649385355425, "grad_norm": 2.181748628616333, "learning_rate": 1.3035950655445872e-05, "loss": 1.1448, "step": 31270 }, { "epoch": 8.359166221272048, "grad_norm": 2.1777846813201904, "learning_rate": 1.2994533984277758e-05, "loss": 1.1601, "step": 31280 }, { "epoch": 8.361838588989844, "grad_norm": 2.219191789627075, "learning_rate": 1.2953178638269825e-05, "loss": 1.3012, "step": 31290 }, { "epoch": 8.364510956707644, "grad_norm": 2.1889684200286865, "learning_rate": 1.2911884646571037e-05, "loss": 1.1901, "step": 31300 }, { "epoch": 8.36718332442544, "grad_norm": 2.0811409950256348, "learning_rate": 1.287065203828719e-05, "loss": 1.1899, "step": 31310 }, { "epoch": 8.369855692143238, "grad_norm": 2.2898550033569336, "learning_rate": 1.2829480842480769e-05, "loss": 1.1818, "step": 31320 }, { "epoch": 8.372528059861036, "grad_norm": 2.0709502696990967, "learning_rate": 1.278837108817098e-05, "loss": 1.2489, "step": 31330 }, { "epoch": 8.375200427578834, "grad_norm": 2.123767614364624, "learning_rate": 1.274732280433375e-05, "loss": 1.1974, "step": 31340 }, { "epoch": 8.377872795296632, "grad_norm": 2.071045160293579, "learning_rate": 1.2706336019901665e-05, "loss": 1.1965, "step": 31350 }, { "epoch": 8.38054516301443, "grad_norm": 2.184329032897949, "learning_rate": 1.2665410763763896e-05, "loss": 1.2172, "step": 31360 }, { "epoch": 8.383217530732228, "grad_norm": 2.3401215076446533, "learning_rate": 1.2624547064766402e-05, "loss": 1.1899, "step": 31370 }, { "epoch": 8.385889898450026, "grad_norm": 2.155318260192871, "learning_rate": 1.2583744951711584e-05, "loss": 1.1792, "step": 31380 }, { "epoch": 8.388562266167824, "grad_norm": 2.20011305809021, "learning_rate": 1.2543004453358531e-05, "loss": 1.2066, "step": 31390 }, { "epoch": 8.391234633885622, "grad_norm": 2.0160932540893555, "learning_rate": 1.2502325598422893e-05, "loss": 1.2222, "step": 31400 }, { "epoch": 8.39390700160342, "grad_norm": 2.0931403636932373, "learning_rate": 1.2461708415576856e-05, "loss": 1.2384, "step": 31410 }, { "epoch": 8.396579369321218, "grad_norm": 2.1289875507354736, "learning_rate": 1.2421152933449165e-05, "loss": 1.1575, "step": 31420 }, { "epoch": 8.399251737039016, "grad_norm": 2.1781466007232666, "learning_rate": 1.2380659180624998e-05, "loss": 1.191, "step": 31430 }, { "epoch": 8.401924104756814, "grad_norm": 2.1087586879730225, "learning_rate": 1.2340227185646113e-05, "loss": 1.1978, "step": 31440 }, { "epoch": 8.404596472474612, "grad_norm": 2.1136343479156494, "learning_rate": 1.2299856977010694e-05, "loss": 1.1861, "step": 31450 }, { "epoch": 8.40726884019241, "grad_norm": 2.284985303878784, "learning_rate": 1.2259548583173409e-05, "loss": 1.1511, "step": 31460 }, { "epoch": 8.409941207910208, "grad_norm": 2.1557559967041016, "learning_rate": 1.2219302032545266e-05, "loss": 1.1971, "step": 31470 }, { "epoch": 8.412613575628006, "grad_norm": 2.25907826423645, "learning_rate": 1.217911735349384e-05, "loss": 1.2251, "step": 31480 }, { "epoch": 8.415285943345804, "grad_norm": 2.07696533203125, "learning_rate": 1.213899457434291e-05, "loss": 1.1539, "step": 31490 }, { "epoch": 8.417958311063602, "grad_norm": 2.1086347103118896, "learning_rate": 1.2098933723372819e-05, "loss": 1.1878, "step": 31500 }, { "epoch": 8.4206306787814, "grad_norm": 2.1808483600616455, "learning_rate": 1.2058934828820079e-05, "loss": 1.154, "step": 31510 }, { "epoch": 8.423303046499198, "grad_norm": 2.208563804626465, "learning_rate": 1.2018997918877662e-05, "loss": 1.1957, "step": 31520 }, { "epoch": 8.425975414216996, "grad_norm": 2.0855329036712646, "learning_rate": 1.1979123021694782e-05, "loss": 1.1778, "step": 31530 }, { "epoch": 8.428647781934794, "grad_norm": 2.3629298210144043, "learning_rate": 1.1939310165376993e-05, "loss": 1.1381, "step": 31540 }, { "epoch": 8.431320149652592, "grad_norm": 2.230238914489746, "learning_rate": 1.1899559377986035e-05, "loss": 1.163, "step": 31550 }, { "epoch": 8.43399251737039, "grad_norm": 2.171351671218872, "learning_rate": 1.1859870687540043e-05, "loss": 1.2073, "step": 31560 }, { "epoch": 8.436664885088188, "grad_norm": 2.2594099044799805, "learning_rate": 1.1820244122013213e-05, "loss": 1.1977, "step": 31570 }, { "epoch": 8.439337252805986, "grad_norm": 2.267430067062378, "learning_rate": 1.1780679709336085e-05, "loss": 1.2261, "step": 31580 }, { "epoch": 8.442009620523784, "grad_norm": 2.2696616649627686, "learning_rate": 1.1741177477395337e-05, "loss": 1.1302, "step": 31590 }, { "epoch": 8.444681988241582, "grad_norm": 2.0815224647521973, "learning_rate": 1.1701737454033812e-05, "loss": 1.1985, "step": 31600 }, { "epoch": 8.44735435595938, "grad_norm": 2.148184299468994, "learning_rate": 1.166235966705056e-05, "loss": 1.1627, "step": 31610 }, { "epoch": 8.450026723677178, "grad_norm": 2.28935170173645, "learning_rate": 1.1623044144200657e-05, "loss": 1.1723, "step": 31620 }, { "epoch": 8.452699091394976, "grad_norm": 2.2608673572540283, "learning_rate": 1.1583790913195402e-05, "loss": 1.2414, "step": 31630 }, { "epoch": 8.455371459112774, "grad_norm": 2.1188554763793945, "learning_rate": 1.1544600001702133e-05, "loss": 1.2185, "step": 31640 }, { "epoch": 8.458043826830572, "grad_norm": 2.191650629043579, "learning_rate": 1.1505471437344272e-05, "loss": 1.1641, "step": 31650 }, { "epoch": 8.46071619454837, "grad_norm": 2.0754432678222656, "learning_rate": 1.1466405247701307e-05, "loss": 1.2299, "step": 31660 }, { "epoch": 8.463388562266168, "grad_norm": 2.1514980792999268, "learning_rate": 1.1427401460308774e-05, "loss": 1.2263, "step": 31670 }, { "epoch": 8.466060929983966, "grad_norm": 2.1117684841156006, "learning_rate": 1.1388460102658138e-05, "loss": 1.1282, "step": 31680 }, { "epoch": 8.468733297701764, "grad_norm": 2.0499234199523926, "learning_rate": 1.1349581202197012e-05, "loss": 1.1178, "step": 31690 }, { "epoch": 8.471405665419562, "grad_norm": 2.1124424934387207, "learning_rate": 1.1310764786328842e-05, "loss": 1.1398, "step": 31700 }, { "epoch": 8.47407803313736, "grad_norm": 2.0329463481903076, "learning_rate": 1.1272010882413109e-05, "loss": 1.1365, "step": 31710 }, { "epoch": 8.476750400855158, "grad_norm": 2.228235960006714, "learning_rate": 1.1233319517765218e-05, "loss": 1.2108, "step": 31720 }, { "epoch": 8.479422768572956, "grad_norm": 2.252894878387451, "learning_rate": 1.1194690719656486e-05, "loss": 1.1789, "step": 31730 }, { "epoch": 8.482095136290754, "grad_norm": 1.9969401359558105, "learning_rate": 1.1156124515314147e-05, "loss": 1.1997, "step": 31740 }, { "epoch": 8.484767504008552, "grad_norm": 2.015465497970581, "learning_rate": 1.1117620931921313e-05, "loss": 1.1992, "step": 31750 }, { "epoch": 8.48743987172635, "grad_norm": 2.102015733718872, "learning_rate": 1.1079179996616906e-05, "loss": 1.2278, "step": 31760 }, { "epoch": 8.490112239444148, "grad_norm": 2.2371857166290283, "learning_rate": 1.1040801736495799e-05, "loss": 1.164, "step": 31770 }, { "epoch": 8.492784607161946, "grad_norm": 2.1743972301483154, "learning_rate": 1.1002486178608573e-05, "loss": 1.1726, "step": 31780 }, { "epoch": 8.495456974879744, "grad_norm": 2.2615981101989746, "learning_rate": 1.0964233349961684e-05, "loss": 1.1675, "step": 31790 }, { "epoch": 8.498129342597542, "grad_norm": 2.159367561340332, "learning_rate": 1.0926043277517373e-05, "loss": 1.1977, "step": 31800 }, { "epoch": 8.50080171031534, "grad_norm": 2.0810561180114746, "learning_rate": 1.0887915988193575e-05, "loss": 1.1934, "step": 31810 }, { "epoch": 8.503474078033138, "grad_norm": 2.270254611968994, "learning_rate": 1.0849851508864117e-05, "loss": 1.1449, "step": 31820 }, { "epoch": 8.506146445750936, "grad_norm": 2.281573534011841, "learning_rate": 1.0811849866358393e-05, "loss": 1.1947, "step": 31830 }, { "epoch": 8.508818813468734, "grad_norm": 2.147365093231201, "learning_rate": 1.0773911087461597e-05, "loss": 1.1604, "step": 31840 }, { "epoch": 8.511491181186532, "grad_norm": 2.095155954360962, "learning_rate": 1.0736035198914606e-05, "loss": 1.1658, "step": 31850 }, { "epoch": 8.51416354890433, "grad_norm": 2.161773920059204, "learning_rate": 1.069822222741398e-05, "loss": 1.123, "step": 31860 }, { "epoch": 8.516835916622128, "grad_norm": 2.12282133102417, "learning_rate": 1.066047219961186e-05, "loss": 1.2099, "step": 31870 }, { "epoch": 8.519508284339926, "grad_norm": 2.1173250675201416, "learning_rate": 1.0622785142116131e-05, "loss": 1.1547, "step": 31880 }, { "epoch": 8.522180652057724, "grad_norm": 2.0824410915374756, "learning_rate": 1.058516108149018e-05, "loss": 1.1521, "step": 31890 }, { "epoch": 8.524853019775522, "grad_norm": 2.1164462566375732, "learning_rate": 1.0547600044253115e-05, "loss": 1.2265, "step": 31900 }, { "epoch": 8.52752538749332, "grad_norm": 2.152815818786621, "learning_rate": 1.05101020568795e-05, "loss": 1.2396, "step": 31910 }, { "epoch": 8.530197755211116, "grad_norm": 2.2693629264831543, "learning_rate": 1.0472667145799543e-05, "loss": 1.1862, "step": 31920 }, { "epoch": 8.532870122928916, "grad_norm": 2.1222035884857178, "learning_rate": 1.0435295337398943e-05, "loss": 1.1686, "step": 31930 }, { "epoch": 8.535542490646712, "grad_norm": 2.096386432647705, "learning_rate": 1.0397986658018988e-05, "loss": 1.2282, "step": 31940 }, { "epoch": 8.538214858364512, "grad_norm": 2.029423475265503, "learning_rate": 1.0360741133956352e-05, "loss": 1.1486, "step": 31950 }, { "epoch": 8.540887226082308, "grad_norm": 2.0768489837646484, "learning_rate": 1.032355879146334e-05, "loss": 1.1587, "step": 31960 }, { "epoch": 8.543559593800106, "grad_norm": 2.2596304416656494, "learning_rate": 1.0286439656747614e-05, "loss": 1.1796, "step": 31970 }, { "epoch": 8.546231961517904, "grad_norm": 2.1683359146118164, "learning_rate": 1.0249383755972319e-05, "loss": 1.1875, "step": 31980 }, { "epoch": 8.548904329235702, "grad_norm": 1.9842551946640015, "learning_rate": 1.0212391115256049e-05, "loss": 1.1538, "step": 31990 }, { "epoch": 8.5515766969535, "grad_norm": 2.1803505420684814, "learning_rate": 1.017546176067279e-05, "loss": 1.2272, "step": 32000 }, { "epoch": 8.554249064671298, "grad_norm": 2.23764705657959, "learning_rate": 1.0138595718251931e-05, "loss": 1.1616, "step": 32010 }, { "epoch": 8.556921432389096, "grad_norm": 2.2283926010131836, "learning_rate": 1.0101793013978212e-05, "loss": 1.2217, "step": 32020 }, { "epoch": 8.559593800106894, "grad_norm": 2.2320849895477295, "learning_rate": 1.0065053673791748e-05, "loss": 1.1324, "step": 32030 }, { "epoch": 8.562266167824692, "grad_norm": 2.1736326217651367, "learning_rate": 1.0028377723588001e-05, "loss": 1.1829, "step": 32040 }, { "epoch": 8.56493853554249, "grad_norm": 2.1591625213623047, "learning_rate": 9.991765189217739e-06, "loss": 1.1342, "step": 32050 }, { "epoch": 8.567610903260288, "grad_norm": 2.165165662765503, "learning_rate": 9.955216096487052e-06, "loss": 1.248, "step": 32060 }, { "epoch": 8.570283270978086, "grad_norm": 2.13219952583313, "learning_rate": 9.918730471157289e-06, "loss": 1.2312, "step": 32070 }, { "epoch": 8.572955638695884, "grad_norm": 2.083112955093384, "learning_rate": 9.882308338945045e-06, "loss": 1.1996, "step": 32080 }, { "epoch": 8.575628006413682, "grad_norm": 2.139697790145874, "learning_rate": 9.845949725522252e-06, "loss": 1.2121, "step": 32090 }, { "epoch": 8.57830037413148, "grad_norm": 2.1033358573913574, "learning_rate": 9.809654656515955e-06, "loss": 1.1711, "step": 32100 }, { "epoch": 8.580972741849278, "grad_norm": 2.103965997695923, "learning_rate": 9.773423157508489e-06, "loss": 1.2165, "step": 32110 }, { "epoch": 8.583645109567076, "grad_norm": 2.147108554840088, "learning_rate": 9.737255254037347e-06, "loss": 1.2132, "step": 32120 }, { "epoch": 8.586317477284874, "grad_norm": 2.2541441917419434, "learning_rate": 9.701150971595218e-06, "loss": 1.231, "step": 32130 }, { "epoch": 8.588989845002672, "grad_norm": 2.222564697265625, "learning_rate": 9.665110335629935e-06, "loss": 1.1506, "step": 32140 }, { "epoch": 8.59166221272047, "grad_norm": 2.3519718647003174, "learning_rate": 9.629133371544496e-06, "loss": 1.2058, "step": 32150 }, { "epoch": 8.594334580438268, "grad_norm": 2.1793272495269775, "learning_rate": 9.593220104696965e-06, "loss": 1.1983, "step": 32160 }, { "epoch": 8.597006948156066, "grad_norm": 2.234360933303833, "learning_rate": 9.557370560400548e-06, "loss": 1.1081, "step": 32170 }, { "epoch": 8.599679315873864, "grad_norm": 2.244149684906006, "learning_rate": 9.521584763923552e-06, "loss": 1.1293, "step": 32180 }, { "epoch": 8.602351683591662, "grad_norm": 2.090513229370117, "learning_rate": 9.485862740489304e-06, "loss": 1.1629, "step": 32190 }, { "epoch": 8.60502405130946, "grad_norm": 2.1351866722106934, "learning_rate": 9.450204515276251e-06, "loss": 1.1316, "step": 32200 }, { "epoch": 8.607696419027258, "grad_norm": 2.268317699432373, "learning_rate": 9.414610113417766e-06, "loss": 1.1728, "step": 32210 }, { "epoch": 8.610368786745056, "grad_norm": 2.255051851272583, "learning_rate": 9.37907956000237e-06, "loss": 1.2554, "step": 32220 }, { "epoch": 8.613041154462854, "grad_norm": 2.1467652320861816, "learning_rate": 9.34361288007346e-06, "loss": 1.1468, "step": 32230 }, { "epoch": 8.615713522180652, "grad_norm": 2.307605266571045, "learning_rate": 9.308210098629488e-06, "loss": 1.1892, "step": 32240 }, { "epoch": 8.61838588989845, "grad_norm": 2.0895166397094727, "learning_rate": 9.272871240623827e-06, "loss": 1.1816, "step": 32250 }, { "epoch": 8.621058257616248, "grad_norm": 2.0934860706329346, "learning_rate": 9.237596330964838e-06, "loss": 1.1487, "step": 32260 }, { "epoch": 8.623730625334046, "grad_norm": 2.2088074684143066, "learning_rate": 9.202385394515733e-06, "loss": 1.2107, "step": 32270 }, { "epoch": 8.626402993051844, "grad_norm": 2.1643431186676025, "learning_rate": 9.16723845609474e-06, "loss": 1.1752, "step": 32280 }, { "epoch": 8.629075360769642, "grad_norm": 2.221729040145874, "learning_rate": 9.132155540474851e-06, "loss": 1.169, "step": 32290 }, { "epoch": 8.63174772848744, "grad_norm": 2.102905035018921, "learning_rate": 9.097136672384078e-06, "loss": 1.1952, "step": 32300 }, { "epoch": 8.634420096205238, "grad_norm": 2.2897191047668457, "learning_rate": 9.06218187650515e-06, "loss": 1.1807, "step": 32310 }, { "epoch": 8.637092463923036, "grad_norm": 2.1335794925689697, "learning_rate": 9.027291177475716e-06, "loss": 1.2587, "step": 32320 }, { "epoch": 8.639764831640834, "grad_norm": 2.032491683959961, "learning_rate": 8.992464599888229e-06, "loss": 1.171, "step": 32330 }, { "epoch": 8.642437199358632, "grad_norm": 2.24336314201355, "learning_rate": 8.95770216828996e-06, "loss": 1.2135, "step": 32340 }, { "epoch": 8.64510956707643, "grad_norm": 2.183116912841797, "learning_rate": 8.923003907182926e-06, "loss": 1.147, "step": 32350 }, { "epoch": 8.647781934794228, "grad_norm": 2.1955769062042236, "learning_rate": 8.888369841023946e-06, "loss": 1.1265, "step": 32360 }, { "epoch": 8.650454302512026, "grad_norm": 2.3789520263671875, "learning_rate": 8.853799994224599e-06, "loss": 1.1907, "step": 32370 }, { "epoch": 8.653126670229824, "grad_norm": 2.164802074432373, "learning_rate": 8.819294391151168e-06, "loss": 1.2056, "step": 32380 }, { "epoch": 8.655799037947622, "grad_norm": 2.162881851196289, "learning_rate": 8.784853056124709e-06, "loss": 1.0927, "step": 32390 }, { "epoch": 8.65847140566542, "grad_norm": 2.1710457801818848, "learning_rate": 8.750476013420883e-06, "loss": 1.2124, "step": 32400 }, { "epoch": 8.661143773383218, "grad_norm": 2.2281243801116943, "learning_rate": 8.716163287270152e-06, "loss": 1.1615, "step": 32410 }, { "epoch": 8.663816141101016, "grad_norm": 2.126492500305176, "learning_rate": 8.681914901857557e-06, "loss": 1.1706, "step": 32420 }, { "epoch": 8.666488508818814, "grad_norm": 2.3496429920196533, "learning_rate": 8.647730881322813e-06, "loss": 1.2675, "step": 32430 }, { "epoch": 8.669160876536612, "grad_norm": 2.2455477714538574, "learning_rate": 8.6136112497603e-06, "loss": 1.1817, "step": 32440 }, { "epoch": 8.67183324425441, "grad_norm": 2.2622618675231934, "learning_rate": 8.579556031218971e-06, "loss": 1.1119, "step": 32450 }, { "epoch": 8.674505611972208, "grad_norm": 2.265237331390381, "learning_rate": 8.545565249702403e-06, "loss": 1.1981, "step": 32460 }, { "epoch": 8.677177979690006, "grad_norm": 2.200049877166748, "learning_rate": 8.511638929168764e-06, "loss": 1.2551, "step": 32470 }, { "epoch": 8.679850347407804, "grad_norm": 2.108189344406128, "learning_rate": 8.477777093530704e-06, "loss": 1.1802, "step": 32480 }, { "epoch": 8.682522715125602, "grad_norm": 2.2164433002471924, "learning_rate": 8.443979766655585e-06, "loss": 1.2256, "step": 32490 }, { "epoch": 8.6851950828434, "grad_norm": 2.0696816444396973, "learning_rate": 8.410246972365131e-06, "loss": 1.1808, "step": 32500 }, { "epoch": 8.687867450561198, "grad_norm": 2.179058790206909, "learning_rate": 8.37657873443567e-06, "loss": 1.1595, "step": 32510 }, { "epoch": 8.690539818278996, "grad_norm": 2.022372245788574, "learning_rate": 8.342975076598014e-06, "loss": 1.1624, "step": 32520 }, { "epoch": 8.693212185996794, "grad_norm": 2.078016519546509, "learning_rate": 8.309436022537465e-06, "loss": 1.2175, "step": 32530 }, { "epoch": 8.695884553714592, "grad_norm": 2.112997055053711, "learning_rate": 8.275961595893777e-06, "loss": 1.2474, "step": 32540 }, { "epoch": 8.69855692143239, "grad_norm": 2.2092154026031494, "learning_rate": 8.242551820261125e-06, "loss": 1.2105, "step": 32550 }, { "epoch": 8.701229289150188, "grad_norm": 2.2042713165283203, "learning_rate": 8.209206719188168e-06, "loss": 1.2052, "step": 32560 }, { "epoch": 8.703901656867984, "grad_norm": 2.080826759338379, "learning_rate": 8.17592631617794e-06, "loss": 1.1409, "step": 32570 }, { "epoch": 8.706574024585784, "grad_norm": 2.205794095993042, "learning_rate": 8.142710634687911e-06, "loss": 1.1895, "step": 32580 }, { "epoch": 8.70924639230358, "grad_norm": 2.2292401790618896, "learning_rate": 8.109559698129853e-06, "loss": 1.1653, "step": 32590 }, { "epoch": 8.71191876002138, "grad_norm": 2.2298977375030518, "learning_rate": 8.076473529870043e-06, "loss": 1.2368, "step": 32600 }, { "epoch": 8.714591127739176, "grad_norm": 2.172769546508789, "learning_rate": 8.043452153228937e-06, "loss": 1.1762, "step": 32610 }, { "epoch": 8.717263495456974, "grad_norm": 2.3154499530792236, "learning_rate": 8.010495591481481e-06, "loss": 1.2175, "step": 32620 }, { "epoch": 8.719935863174772, "grad_norm": 2.349804401397705, "learning_rate": 7.97760386785682e-06, "loss": 1.1963, "step": 32630 }, { "epoch": 8.72260823089257, "grad_norm": 2.2397024631500244, "learning_rate": 7.944777005538451e-06, "loss": 1.184, "step": 32640 }, { "epoch": 8.725280598610368, "grad_norm": 2.1825761795043945, "learning_rate": 7.912015027664144e-06, "loss": 1.1751, "step": 32650 }, { "epoch": 8.727952966328166, "grad_norm": 2.283320188522339, "learning_rate": 7.87931795732596e-06, "loss": 1.2155, "step": 32660 }, { "epoch": 8.730625334045964, "grad_norm": 2.1549315452575684, "learning_rate": 7.846685817570142e-06, "loss": 1.1893, "step": 32670 }, { "epoch": 8.733297701763762, "grad_norm": 2.342792272567749, "learning_rate": 7.814118631397271e-06, "loss": 1.193, "step": 32680 }, { "epoch": 8.73597006948156, "grad_norm": 2.0795183181762695, "learning_rate": 7.781616421762028e-06, "loss": 1.161, "step": 32690 }, { "epoch": 8.738642437199358, "grad_norm": 2.1530849933624268, "learning_rate": 7.749179211573398e-06, "loss": 1.2372, "step": 32700 }, { "epoch": 8.741314804917156, "grad_norm": 2.294534206390381, "learning_rate": 7.716807023694494e-06, "loss": 1.2193, "step": 32710 }, { "epoch": 8.743987172634954, "grad_norm": 2.171541213989258, "learning_rate": 7.684499880942608e-06, "loss": 1.2213, "step": 32720 }, { "epoch": 8.746659540352752, "grad_norm": 2.1561813354492188, "learning_rate": 7.652257806089213e-06, "loss": 1.1438, "step": 32730 }, { "epoch": 8.74933190807055, "grad_norm": 2.322664260864258, "learning_rate": 7.620080821859876e-06, "loss": 1.1736, "step": 32740 }, { "epoch": 8.752004275788348, "grad_norm": 1.9789481163024902, "learning_rate": 7.587968950934299e-06, "loss": 1.2061, "step": 32750 }, { "epoch": 8.754676643506146, "grad_norm": 1.9960687160491943, "learning_rate": 7.55592221594632e-06, "loss": 1.134, "step": 32760 }, { "epoch": 8.757349011223944, "grad_norm": 2.099275588989258, "learning_rate": 7.523940639483828e-06, "loss": 1.2045, "step": 32770 }, { "epoch": 8.760021378941742, "grad_norm": 2.18982195854187, "learning_rate": 7.492024244088813e-06, "loss": 1.2143, "step": 32780 }, { "epoch": 8.76269374665954, "grad_norm": 2.0969889163970947, "learning_rate": 7.460173052257314e-06, "loss": 1.1896, "step": 32790 }, { "epoch": 8.765366114377338, "grad_norm": 2.050344467163086, "learning_rate": 7.42838708643937e-06, "loss": 1.1792, "step": 32800 }, { "epoch": 8.768038482095136, "grad_norm": 2.1144275665283203, "learning_rate": 7.396666369039152e-06, "loss": 1.225, "step": 32810 }, { "epoch": 8.770710849812934, "grad_norm": 2.132254123687744, "learning_rate": 7.365010922414706e-06, "loss": 1.2049, "step": 32820 }, { "epoch": 8.773383217530732, "grad_norm": 2.2373416423797607, "learning_rate": 7.333420768878174e-06, "loss": 1.2315, "step": 32830 }, { "epoch": 8.77605558524853, "grad_norm": 2.2836244106292725, "learning_rate": 7.301895930695635e-06, "loss": 1.2383, "step": 32840 }, { "epoch": 8.778727952966328, "grad_norm": 2.31648588180542, "learning_rate": 7.270436430087146e-06, "loss": 1.2515, "step": 32850 }, { "epoch": 8.781400320684126, "grad_norm": 2.178370952606201, "learning_rate": 7.239042289226694e-06, "loss": 1.18, "step": 32860 }, { "epoch": 8.784072688401924, "grad_norm": 2.1737213134765625, "learning_rate": 7.207713530242233e-06, "loss": 1.2426, "step": 32870 }, { "epoch": 8.786745056119722, "grad_norm": 2.117929220199585, "learning_rate": 7.176450175215555e-06, "loss": 1.1848, "step": 32880 }, { "epoch": 8.78941742383752, "grad_norm": 2.199974775314331, "learning_rate": 7.145252246182443e-06, "loss": 1.1761, "step": 32890 }, { "epoch": 8.792089791555318, "grad_norm": 2.190769910812378, "learning_rate": 7.114119765132521e-06, "loss": 1.1209, "step": 32900 }, { "epoch": 8.794762159273116, "grad_norm": 2.011537790298462, "learning_rate": 7.08305275400929e-06, "loss": 1.2093, "step": 32910 }, { "epoch": 8.797434526990914, "grad_norm": 2.2044901847839355, "learning_rate": 7.052051234710111e-06, "loss": 1.246, "step": 32920 }, { "epoch": 8.800106894708712, "grad_norm": 2.1224358081817627, "learning_rate": 7.0211152290861285e-06, "loss": 1.1448, "step": 32930 }, { "epoch": 8.80277926242651, "grad_norm": 2.1358206272125244, "learning_rate": 6.990244758942432e-06, "loss": 1.2381, "step": 32940 }, { "epoch": 8.805451630144308, "grad_norm": 2.263728380203247, "learning_rate": 6.959439846037796e-06, "loss": 1.1022, "step": 32950 }, { "epoch": 8.808123997862106, "grad_norm": 2.27034068107605, "learning_rate": 6.92870051208484e-06, "loss": 1.1611, "step": 32960 }, { "epoch": 8.810796365579904, "grad_norm": 2.3890955448150635, "learning_rate": 6.8980267787499556e-06, "loss": 1.2175, "step": 32970 }, { "epoch": 8.813468733297702, "grad_norm": 2.088869571685791, "learning_rate": 6.867418667653325e-06, "loss": 1.2053, "step": 32980 }, { "epoch": 8.8161411010155, "grad_norm": 2.095961809158325, "learning_rate": 6.836876200368802e-06, "loss": 1.1894, "step": 32990 }, { "epoch": 8.818813468733298, "grad_norm": 2.2726595401763916, "learning_rate": 6.806399398424079e-06, "loss": 1.2248, "step": 33000 }, { "epoch": 8.821485836451096, "grad_norm": 2.1984798908233643, "learning_rate": 6.775988283300427e-06, "loss": 1.1446, "step": 33010 }, { "epoch": 8.824158204168894, "grad_norm": 2.169297456741333, "learning_rate": 6.745642876432978e-06, "loss": 1.219, "step": 33020 }, { "epoch": 8.826830571886692, "grad_norm": 2.158966302871704, "learning_rate": 6.715363199210423e-06, "loss": 1.2234, "step": 33030 }, { "epoch": 8.82950293960449, "grad_norm": 2.1342318058013916, "learning_rate": 6.685149272975167e-06, "loss": 1.2455, "step": 33040 }, { "epoch": 8.832175307322288, "grad_norm": 2.495408773422241, "learning_rate": 6.655001119023285e-06, "loss": 1.1708, "step": 33050 }, { "epoch": 8.834847675040086, "grad_norm": 2.3328559398651123, "learning_rate": 6.624918758604492e-06, "loss": 1.2229, "step": 33060 }, { "epoch": 8.837520042757884, "grad_norm": 2.2289695739746094, "learning_rate": 6.594902212922083e-06, "loss": 1.1357, "step": 33070 }, { "epoch": 8.840192410475682, "grad_norm": 2.0448060035705566, "learning_rate": 6.564951503133032e-06, "loss": 1.1718, "step": 33080 }, { "epoch": 8.84286477819348, "grad_norm": 2.258244037628174, "learning_rate": 6.535066650347854e-06, "loss": 1.1893, "step": 33090 }, { "epoch": 8.845537145911278, "grad_norm": 2.1701242923736572, "learning_rate": 6.505247675630665e-06, "loss": 1.2069, "step": 33100 }, { "epoch": 8.848209513629076, "grad_norm": 2.173630952835083, "learning_rate": 6.475494599999188e-06, "loss": 1.251, "step": 33110 }, { "epoch": 8.850881881346874, "grad_norm": 2.2525391578674316, "learning_rate": 6.445807444424601e-06, "loss": 1.2198, "step": 33120 }, { "epoch": 8.853554249064672, "grad_norm": 2.163273334503174, "learning_rate": 6.416186229831722e-06, "loss": 1.1957, "step": 33130 }, { "epoch": 8.85622661678247, "grad_norm": 2.2199954986572266, "learning_rate": 6.386630977098828e-06, "loss": 1.1306, "step": 33140 }, { "epoch": 8.858898984500268, "grad_norm": 2.177464723587036, "learning_rate": 6.357141707057712e-06, "loss": 1.2086, "step": 33150 }, { "epoch": 8.861571352218066, "grad_norm": 2.008685350418091, "learning_rate": 6.327718440493679e-06, "loss": 1.2155, "step": 33160 }, { "epoch": 8.864243719935864, "grad_norm": 2.207615613937378, "learning_rate": 6.298361198145497e-06, "loss": 1.2484, "step": 33170 }, { "epoch": 8.866916087653662, "grad_norm": 2.3766024112701416, "learning_rate": 6.269070000705413e-06, "loss": 1.1647, "step": 33180 }, { "epoch": 8.86958845537146, "grad_norm": 2.1413516998291016, "learning_rate": 6.239844868819111e-06, "loss": 1.1999, "step": 33190 }, { "epoch": 8.872260823089258, "grad_norm": 2.293713331222534, "learning_rate": 6.210685823085671e-06, "loss": 1.1767, "step": 33200 }, { "epoch": 8.874933190807056, "grad_norm": 2.025331497192383, "learning_rate": 6.181592884057686e-06, "loss": 1.1347, "step": 33210 }, { "epoch": 8.877605558524852, "grad_norm": 2.303579568862915, "learning_rate": 6.152566072241061e-06, "loss": 1.1863, "step": 33220 }, { "epoch": 8.880277926242652, "grad_norm": 2.131896495819092, "learning_rate": 6.123605408095146e-06, "loss": 1.1295, "step": 33230 }, { "epoch": 8.882950293960448, "grad_norm": 2.34211802482605, "learning_rate": 6.094710912032642e-06, "loss": 1.1889, "step": 33240 }, { "epoch": 8.885622661678246, "grad_norm": 2.198781728744507, "learning_rate": 6.065882604419637e-06, "loss": 1.2392, "step": 33250 }, { "epoch": 8.888295029396044, "grad_norm": 2.364373207092285, "learning_rate": 6.037120505575544e-06, "loss": 1.2118, "step": 33260 }, { "epoch": 8.890967397113842, "grad_norm": 2.2449607849121094, "learning_rate": 6.008424635773102e-06, "loss": 1.1974, "step": 33270 }, { "epoch": 8.89363976483164, "grad_norm": 2.028512716293335, "learning_rate": 5.979795015238398e-06, "loss": 1.2539, "step": 33280 }, { "epoch": 8.896312132549438, "grad_norm": 2.094280242919922, "learning_rate": 5.951231664150803e-06, "loss": 1.1834, "step": 33290 }, { "epoch": 8.898984500267236, "grad_norm": 2.253570556640625, "learning_rate": 5.922734602642999e-06, "loss": 1.2057, "step": 33300 }, { "epoch": 8.901656867985034, "grad_norm": 2.342904567718506, "learning_rate": 5.894303850800886e-06, "loss": 1.2225, "step": 33310 }, { "epoch": 8.904329235702832, "grad_norm": 2.143791675567627, "learning_rate": 5.865939428663725e-06, "loss": 1.1796, "step": 33320 }, { "epoch": 8.90700160342063, "grad_norm": 2.2972323894500732, "learning_rate": 5.837641356223911e-06, "loss": 1.1537, "step": 33330 }, { "epoch": 8.909673971138428, "grad_norm": 2.1303181648254395, "learning_rate": 5.8094096534272e-06, "loss": 1.1488, "step": 33340 }, { "epoch": 8.912346338856226, "grad_norm": 2.275083065032959, "learning_rate": 5.7812443401724535e-06, "loss": 1.1373, "step": 33350 }, { "epoch": 8.915018706574024, "grad_norm": 2.0265774726867676, "learning_rate": 5.75314543631178e-06, "loss": 1.1475, "step": 33360 }, { "epoch": 8.917691074291822, "grad_norm": 2.5210697650909424, "learning_rate": 5.725112961650514e-06, "loss": 1.1884, "step": 33370 }, { "epoch": 8.92036344200962, "grad_norm": 2.2452547550201416, "learning_rate": 5.697146935947128e-06, "loss": 1.2297, "step": 33380 }, { "epoch": 8.923035809727418, "grad_norm": 2.1646676063537598, "learning_rate": 5.669247378913234e-06, "loss": 1.1503, "step": 33390 }, { "epoch": 8.925708177445216, "grad_norm": 2.314223527908325, "learning_rate": 5.641414310213688e-06, "loss": 1.1617, "step": 33400 }, { "epoch": 8.928380545163014, "grad_norm": 2.2160964012145996, "learning_rate": 5.613647749466377e-06, "loss": 1.1467, "step": 33410 }, { "epoch": 8.931052912880812, "grad_norm": 2.2023355960845947, "learning_rate": 5.585947716242379e-06, "loss": 1.1815, "step": 33420 }, { "epoch": 8.93372528059861, "grad_norm": 2.236827850341797, "learning_rate": 5.558314230065842e-06, "loss": 1.1166, "step": 33430 }, { "epoch": 8.936397648316408, "grad_norm": 2.114706039428711, "learning_rate": 5.530747310414042e-06, "loss": 1.1916, "step": 33440 }, { "epoch": 8.939070016034206, "grad_norm": 2.270576238632202, "learning_rate": 5.5032469767173245e-06, "loss": 1.2367, "step": 33450 }, { "epoch": 8.941742383752004, "grad_norm": 2.239755868911743, "learning_rate": 5.475813248359063e-06, "loss": 1.1749, "step": 33460 }, { "epoch": 8.944414751469802, "grad_norm": 2.1854758262634277, "learning_rate": 5.448446144675745e-06, "loss": 1.1235, "step": 33470 }, { "epoch": 8.9470871191876, "grad_norm": 2.4273905754089355, "learning_rate": 5.421145684956852e-06, "loss": 1.1714, "step": 33480 }, { "epoch": 8.949759486905398, "grad_norm": 2.011171817779541, "learning_rate": 5.3939118884449355e-06, "loss": 1.0991, "step": 33490 }, { "epoch": 8.952431854623196, "grad_norm": 2.3507046699523926, "learning_rate": 5.366744774335519e-06, "loss": 1.1888, "step": 33500 }, { "epoch": 8.955104222340994, "grad_norm": 2.2492406368255615, "learning_rate": 5.339644361777163e-06, "loss": 1.1963, "step": 33510 }, { "epoch": 8.957776590058792, "grad_norm": 2.262593984603882, "learning_rate": 5.312610669871354e-06, "loss": 1.1745, "step": 33520 }, { "epoch": 8.96044895777659, "grad_norm": 2.1749863624572754, "learning_rate": 5.285643717672628e-06, "loss": 1.2471, "step": 33530 }, { "epoch": 8.963121325494388, "grad_norm": 2.265610933303833, "learning_rate": 5.258743524188425e-06, "loss": 1.1776, "step": 33540 }, { "epoch": 8.965793693212186, "grad_norm": 2.293970823287964, "learning_rate": 5.231910108379145e-06, "loss": 1.1752, "step": 33550 }, { "epoch": 8.968466060929984, "grad_norm": 2.2802977561950684, "learning_rate": 5.205143489158138e-06, "loss": 1.1997, "step": 33560 }, { "epoch": 8.971138428647782, "grad_norm": 2.1155900955200195, "learning_rate": 5.178443685391643e-06, "loss": 1.2017, "step": 33570 }, { "epoch": 8.97381079636558, "grad_norm": 2.154723644256592, "learning_rate": 5.151810715898819e-06, "loss": 1.2119, "step": 33580 }, { "epoch": 8.976483164083378, "grad_norm": 2.347046375274658, "learning_rate": 5.12524459945174e-06, "loss": 1.1224, "step": 33590 }, { "epoch": 8.979155531801176, "grad_norm": 2.1975371837615967, "learning_rate": 5.098745354775303e-06, "loss": 1.1923, "step": 33600 }, { "epoch": 8.981827899518974, "grad_norm": 2.0898139476776123, "learning_rate": 5.072313000547335e-06, "loss": 1.1291, "step": 33610 }, { "epoch": 8.984500267236772, "grad_norm": 2.156890630722046, "learning_rate": 5.045947555398467e-06, "loss": 1.2336, "step": 33620 }, { "epoch": 8.98717263495457, "grad_norm": 2.219177722930908, "learning_rate": 5.0196490379121885e-06, "loss": 1.2069, "step": 33630 }, { "epoch": 8.989845002672368, "grad_norm": 2.0741376876831055, "learning_rate": 4.993417466624839e-06, "loss": 1.1385, "step": 33640 }, { "epoch": 8.992517370390166, "grad_norm": 2.0923755168914795, "learning_rate": 4.967252860025506e-06, "loss": 1.186, "step": 33650 }, { "epoch": 8.995189738107964, "grad_norm": 2.1709890365600586, "learning_rate": 4.941155236556161e-06, "loss": 1.2221, "step": 33660 }, { "epoch": 8.997862105825762, "grad_norm": 2.1641249656677246, "learning_rate": 4.9151246146115e-06, "loss": 1.1887, "step": 33670 }, { "epoch": 9.00053447354356, "grad_norm": 2.1118524074554443, "learning_rate": 4.889161012539012e-06, "loss": 1.0973, "step": 33680 }, { "epoch": 9.003206841261358, "grad_norm": 2.1524829864501953, "learning_rate": 4.86326444863896e-06, "loss": 1.2212, "step": 33690 }, { "epoch": 9.005879208979156, "grad_norm": 2.078460693359375, "learning_rate": 4.837434941164354e-06, "loss": 1.1211, "step": 33700 }, { "epoch": 9.008551576696954, "grad_norm": 2.1573588848114014, "learning_rate": 4.8116725083208966e-06, "loss": 1.1962, "step": 33710 }, { "epoch": 9.011223944414752, "grad_norm": 2.1500027179718018, "learning_rate": 4.785977168267097e-06, "loss": 1.0888, "step": 33720 }, { "epoch": 9.01389631213255, "grad_norm": 2.233577251434326, "learning_rate": 4.760348939114068e-06, "loss": 1.147, "step": 33730 }, { "epoch": 9.016568679850348, "grad_norm": 2.1687660217285156, "learning_rate": 4.734787838925736e-06, "loss": 1.1486, "step": 33740 }, { "epoch": 9.019241047568146, "grad_norm": 2.109074592590332, "learning_rate": 4.709293885718624e-06, "loss": 1.1148, "step": 33750 }, { "epoch": 9.021913415285944, "grad_norm": 1.9952311515808105, "learning_rate": 4.683867097461947e-06, "loss": 1.113, "step": 33760 }, { "epoch": 9.024585783003742, "grad_norm": 2.157128095626831, "learning_rate": 4.658507492077613e-06, "loss": 1.119, "step": 33770 }, { "epoch": 9.02725815072154, "grad_norm": 2.2225191593170166, "learning_rate": 4.633215087440157e-06, "loss": 1.1487, "step": 33780 }, { "epoch": 9.029930518439338, "grad_norm": 2.1577110290527344, "learning_rate": 4.607989901376708e-06, "loss": 1.1143, "step": 33790 }, { "epoch": 9.032602886157136, "grad_norm": 2.2246758937835693, "learning_rate": 4.5828319516670655e-06, "loss": 1.1549, "step": 33800 }, { "epoch": 9.035275253874934, "grad_norm": 2.27089524269104, "learning_rate": 4.557741256043646e-06, "loss": 1.1351, "step": 33810 }, { "epoch": 9.037947621592732, "grad_norm": 2.040964126586914, "learning_rate": 4.5327178321914135e-06, "loss": 1.1093, "step": 33820 }, { "epoch": 9.04061998931053, "grad_norm": 2.337263345718384, "learning_rate": 4.507761697747981e-06, "loss": 1.1322, "step": 33830 }, { "epoch": 9.043292357028328, "grad_norm": 2.1500494480133057, "learning_rate": 4.482872870303434e-06, "loss": 1.1621, "step": 33840 }, { "epoch": 9.045964724746126, "grad_norm": 2.226703643798828, "learning_rate": 4.458051367400551e-06, "loss": 1.1566, "step": 33850 }, { "epoch": 9.048637092463924, "grad_norm": 2.3370070457458496, "learning_rate": 4.433297206534537e-06, "loss": 1.1253, "step": 33860 }, { "epoch": 9.051309460181722, "grad_norm": 2.151838541030884, "learning_rate": 4.408610405153191e-06, "loss": 1.1143, "step": 33870 }, { "epoch": 9.053981827899518, "grad_norm": 2.4844799041748047, "learning_rate": 4.383990980656816e-06, "loss": 1.2107, "step": 33880 }, { "epoch": 9.056654195617316, "grad_norm": 2.3924453258514404, "learning_rate": 4.359438950398242e-06, "loss": 1.1648, "step": 33890 }, { "epoch": 9.059326563335114, "grad_norm": 2.0562124252319336, "learning_rate": 4.3349543316827836e-06, "loss": 1.087, "step": 33900 }, { "epoch": 9.061998931052912, "grad_norm": 2.1791796684265137, "learning_rate": 4.310537141768267e-06, "loss": 1.1827, "step": 33910 }, { "epoch": 9.06467129877071, "grad_norm": 2.0512163639068604, "learning_rate": 4.286187397864905e-06, "loss": 1.2029, "step": 33920 }, { "epoch": 9.067343666488508, "grad_norm": 2.287886381149292, "learning_rate": 4.261905117135512e-06, "loss": 1.1932, "step": 33930 }, { "epoch": 9.070016034206306, "grad_norm": 2.1602370738983154, "learning_rate": 4.237690316695208e-06, "loss": 1.1276, "step": 33940 }, { "epoch": 9.072688401924104, "grad_norm": 2.3687829971313477, "learning_rate": 4.213543013611654e-06, "loss": 1.1071, "step": 33950 }, { "epoch": 9.075360769641902, "grad_norm": 2.4160573482513428, "learning_rate": 4.1894632249048996e-06, "loss": 1.1945, "step": 33960 }, { "epoch": 9.0780331373597, "grad_norm": 2.2797768115997314, "learning_rate": 4.16545096754738e-06, "loss": 1.1039, "step": 33970 }, { "epoch": 9.080705505077498, "grad_norm": 2.1881837844848633, "learning_rate": 4.14150625846399e-06, "loss": 1.0811, "step": 33980 }, { "epoch": 9.083377872795296, "grad_norm": 2.1956918239593506, "learning_rate": 4.117629114531951e-06, "loss": 1.1245, "step": 33990 }, { "epoch": 9.086050240513094, "grad_norm": 2.291219711303711, "learning_rate": 4.093819552580902e-06, "loss": 1.218, "step": 34000 }, { "epoch": 9.088722608230892, "grad_norm": 2.258108615875244, "learning_rate": 4.0700775893928515e-06, "loss": 1.2022, "step": 34010 }, { "epoch": 9.09139497594869, "grad_norm": 2.1044251918792725, "learning_rate": 4.0464032417021345e-06, "loss": 1.1566, "step": 34020 }, { "epoch": 9.094067343666488, "grad_norm": 2.324979782104492, "learning_rate": 4.022796526195416e-06, "loss": 1.1808, "step": 34030 }, { "epoch": 9.096739711384286, "grad_norm": 2.1966042518615723, "learning_rate": 3.999257459511774e-06, "loss": 1.1668, "step": 34040 }, { "epoch": 9.099412079102084, "grad_norm": 2.2304556369781494, "learning_rate": 3.975786058242481e-06, "loss": 1.1108, "step": 34050 }, { "epoch": 9.102084446819882, "grad_norm": 2.3114962577819824, "learning_rate": 3.952382338931238e-06, "loss": 1.1604, "step": 34060 }, { "epoch": 9.10475681453768, "grad_norm": 2.3876705169677734, "learning_rate": 3.929046318073937e-06, "loss": 1.1378, "step": 34070 }, { "epoch": 9.107429182255478, "grad_norm": 2.2183847427368164, "learning_rate": 3.905778012118821e-06, "loss": 1.1565, "step": 34080 }, { "epoch": 9.110101549973276, "grad_norm": 2.278012752532959, "learning_rate": 3.882577437466395e-06, "loss": 1.1854, "step": 34090 }, { "epoch": 9.112773917691074, "grad_norm": 2.2312142848968506, "learning_rate": 3.859444610469398e-06, "loss": 1.1526, "step": 34100 }, { "epoch": 9.115446285408872, "grad_norm": 2.153632402420044, "learning_rate": 3.836379547432823e-06, "loss": 1.1506, "step": 34110 }, { "epoch": 9.11811865312667, "grad_norm": 2.2880208492279053, "learning_rate": 3.8133822646139293e-06, "loss": 1.1634, "step": 34120 }, { "epoch": 9.120791020844468, "grad_norm": 2.2320468425750732, "learning_rate": 3.7904527782221754e-06, "loss": 1.1958, "step": 34130 }, { "epoch": 9.123463388562266, "grad_norm": 2.315049648284912, "learning_rate": 3.7675911044192327e-06, "loss": 1.136, "step": 34140 }, { "epoch": 9.126135756280064, "grad_norm": 2.208770990371704, "learning_rate": 3.7447972593189884e-06, "loss": 1.1423, "step": 34150 }, { "epoch": 9.128808123997862, "grad_norm": 2.2792012691497803, "learning_rate": 3.722071258987514e-06, "loss": 1.1834, "step": 34160 }, { "epoch": 9.13148049171566, "grad_norm": 2.2620413303375244, "learning_rate": 3.6994131194430846e-06, "loss": 1.1729, "step": 34170 }, { "epoch": 9.134152859433458, "grad_norm": 2.3530848026275635, "learning_rate": 3.6768228566560812e-06, "loss": 1.1507, "step": 34180 }, { "epoch": 9.136825227151256, "grad_norm": 2.227083683013916, "learning_rate": 3.6543004865491113e-06, "loss": 1.2029, "step": 34190 }, { "epoch": 9.139497594869054, "grad_norm": 2.284367561340332, "learning_rate": 3.6318460249968987e-06, "loss": 1.0919, "step": 34200 }, { "epoch": 9.142169962586852, "grad_norm": 2.215280294418335, "learning_rate": 3.6094594878262942e-06, "loss": 1.2249, "step": 34210 }, { "epoch": 9.14484233030465, "grad_norm": 2.3096508979797363, "learning_rate": 3.5871408908162986e-06, "loss": 1.1523, "step": 34220 }, { "epoch": 9.147514698022448, "grad_norm": 2.094024896621704, "learning_rate": 3.564890249698005e-06, "loss": 1.1408, "step": 34230 }, { "epoch": 9.150187065740246, "grad_norm": 2.1089279651641846, "learning_rate": 3.5427075801545915e-06, "loss": 1.2022, "step": 34240 }, { "epoch": 9.152859433458044, "grad_norm": 2.203495740890503, "learning_rate": 3.520592897821395e-06, "loss": 1.1853, "step": 34250 }, { "epoch": 9.155531801175842, "grad_norm": 2.32757830619812, "learning_rate": 3.4985462182857366e-06, "loss": 1.1672, "step": 34260 }, { "epoch": 9.15820416889364, "grad_norm": 2.2531614303588867, "learning_rate": 3.476567557087085e-06, "loss": 1.131, "step": 34270 }, { "epoch": 9.160876536611438, "grad_norm": 2.1226727962493896, "learning_rate": 3.4546569297169372e-06, "loss": 1.129, "step": 34280 }, { "epoch": 9.163548904329236, "grad_norm": 2.287727117538452, "learning_rate": 3.432814351618818e-06, "loss": 1.2042, "step": 34290 }, { "epoch": 9.166221272047034, "grad_norm": 2.398174524307251, "learning_rate": 3.4110398381883237e-06, "loss": 1.1827, "step": 34300 }, { "epoch": 9.168893639764832, "grad_norm": 2.2923951148986816, "learning_rate": 3.3893334047730653e-06, "loss": 1.1051, "step": 34310 }, { "epoch": 9.17156600748263, "grad_norm": 2.2338578701019287, "learning_rate": 3.3676950666726493e-06, "loss": 1.1521, "step": 34320 }, { "epoch": 9.174238375200428, "grad_norm": 2.015608787536621, "learning_rate": 3.346124839138687e-06, "loss": 1.2117, "step": 34330 }, { "epoch": 9.176910742918226, "grad_norm": 2.1158697605133057, "learning_rate": 3.3246227373748164e-06, "loss": 1.1213, "step": 34340 }, { "epoch": 9.179583110636024, "grad_norm": 2.2086422443389893, "learning_rate": 3.3031887765366254e-06, "loss": 1.1276, "step": 34350 }, { "epoch": 9.182255478353822, "grad_norm": 2.3656766414642334, "learning_rate": 3.2818229717316964e-06, "loss": 1.1717, "step": 34360 }, { "epoch": 9.18492784607162, "grad_norm": 2.2100043296813965, "learning_rate": 3.260525338019527e-06, "loss": 1.2049, "step": 34370 }, { "epoch": 9.187600213789418, "grad_norm": 2.2874975204467773, "learning_rate": 3.239295890411631e-06, "loss": 1.1129, "step": 34380 }, { "epoch": 9.190272581507216, "grad_norm": 2.130533218383789, "learning_rate": 3.2181346438714065e-06, "loss": 1.0837, "step": 34390 }, { "epoch": 9.192944949225014, "grad_norm": 2.237393379211426, "learning_rate": 3.1970416133142112e-06, "loss": 1.1709, "step": 34400 }, { "epoch": 9.195617316942812, "grad_norm": 2.257607936859131, "learning_rate": 3.1760168136073074e-06, "loss": 1.1382, "step": 34410 }, { "epoch": 9.19828968466061, "grad_norm": 2.200047492980957, "learning_rate": 3.1550602595698863e-06, "loss": 1.1314, "step": 34420 }, { "epoch": 9.200962052378408, "grad_norm": 2.082170248031616, "learning_rate": 3.1341719659729764e-06, "loss": 1.1748, "step": 34430 }, { "epoch": 9.203634420096206, "grad_norm": 2.2494056224823, "learning_rate": 3.1133519475396e-06, "loss": 1.1469, "step": 34440 }, { "epoch": 9.206306787814004, "grad_norm": 2.122774839401245, "learning_rate": 3.0926002189445302e-06, "loss": 1.124, "step": 34450 }, { "epoch": 9.208979155531802, "grad_norm": 2.2540276050567627, "learning_rate": 3.071916794814522e-06, "loss": 1.1909, "step": 34460 }, { "epoch": 9.2116515232496, "grad_norm": 2.2521321773529053, "learning_rate": 3.051301689728103e-06, "loss": 1.2312, "step": 34470 }, { "epoch": 9.214323890967398, "grad_norm": 2.194652557373047, "learning_rate": 3.0307549182156834e-06, "loss": 1.0393, "step": 34480 }, { "epoch": 9.216996258685196, "grad_norm": 2.143259286880493, "learning_rate": 3.010276494759512e-06, "loss": 1.0846, "step": 34490 }, { "epoch": 9.219668626402994, "grad_norm": 2.321810245513916, "learning_rate": 2.9898664337936644e-06, "loss": 1.1721, "step": 34500 }, { "epoch": 9.222340994120792, "grad_norm": 2.3118536472320557, "learning_rate": 2.969524749703978e-06, "loss": 1.1513, "step": 34510 }, { "epoch": 9.225013361838588, "grad_norm": 2.189937114715576, "learning_rate": 2.949251456828184e-06, "loss": 1.0958, "step": 34520 }, { "epoch": 9.227685729556386, "grad_norm": 2.18630313873291, "learning_rate": 2.9290465694557402e-06, "loss": 1.1264, "step": 34530 }, { "epoch": 9.230358097274184, "grad_norm": 2.2235612869262695, "learning_rate": 2.9089101018279107e-06, "loss": 1.0883, "step": 34540 }, { "epoch": 9.233030464991982, "grad_norm": 2.0618996620178223, "learning_rate": 2.8888420681377647e-06, "loss": 1.0961, "step": 34550 }, { "epoch": 9.23570283270978, "grad_norm": 2.2291855812072754, "learning_rate": 2.8688424825300543e-06, "loss": 1.1772, "step": 34560 }, { "epoch": 9.238375200427578, "grad_norm": 2.2478034496307373, "learning_rate": 2.8489113591013917e-06, "loss": 1.1674, "step": 34570 }, { "epoch": 9.241047568145376, "grad_norm": 2.2389354705810547, "learning_rate": 2.829048711900062e-06, "loss": 1.2088, "step": 34580 }, { "epoch": 9.243719935863174, "grad_norm": 2.271409034729004, "learning_rate": 2.8092545549260997e-06, "loss": 1.1155, "step": 34590 }, { "epoch": 9.246392303580972, "grad_norm": 2.2548491954803467, "learning_rate": 2.7895289021312666e-06, "loss": 1.1604, "step": 34600 }, { "epoch": 9.24906467129877, "grad_norm": 2.3220374584198, "learning_rate": 2.7698717674190633e-06, "loss": 1.1154, "step": 34610 }, { "epoch": 9.251737039016568, "grad_norm": 2.199079990386963, "learning_rate": 2.7502831646446625e-06, "loss": 1.1318, "step": 34620 }, { "epoch": 9.254409406734366, "grad_norm": 2.409346580505371, "learning_rate": 2.7307631076149644e-06, "loss": 1.1433, "step": 34630 }, { "epoch": 9.257081774452164, "grad_norm": 2.2329723834991455, "learning_rate": 2.711311610088485e-06, "loss": 1.1395, "step": 34640 }, { "epoch": 9.259754142169962, "grad_norm": 2.2747905254364014, "learning_rate": 2.6919286857755355e-06, "loss": 1.1135, "step": 34650 }, { "epoch": 9.26242650988776, "grad_norm": 2.1231606006622314, "learning_rate": 2.6726143483379873e-06, "loss": 1.1629, "step": 34660 }, { "epoch": 9.265098877605558, "grad_norm": 2.458559989929199, "learning_rate": 2.6533686113894175e-06, "loss": 1.1092, "step": 34670 }, { "epoch": 9.267771245323356, "grad_norm": 2.1900291442871094, "learning_rate": 2.6341914884950414e-06, "loss": 1.1332, "step": 34680 }, { "epoch": 9.270443613041154, "grad_norm": 2.2296600341796875, "learning_rate": 2.6150829931717137e-06, "loss": 1.1402, "step": 34690 }, { "epoch": 9.273115980758952, "grad_norm": 2.1724588871002197, "learning_rate": 2.596043138887927e-06, "loss": 1.157, "step": 34700 }, { "epoch": 9.27578834847675, "grad_norm": 2.071732997894287, "learning_rate": 2.5770719390637467e-06, "loss": 1.1205, "step": 34710 }, { "epoch": 9.278460716194548, "grad_norm": 2.1276488304138184, "learning_rate": 2.5581694070709204e-06, "loss": 1.1443, "step": 34720 }, { "epoch": 9.281133083912346, "grad_norm": 2.182807445526123, "learning_rate": 2.539335556232736e-06, "loss": 1.0873, "step": 34730 }, { "epoch": 9.283805451630144, "grad_norm": 2.1881489753723145, "learning_rate": 2.520570399824118e-06, "loss": 1.1431, "step": 34740 }, { "epoch": 9.286477819347942, "grad_norm": 2.231375217437744, "learning_rate": 2.5018739510715093e-06, "loss": 1.2077, "step": 34750 }, { "epoch": 9.28915018706574, "grad_norm": 2.198413610458374, "learning_rate": 2.483246223153013e-06, "loss": 1.1648, "step": 34760 }, { "epoch": 9.291822554783538, "grad_norm": 2.266705274581909, "learning_rate": 2.4646872291981926e-06, "loss": 1.1607, "step": 34770 }, { "epoch": 9.294494922501336, "grad_norm": 2.1397669315338135, "learning_rate": 2.4461969822882735e-06, "loss": 1.0897, "step": 34780 }, { "epoch": 9.297167290219134, "grad_norm": 2.2377161979675293, "learning_rate": 2.4277754954559197e-06, "loss": 1.1907, "step": 34790 }, { "epoch": 9.299839657936932, "grad_norm": 2.3266890048980713, "learning_rate": 2.4094227816854107e-06, "loss": 1.1884, "step": 34800 }, { "epoch": 9.30251202565473, "grad_norm": 2.2268149852752686, "learning_rate": 2.3911388539125112e-06, "loss": 1.2367, "step": 34810 }, { "epoch": 9.305184393372528, "grad_norm": 2.183056116104126, "learning_rate": 2.372923725024523e-06, "loss": 1.1417, "step": 34820 }, { "epoch": 9.307856761090326, "grad_norm": 2.2253060340881348, "learning_rate": 2.354777407860209e-06, "loss": 1.1454, "step": 34830 }, { "epoch": 9.310529128808124, "grad_norm": 2.1809163093566895, "learning_rate": 2.3366999152099166e-06, "loss": 1.1244, "step": 34840 }, { "epoch": 9.313201496525922, "grad_norm": 2.2653419971466064, "learning_rate": 2.3186912598154086e-06, "loss": 1.1478, "step": 34850 }, { "epoch": 9.31587386424372, "grad_norm": 2.2308356761932373, "learning_rate": 2.300751454369943e-06, "loss": 1.1471, "step": 34860 }, { "epoch": 9.318546231961518, "grad_norm": 2.16866397857666, "learning_rate": 2.2828805115182816e-06, "loss": 1.1804, "step": 34870 }, { "epoch": 9.321218599679316, "grad_norm": 2.1032817363739014, "learning_rate": 2.2650784438566052e-06, "loss": 1.159, "step": 34880 }, { "epoch": 9.323890967397114, "grad_norm": 2.2278592586517334, "learning_rate": 2.2473452639325986e-06, "loss": 1.23, "step": 34890 }, { "epoch": 9.326563335114912, "grad_norm": 2.134263515472412, "learning_rate": 2.2296809842453194e-06, "loss": 1.1183, "step": 34900 }, { "epoch": 9.32923570283271, "grad_norm": 2.315495014190674, "learning_rate": 2.2120856172453408e-06, "loss": 1.1538, "step": 34910 }, { "epoch": 9.331908070550508, "grad_norm": 2.3744022846221924, "learning_rate": 2.1945591753346207e-06, "loss": 1.0838, "step": 34920 }, { "epoch": 9.334580438268306, "grad_norm": 2.1596031188964844, "learning_rate": 2.1771016708665217e-06, "loss": 1.1911, "step": 34930 }, { "epoch": 9.337252805986104, "grad_norm": 2.2654311656951904, "learning_rate": 2.159713116145867e-06, "loss": 1.1786, "step": 34940 }, { "epoch": 9.339925173703902, "grad_norm": 2.3472368717193604, "learning_rate": 2.1423935234288315e-06, "loss": 1.1545, "step": 34950 }, { "epoch": 9.3425975414217, "grad_norm": 2.106304168701172, "learning_rate": 2.1251429049230054e-06, "loss": 1.1079, "step": 34960 }, { "epoch": 9.345269909139498, "grad_norm": 2.091672897338867, "learning_rate": 2.107961272787362e-06, "loss": 1.2343, "step": 34970 }, { "epoch": 9.347942276857296, "grad_norm": 1.9937862157821655, "learning_rate": 2.090848639132248e-06, "loss": 1.162, "step": 34980 }, { "epoch": 9.350614644575094, "grad_norm": 2.2787647247314453, "learning_rate": 2.0738050160193813e-06, "loss": 1.119, "step": 34990 }, { "epoch": 9.353287012292892, "grad_norm": 2.159125566482544, "learning_rate": 2.056830415461819e-06, "loss": 1.1531, "step": 35000 }, { "epoch": 9.35595938001069, "grad_norm": 2.2107110023498535, "learning_rate": 2.039924849423991e-06, "loss": 1.1955, "step": 35010 }, { "epoch": 9.358631747728488, "grad_norm": 2.2851288318634033, "learning_rate": 2.0230883298216762e-06, "loss": 1.1427, "step": 35020 }, { "epoch": 9.361304115446286, "grad_norm": 2.156013250350952, "learning_rate": 2.0063208685219494e-06, "loss": 1.1963, "step": 35030 }, { "epoch": 9.363976483164084, "grad_norm": 2.153143882751465, "learning_rate": 1.9896224773432335e-06, "loss": 1.1267, "step": 35040 }, { "epoch": 9.366648850881882, "grad_norm": 2.4556376934051514, "learning_rate": 1.9729931680552814e-06, "loss": 1.0745, "step": 35050 }, { "epoch": 9.36932121859968, "grad_norm": 2.2956066131591797, "learning_rate": 1.9564329523791283e-06, "loss": 1.077, "step": 35060 }, { "epoch": 9.371993586317478, "grad_norm": 1.9940216541290283, "learning_rate": 1.939941841987114e-06, "loss": 1.1743, "step": 35070 }, { "epoch": 9.374665954035276, "grad_norm": 2.188178062438965, "learning_rate": 1.9235198485028973e-06, "loss": 1.1612, "step": 35080 }, { "epoch": 9.377338321753074, "grad_norm": 2.148359537124634, "learning_rate": 1.9071669835013627e-06, "loss": 1.1766, "step": 35090 }, { "epoch": 9.380010689470872, "grad_norm": 2.1279687881469727, "learning_rate": 1.8908832585087466e-06, "loss": 1.1796, "step": 35100 }, { "epoch": 9.38268305718867, "grad_norm": 2.182737350463867, "learning_rate": 1.8746686850024897e-06, "loss": 1.0833, "step": 35110 }, { "epoch": 9.385355424906468, "grad_norm": 2.1898319721221924, "learning_rate": 1.8585232744113169e-06, "loss": 1.2201, "step": 35120 }, { "epoch": 9.388027792624266, "grad_norm": 2.154783010482788, "learning_rate": 1.8424470381152137e-06, "loss": 1.2013, "step": 35130 }, { "epoch": 9.390700160342064, "grad_norm": 2.2670187950134277, "learning_rate": 1.826439987445383e-06, "loss": 1.1101, "step": 35140 }, { "epoch": 9.393372528059862, "grad_norm": 2.1478779315948486, "learning_rate": 1.8105021336842665e-06, "loss": 1.1717, "step": 35150 }, { "epoch": 9.39604489577766, "grad_norm": 2.2820193767547607, "learning_rate": 1.7946334880655668e-06, "loss": 1.1958, "step": 35160 }, { "epoch": 9.398717263495456, "grad_norm": 2.1999762058258057, "learning_rate": 1.7788340617741595e-06, "loss": 1.1903, "step": 35170 }, { "epoch": 9.401389631213254, "grad_norm": 2.127655506134033, "learning_rate": 1.7631038659461807e-06, "loss": 1.1321, "step": 35180 }, { "epoch": 9.404061998931052, "grad_norm": 2.36655855178833, "learning_rate": 1.7474429116689173e-06, "loss": 1.1444, "step": 35190 }, { "epoch": 9.40673436664885, "grad_norm": 2.2247183322906494, "learning_rate": 1.7318512099808949e-06, "loss": 1.1389, "step": 35200 }, { "epoch": 9.409406734366648, "grad_norm": 2.115464687347412, "learning_rate": 1.716328771871789e-06, "loss": 1.1312, "step": 35210 }, { "epoch": 9.412079102084446, "grad_norm": 2.1210875511169434, "learning_rate": 1.7008756082825038e-06, "loss": 1.2016, "step": 35220 }, { "epoch": 9.414751469802244, "grad_norm": 2.1347267627716064, "learning_rate": 1.68549173010506e-06, "loss": 1.1493, "step": 35230 }, { "epoch": 9.417423837520042, "grad_norm": 2.0214762687683105, "learning_rate": 1.6701771481826834e-06, "loss": 1.0916, "step": 35240 }, { "epoch": 9.42009620523784, "grad_norm": 2.2505288124084473, "learning_rate": 1.6549318733097395e-06, "loss": 1.1825, "step": 35250 }, { "epoch": 9.422768572955638, "grad_norm": 2.238849401473999, "learning_rate": 1.6397559162317555e-06, "loss": 1.1622, "step": 35260 }, { "epoch": 9.425440940673436, "grad_norm": 2.153085947036743, "learning_rate": 1.6246492876453968e-06, "loss": 1.1791, "step": 35270 }, { "epoch": 9.428113308391234, "grad_norm": 2.3326046466827393, "learning_rate": 1.6096119981984237e-06, "loss": 1.1357, "step": 35280 }, { "epoch": 9.430785676109032, "grad_norm": 2.0615546703338623, "learning_rate": 1.5946440584898136e-06, "loss": 1.1313, "step": 35290 }, { "epoch": 9.43345804382683, "grad_norm": 2.2044572830200195, "learning_rate": 1.5797454790695609e-06, "loss": 1.211, "step": 35300 }, { "epoch": 9.436130411544628, "grad_norm": 2.285611867904663, "learning_rate": 1.564916270438843e-06, "loss": 1.2329, "step": 35310 }, { "epoch": 9.438802779262426, "grad_norm": 2.199610710144043, "learning_rate": 1.5501564430499104e-06, "loss": 1.1352, "step": 35320 }, { "epoch": 9.441475146980224, "grad_norm": 2.1760666370391846, "learning_rate": 1.53546600730613e-06, "loss": 1.1469, "step": 35330 }, { "epoch": 9.444147514698022, "grad_norm": 2.078403949737549, "learning_rate": 1.5208449735619302e-06, "loss": 1.1517, "step": 35340 }, { "epoch": 9.44681988241582, "grad_norm": 2.2231712341308594, "learning_rate": 1.506293352122856e-06, "loss": 1.1508, "step": 35350 }, { "epoch": 9.449492250133618, "grad_norm": 2.1282825469970703, "learning_rate": 1.4918111532454815e-06, "loss": 1.1357, "step": 35360 }, { "epoch": 9.452164617851416, "grad_norm": 2.149660348892212, "learning_rate": 1.4773983871375186e-06, "loss": 1.1193, "step": 35370 }, { "epoch": 9.454836985569214, "grad_norm": 2.314764976501465, "learning_rate": 1.4630550639576635e-06, "loss": 1.1809, "step": 35380 }, { "epoch": 9.457509353287012, "grad_norm": 2.2163586616516113, "learning_rate": 1.448781193815718e-06, "loss": 1.1336, "step": 35390 }, { "epoch": 9.46018172100481, "grad_norm": 2.264066696166992, "learning_rate": 1.4345767867725123e-06, "loss": 1.1527, "step": 35400 }, { "epoch": 9.462854088722608, "grad_norm": 2.1492066383361816, "learning_rate": 1.4204418528399155e-06, "loss": 1.1342, "step": 35410 }, { "epoch": 9.465526456440406, "grad_norm": 2.2571280002593994, "learning_rate": 1.4063764019808468e-06, "loss": 1.1842, "step": 35420 }, { "epoch": 9.468198824158204, "grad_norm": 2.078209638595581, "learning_rate": 1.3923804441092092e-06, "loss": 1.2063, "step": 35430 }, { "epoch": 9.470871191876002, "grad_norm": 2.1398227214813232, "learning_rate": 1.3784539890899563e-06, "loss": 1.0818, "step": 35440 }, { "epoch": 9.4735435595938, "grad_norm": 2.125175952911377, "learning_rate": 1.364597046739058e-06, "loss": 1.1194, "step": 35450 }, { "epoch": 9.476215927311598, "grad_norm": 2.2943966388702393, "learning_rate": 1.3508096268234793e-06, "loss": 1.1576, "step": 35460 }, { "epoch": 9.478888295029396, "grad_norm": 2.25311017036438, "learning_rate": 1.3370917390611692e-06, "loss": 1.162, "step": 35470 }, { "epoch": 9.481560662747194, "grad_norm": 2.2590370178222656, "learning_rate": 1.3234433931210932e-06, "loss": 1.1264, "step": 35480 }, { "epoch": 9.484233030464992, "grad_norm": 2.213134288787842, "learning_rate": 1.3098645986231673e-06, "loss": 1.1393, "step": 35490 }, { "epoch": 9.48690539818279, "grad_norm": 2.335373878479004, "learning_rate": 1.2963553651383243e-06, "loss": 1.1824, "step": 35500 }, { "epoch": 9.489577765900588, "grad_norm": 2.319972515106201, "learning_rate": 1.2829157021884363e-06, "loss": 1.1844, "step": 35510 }, { "epoch": 9.492250133618386, "grad_norm": 2.00939679145813, "learning_rate": 1.2695456192463596e-06, "loss": 1.0984, "step": 35520 }, { "epoch": 9.494922501336184, "grad_norm": 2.128068685531616, "learning_rate": 1.2562451257358887e-06, "loss": 1.1354, "step": 35530 }, { "epoch": 9.497594869053982, "grad_norm": 2.1643667221069336, "learning_rate": 1.2430142310317805e-06, "loss": 1.1354, "step": 35540 }, { "epoch": 9.50026723677178, "grad_norm": 2.1348328590393066, "learning_rate": 1.2298529444597306e-06, "loss": 1.179, "step": 35550 }, { "epoch": 9.502939604489578, "grad_norm": 2.144758939743042, "learning_rate": 1.2167612752963965e-06, "loss": 1.1704, "step": 35560 }, { "epoch": 9.505611972207376, "grad_norm": 1.9720749855041504, "learning_rate": 1.2037392327693075e-06, "loss": 1.1452, "step": 35570 }, { "epoch": 9.508284339925174, "grad_norm": 2.160682439804077, "learning_rate": 1.1907868260569888e-06, "loss": 1.2285, "step": 35580 }, { "epoch": 9.510956707642972, "grad_norm": 2.20426869392395, "learning_rate": 1.177904064288826e-06, "loss": 1.1771, "step": 35590 }, { "epoch": 9.51362907536077, "grad_norm": 2.1965248584747314, "learning_rate": 1.1650909565451562e-06, "loss": 1.1335, "step": 35600 }, { "epoch": 9.516301443078568, "grad_norm": 2.211765766143799, "learning_rate": 1.1523475118571768e-06, "loss": 1.1642, "step": 35610 }, { "epoch": 9.518973810796366, "grad_norm": 2.3630828857421875, "learning_rate": 1.1396737392070479e-06, "loss": 1.161, "step": 35620 }, { "epoch": 9.521646178514164, "grad_norm": 2.196058511734009, "learning_rate": 1.1270696475277565e-06, "loss": 1.2022, "step": 35630 }, { "epoch": 9.524318546231962, "grad_norm": 2.264314889907837, "learning_rate": 1.1145352457032077e-06, "loss": 1.1206, "step": 35640 }, { "epoch": 9.52699091394976, "grad_norm": 2.4490818977355957, "learning_rate": 1.1020705425681898e-06, "loss": 1.1671, "step": 35650 }, { "epoch": 9.529663281667558, "grad_norm": 2.1642322540283203, "learning_rate": 1.089675546908353e-06, "loss": 1.1532, "step": 35660 }, { "epoch": 9.532335649385356, "grad_norm": 2.2017457485198975, "learning_rate": 1.0773502674602199e-06, "loss": 1.2436, "step": 35670 }, { "epoch": 9.535008017103154, "grad_norm": 2.204679250717163, "learning_rate": 1.0650947129111521e-06, "loss": 1.1109, "step": 35680 }, { "epoch": 9.537680384820952, "grad_norm": 2.31233811378479, "learning_rate": 1.052908891899429e-06, "loss": 1.1487, "step": 35690 }, { "epoch": 9.54035275253875, "grad_norm": 2.1574692726135254, "learning_rate": 1.040792813014102e-06, "loss": 1.2181, "step": 35700 }, { "epoch": 9.543025120256548, "grad_norm": 2.236241340637207, "learning_rate": 1.0287464847951177e-06, "loss": 1.1333, "step": 35710 }, { "epoch": 9.545697487974346, "grad_norm": 2.260523557662964, "learning_rate": 1.016769915733229e-06, "loss": 1.1369, "step": 35720 }, { "epoch": 9.548369855692144, "grad_norm": 2.384070873260498, "learning_rate": 1.0048631142700383e-06, "loss": 1.1293, "step": 35730 }, { "epoch": 9.551042223409942, "grad_norm": 2.217766761779785, "learning_rate": 9.93026088797977e-07, "loss": 1.165, "step": 35740 }, { "epoch": 9.55371459112774, "grad_norm": 2.254021644592285, "learning_rate": 9.812588476602712e-07, "loss": 1.1192, "step": 35750 }, { "epoch": 9.556386958845538, "grad_norm": 2.1699182987213135, "learning_rate": 9.695613991509644e-07, "loss": 1.1211, "step": 35760 }, { "epoch": 9.559059326563336, "grad_norm": 2.119053363800049, "learning_rate": 9.57933751514939e-07, "loss": 1.1655, "step": 35770 }, { "epoch": 9.561731694281134, "grad_norm": 2.1433210372924805, "learning_rate": 9.4637591294785e-07, "loss": 1.1766, "step": 35780 }, { "epoch": 9.564404061998932, "grad_norm": 2.2094874382019043, "learning_rate": 9.348878915961479e-07, "loss": 1.1003, "step": 35790 }, { "epoch": 9.567076429716728, "grad_norm": 2.401143789291382, "learning_rate": 9.234696955570776e-07, "loss": 1.1256, "step": 35800 }, { "epoch": 9.569748797434528, "grad_norm": 2.1601922512054443, "learning_rate": 9.121213328786793e-07, "loss": 1.1402, "step": 35810 }, { "epoch": 9.572421165152324, "grad_norm": 2.0809834003448486, "learning_rate": 9.008428115597766e-07, "loss": 1.1373, "step": 35820 }, { "epoch": 9.575093532870122, "grad_norm": 2.3695173263549805, "learning_rate": 8.896341395499108e-07, "loss": 1.1881, "step": 35830 }, { "epoch": 9.57776590058792, "grad_norm": 2.322136163711548, "learning_rate": 8.784953247494732e-07, "loss": 1.148, "step": 35840 }, { "epoch": 9.580438268305718, "grad_norm": 2.2366907596588135, "learning_rate": 8.674263750095502e-07, "loss": 1.1159, "step": 35850 }, { "epoch": 9.583110636023516, "grad_norm": 2.4116995334625244, "learning_rate": 8.564272981320121e-07, "loss": 1.2513, "step": 35860 }, { "epoch": 9.585783003741314, "grad_norm": 2.315009832382202, "learning_rate": 8.454981018694796e-07, "loss": 1.188, "step": 35870 }, { "epoch": 9.588455371459112, "grad_norm": 2.3434865474700928, "learning_rate": 8.346387939253352e-07, "loss": 1.1815, "step": 35880 }, { "epoch": 9.59112773917691, "grad_norm": 2.3306496143341064, "learning_rate": 8.238493819536452e-07, "loss": 1.1481, "step": 35890 }, { "epoch": 9.593800106894708, "grad_norm": 2.046522617340088, "learning_rate": 8.131298735592818e-07, "loss": 1.1876, "step": 35900 }, { "epoch": 9.596472474612506, "grad_norm": 2.22929310798645, "learning_rate": 8.024802762977901e-07, "loss": 1.2076, "step": 35910 }, { "epoch": 9.599144842330304, "grad_norm": 2.208465337753296, "learning_rate": 7.919005976754656e-07, "loss": 1.1427, "step": 35920 }, { "epoch": 9.601817210048102, "grad_norm": 2.2475030422210693, "learning_rate": 7.813908451493102e-07, "loss": 1.3049, "step": 35930 }, { "epoch": 9.6044895777659, "grad_norm": 2.2951815128326416, "learning_rate": 7.709510261270647e-07, "loss": 1.1495, "step": 35940 }, { "epoch": 9.607161945483698, "grad_norm": 2.2295608520507812, "learning_rate": 7.605811479671321e-07, "loss": 1.1845, "step": 35950 }, { "epoch": 9.609834313201496, "grad_norm": 2.262756109237671, "learning_rate": 7.502812179786767e-07, "loss": 1.1801, "step": 35960 }, { "epoch": 9.612506680919294, "grad_norm": 2.2262392044067383, "learning_rate": 7.400512434214912e-07, "loss": 1.2225, "step": 35970 }, { "epoch": 9.615179048637092, "grad_norm": 2.133723497390747, "learning_rate": 7.298912315061302e-07, "loss": 1.0648, "step": 35980 }, { "epoch": 9.61785141635489, "grad_norm": 2.137983798980713, "learning_rate": 7.198011893937761e-07, "loss": 1.1054, "step": 35990 }, { "epoch": 9.620523784072688, "grad_norm": 2.0405776500701904, "learning_rate": 7.097811241963403e-07, "loss": 1.1628, "step": 36000 }, { "epoch": 9.623196151790486, "grad_norm": 2.210630178451538, "learning_rate": 6.998310429763843e-07, "loss": 1.137, "step": 36010 }, { "epoch": 9.625868519508284, "grad_norm": 2.083991289138794, "learning_rate": 6.899509527471537e-07, "loss": 1.1504, "step": 36020 }, { "epoch": 9.628540887226082, "grad_norm": 2.0966291427612305, "learning_rate": 6.801408604725446e-07, "loss": 1.1015, "step": 36030 }, { "epoch": 9.63121325494388, "grad_norm": 2.172142505645752, "learning_rate": 6.70400773067148e-07, "loss": 1.1719, "step": 36040 }, { "epoch": 9.633885622661678, "grad_norm": 2.178936719894409, "learning_rate": 6.607306973961724e-07, "loss": 1.1747, "step": 36050 }, { "epoch": 9.636557990379476, "grad_norm": 2.1253082752227783, "learning_rate": 6.511306402755101e-07, "loss": 1.1207, "step": 36060 }, { "epoch": 9.639230358097274, "grad_norm": 2.2981467247009277, "learning_rate": 6.41600608471693e-07, "loss": 1.0727, "step": 36070 }, { "epoch": 9.641902725815072, "grad_norm": 2.133519172668457, "learning_rate": 6.321406087018811e-07, "loss": 1.1964, "step": 36080 }, { "epoch": 9.64457509353287, "grad_norm": 2.155704975128174, "learning_rate": 6.227506476338963e-07, "loss": 1.1886, "step": 36090 }, { "epoch": 9.647247461250668, "grad_norm": 2.2018682956695557, "learning_rate": 6.134307318861776e-07, "loss": 1.2259, "step": 36100 }, { "epoch": 9.649919828968466, "grad_norm": 2.2575747966766357, "learning_rate": 6.041808680278038e-07, "loss": 1.1502, "step": 36110 }, { "epoch": 9.652592196686264, "grad_norm": 2.2044577598571777, "learning_rate": 5.950010625784707e-07, "loss": 1.1595, "step": 36120 }, { "epoch": 9.655264564404062, "grad_norm": 2.363152265548706, "learning_rate": 5.858913220085027e-07, "loss": 1.1607, "step": 36130 }, { "epoch": 9.65793693212186, "grad_norm": 2.114800214767456, "learning_rate": 5.76851652738819e-07, "loss": 1.2111, "step": 36140 }, { "epoch": 9.660609299839658, "grad_norm": 2.2297019958496094, "learning_rate": 5.678820611409785e-07, "loss": 1.1889, "step": 36150 }, { "epoch": 9.663281667557456, "grad_norm": 2.206164598464966, "learning_rate": 5.58982553537124e-07, "loss": 1.1724, "step": 36160 }, { "epoch": 9.665954035275254, "grad_norm": 2.4453980922698975, "learning_rate": 5.501531362000046e-07, "loss": 1.1992, "step": 36170 }, { "epoch": 9.668626402993052, "grad_norm": 2.4066483974456787, "learning_rate": 5.413938153529752e-07, "loss": 1.2186, "step": 36180 }, { "epoch": 9.67129877071085, "grad_norm": 2.136568784713745, "learning_rate": 5.327045971699862e-07, "loss": 1.173, "step": 36190 }, { "epoch": 9.673971138428648, "grad_norm": 2.1053264141082764, "learning_rate": 5.240854877755497e-07, "loss": 1.1664, "step": 36200 }, { "epoch": 9.676643506146446, "grad_norm": 2.2664544582366943, "learning_rate": 5.155364932447837e-07, "loss": 1.2088, "step": 36210 }, { "epoch": 9.679315873864244, "grad_norm": 2.215672254562378, "learning_rate": 5.070576196034127e-07, "loss": 1.1669, "step": 36220 }, { "epoch": 9.681988241582042, "grad_norm": 2.0596985816955566, "learning_rate": 4.986488728276673e-07, "loss": 1.1515, "step": 36230 }, { "epoch": 9.68466060929984, "grad_norm": 2.085028886795044, "learning_rate": 4.903102588444175e-07, "loss": 1.15, "step": 36240 }, { "epoch": 9.687332977017638, "grad_norm": 2.119126081466675, "learning_rate": 4.820417835310731e-07, "loss": 1.1824, "step": 36250 }, { "epoch": 9.690005344735436, "grad_norm": 2.221674919128418, "learning_rate": 4.738434527155944e-07, "loss": 1.1601, "step": 36260 }, { "epoch": 9.692677712453234, "grad_norm": 2.2603559494018555, "learning_rate": 4.6571527217652566e-07, "loss": 1.1711, "step": 36270 }, { "epoch": 9.695350080171032, "grad_norm": 2.2842955589294434, "learning_rate": 4.576572476429508e-07, "loss": 1.1509, "step": 36280 }, { "epoch": 9.69802244788883, "grad_norm": 2.3427512645721436, "learning_rate": 4.4966938479449326e-07, "loss": 1.1184, "step": 36290 }, { "epoch": 9.700694815606628, "grad_norm": 2.1294307708740234, "learning_rate": 4.417516892613716e-07, "loss": 1.1445, "step": 36300 }, { "epoch": 9.703367183324426, "grad_norm": 2.2346582412719727, "learning_rate": 4.3390416662428823e-07, "loss": 1.1325, "step": 36310 }, { "epoch": 9.706039551042224, "grad_norm": 2.1415388584136963, "learning_rate": 4.2612682241452986e-07, "loss": 1.0958, "step": 36320 }, { "epoch": 9.708711918760022, "grad_norm": 2.217890977859497, "learning_rate": 4.1841966211388914e-07, "loss": 1.1301, "step": 36330 }, { "epoch": 9.71138428647782, "grad_norm": 2.219364881515503, "learning_rate": 4.107826911546986e-07, "loss": 1.1017, "step": 36340 }, { "epoch": 9.714056654195618, "grad_norm": 2.173316717147827, "learning_rate": 4.0321591491983e-07, "loss": 1.159, "step": 36350 }, { "epoch": 9.716729021913416, "grad_norm": 2.2166037559509277, "learning_rate": 3.9571933874266163e-07, "loss": 1.1681, "step": 36360 }, { "epoch": 9.719401389631214, "grad_norm": 2.2103919982910156, "learning_rate": 3.882929679071112e-07, "loss": 1.1577, "step": 36370 }, { "epoch": 9.722073757349012, "grad_norm": 2.0758190155029297, "learning_rate": 3.8093680764758054e-07, "loss": 1.2246, "step": 36380 }, { "epoch": 9.72474612506681, "grad_norm": 2.353356122970581, "learning_rate": 3.736508631490332e-07, "loss": 1.1729, "step": 36390 }, { "epoch": 9.727418492784608, "grad_norm": 1.9840309619903564, "learning_rate": 3.664351395468835e-07, "loss": 1.068, "step": 36400 }, { "epoch": 9.730090860502406, "grad_norm": 2.2292327880859375, "learning_rate": 3.592896419271075e-07, "loss": 1.1288, "step": 36410 }, { "epoch": 9.732763228220204, "grad_norm": 2.07846999168396, "learning_rate": 3.522143753261431e-07, "loss": 1.1392, "step": 36420 }, { "epoch": 9.735435595938002, "grad_norm": 2.6675517559051514, "learning_rate": 3.452093447309346e-07, "loss": 1.1, "step": 36430 }, { "epoch": 9.7381079636558, "grad_norm": 2.240673303604126, "learning_rate": 3.3827455507894345e-07, "loss": 1.1353, "step": 36440 }, { "epoch": 9.740780331373596, "grad_norm": 2.311814785003662, "learning_rate": 3.31410011258082e-07, "loss": 1.1452, "step": 36450 }, { "epoch": 9.743452699091396, "grad_norm": 2.222055673599243, "learning_rate": 3.2461571810679104e-07, "loss": 1.1698, "step": 36460 }, { "epoch": 9.746125066809192, "grad_norm": 2.24259614944458, "learning_rate": 3.178916804139731e-07, "loss": 1.1597, "step": 36470 }, { "epoch": 9.74879743452699, "grad_norm": 2.257625102996826, "learning_rate": 3.112379029190149e-07, "loss": 1.1344, "step": 36480 }, { "epoch": 9.751469802244788, "grad_norm": 2.3131158351898193, "learning_rate": 3.04654390311776e-07, "loss": 1.1931, "step": 36490 }, { "epoch": 9.754142169962586, "grad_norm": 2.151228427886963, "learning_rate": 2.981411472326112e-07, "loss": 1.2228, "step": 36500 }, { "epoch": 9.756814537680384, "grad_norm": 2.3429644107818604, "learning_rate": 2.91698178272326e-07, "loss": 1.1212, "step": 36510 }, { "epoch": 9.759486905398182, "grad_norm": 2.1811740398406982, "learning_rate": 2.853254879721989e-07, "loss": 1.1278, "step": 36520 }, { "epoch": 9.76215927311598, "grad_norm": 2.123075008392334, "learning_rate": 2.790230808239591e-07, "loss": 1.1174, "step": 36530 }, { "epoch": 9.764831640833778, "grad_norm": 2.2247934341430664, "learning_rate": 2.7279096126984203e-07, "loss": 1.1534, "step": 36540 }, { "epoch": 9.767504008551576, "grad_norm": 2.342512845993042, "learning_rate": 2.6662913370248956e-07, "loss": 1.1981, "step": 36550 }, { "epoch": 9.770176376269374, "grad_norm": 2.2304458618164062, "learning_rate": 2.6053760246501634e-07, "loss": 1.1064, "step": 36560 }, { "epoch": 9.772848743987172, "grad_norm": 2.1226627826690674, "learning_rate": 2.545163718510102e-07, "loss": 1.1067, "step": 36570 }, { "epoch": 9.77552111170497, "grad_norm": 2.2388312816619873, "learning_rate": 2.485654461044873e-07, "loss": 1.1208, "step": 36580 }, { "epoch": 9.778193479422768, "grad_norm": 2.459993839263916, "learning_rate": 2.4268482941989244e-07, "loss": 1.1374, "step": 36590 }, { "epoch": 9.780865847140566, "grad_norm": 2.284939765930176, "learning_rate": 2.3687452594216563e-07, "loss": 1.1587, "step": 36600 }, { "epoch": 9.783538214858364, "grad_norm": 2.184542179107666, "learning_rate": 2.3113453976663092e-07, "loss": 1.1153, "step": 36610 }, { "epoch": 9.786210582576162, "grad_norm": 2.275195360183716, "learning_rate": 2.254648749390964e-07, "loss": 1.205, "step": 36620 }, { "epoch": 9.78888295029396, "grad_norm": 2.168830394744873, "learning_rate": 2.198655354557766e-07, "loss": 1.0811, "step": 36630 }, { "epoch": 9.791555318011758, "grad_norm": 2.296387195587158, "learning_rate": 2.143365252633145e-07, "loss": 1.1222, "step": 36640 }, { "epoch": 9.794227685729556, "grad_norm": 2.32746958732605, "learning_rate": 2.0887784825880385e-07, "loss": 1.087, "step": 36650 }, { "epoch": 9.796900053447354, "grad_norm": 2.0658833980560303, "learning_rate": 2.034895082897448e-07, "loss": 1.0945, "step": 36660 }, { "epoch": 9.799572421165152, "grad_norm": 2.0493061542510986, "learning_rate": 1.981715091540659e-07, "loss": 1.1167, "step": 36670 }, { "epoch": 9.80224478888295, "grad_norm": 2.1636769771575928, "learning_rate": 1.929238546001355e-07, "loss": 1.1381, "step": 36680 }, { "epoch": 9.804917156600748, "grad_norm": 2.239959239959717, "learning_rate": 1.8774654832671712e-07, "loss": 1.1333, "step": 36690 }, { "epoch": 9.807589524318546, "grad_norm": 2.1792783737182617, "learning_rate": 1.8263959398298058e-07, "loss": 1.1398, "step": 36700 }, { "epoch": 9.810261892036344, "grad_norm": 2.1390347480773926, "learning_rate": 1.776029951685465e-07, "loss": 1.096, "step": 36710 }, { "epoch": 9.812934259754142, "grad_norm": 2.2639973163604736, "learning_rate": 1.726367554334196e-07, "loss": 1.2209, "step": 36720 }, { "epoch": 9.81560662747194, "grad_norm": 2.128241539001465, "learning_rate": 1.6774087827801098e-07, "loss": 1.1625, "step": 36730 }, { "epoch": 9.818278995189738, "grad_norm": 2.096963405609131, "learning_rate": 1.6291536715314915e-07, "loss": 1.1966, "step": 36740 }, { "epoch": 9.820951362907536, "grad_norm": 2.1843740940093994, "learning_rate": 1.5816022546005782e-07, "loss": 1.1901, "step": 36750 }, { "epoch": 9.823623730625334, "grad_norm": 2.144131660461426, "learning_rate": 1.534754565503671e-07, "loss": 1.1724, "step": 36760 }, { "epoch": 9.826296098343132, "grad_norm": 2.0758657455444336, "learning_rate": 1.4886106372609122e-07, "loss": 1.1841, "step": 36770 }, { "epoch": 9.82896846606093, "grad_norm": 2.2896933555603027, "learning_rate": 1.443170502396507e-07, "loss": 1.1162, "step": 36780 }, { "epoch": 9.831640833778728, "grad_norm": 2.102187395095825, "learning_rate": 1.3984341929388356e-07, "loss": 1.1621, "step": 36790 }, { "epoch": 9.834313201496526, "grad_norm": 1.845706820487976, "learning_rate": 1.3544017404196751e-07, "loss": 1.0704, "step": 36800 }, { "epoch": 9.836985569214324, "grad_norm": 2.300069570541382, "learning_rate": 1.3110731758750882e-07, "loss": 1.1541, "step": 36810 }, { "epoch": 9.839657936932122, "grad_norm": 2.137861728668213, "learning_rate": 1.2684485298449789e-07, "loss": 1.1428, "step": 36820 }, { "epoch": 9.84233030464992, "grad_norm": 2.2372660636901855, "learning_rate": 1.2265278323728703e-07, "loss": 1.1281, "step": 36830 }, { "epoch": 9.845002672367718, "grad_norm": 2.1376864910125732, "learning_rate": 1.1853111130063487e-07, "loss": 1.1876, "step": 36840 }, { "epoch": 9.847675040085516, "grad_norm": 2.1345643997192383, "learning_rate": 1.1447984007966206e-07, "loss": 1.1304, "step": 36850 }, { "epoch": 9.850347407803314, "grad_norm": 2.3281424045562744, "learning_rate": 1.1049897242988438e-07, "loss": 1.1783, "step": 36860 }, { "epoch": 9.853019775521112, "grad_norm": 2.2077133655548096, "learning_rate": 1.0658851115719071e-07, "loss": 1.101, "step": 36870 }, { "epoch": 9.85569214323891, "grad_norm": 2.10617995262146, "learning_rate": 1.0274845901783182e-07, "loss": 1.1355, "step": 36880 }, { "epoch": 9.858364510956708, "grad_norm": 2.1513755321502686, "learning_rate": 9.897881871844262e-08, "loss": 1.1561, "step": 36890 }, { "epoch": 9.861036878674506, "grad_norm": 2.424665689468384, "learning_rate": 9.527959291603105e-08, "loss": 1.2086, "step": 36900 }, { "epoch": 9.863709246392304, "grad_norm": 2.4013102054595947, "learning_rate": 9.165078421795593e-08, "loss": 1.2428, "step": 36910 }, { "epoch": 9.866381614110102, "grad_norm": 2.4641964435577393, "learning_rate": 8.809239518198231e-08, "loss": 1.1453, "step": 36920 }, { "epoch": 9.8690539818279, "grad_norm": 2.3074302673339844, "learning_rate": 8.460442831619286e-08, "loss": 1.1758, "step": 36930 }, { "epoch": 9.871726349545698, "grad_norm": 2.1441409587860107, "learning_rate": 8.118688607905433e-08, "loss": 1.1102, "step": 36940 }, { "epoch": 9.874398717263496, "grad_norm": 2.108098268508911, "learning_rate": 7.783977087940652e-08, "loss": 1.1661, "step": 36950 }, { "epoch": 9.877071084981294, "grad_norm": 2.226764678955078, "learning_rate": 7.456308507642896e-08, "loss": 1.119, "step": 36960 }, { "epoch": 9.879743452699092, "grad_norm": 2.0593080520629883, "learning_rate": 7.135683097968527e-08, "loss": 1.0746, "step": 36970 }, { "epoch": 9.88241582041689, "grad_norm": 2.4087886810302734, "learning_rate": 6.822101084906774e-08, "loss": 1.1997, "step": 36980 }, { "epoch": 9.885088188134688, "grad_norm": 2.1702661514282227, "learning_rate": 6.515562689483056e-08, "loss": 1.1641, "step": 36990 }, { "epoch": 9.887760555852486, "grad_norm": 2.217583179473877, "learning_rate": 6.2160681277601e-08, "loss": 1.1708, "step": 37000 }, { "epoch": 9.890432923570284, "grad_norm": 2.226370096206665, "learning_rate": 5.923617610833487e-08, "loss": 1.2202, "step": 37010 }, { "epoch": 9.893105291288082, "grad_norm": 2.27646803855896, "learning_rate": 5.638211344836108e-08, "loss": 1.1542, "step": 37020 }, { "epoch": 9.89577765900588, "grad_norm": 2.170738697052002, "learning_rate": 5.359849530932603e-08, "loss": 1.1387, "step": 37030 }, { "epoch": 9.898450026723678, "grad_norm": 2.182894468307495, "learning_rate": 5.088532365324916e-08, "loss": 1.1364, "step": 37040 }, { "epoch": 9.901122394441476, "grad_norm": 2.218874216079712, "learning_rate": 4.824260039250072e-08, "loss": 1.1479, "step": 37050 }, { "epoch": 9.903794762159274, "grad_norm": 1.9596279859542847, "learning_rate": 4.567032738976851e-08, "loss": 1.143, "step": 37060 }, { "epoch": 9.906467129877072, "grad_norm": 2.1424190998077393, "learning_rate": 4.316850645810222e-08, "loss": 1.1494, "step": 37070 }, { "epoch": 9.909139497594868, "grad_norm": 2.3702521324157715, "learning_rate": 4.07371393609024e-08, "loss": 1.1363, "step": 37080 }, { "epoch": 9.911811865312668, "grad_norm": 2.1741983890533447, "learning_rate": 3.83762278118982e-08, "loss": 1.1109, "step": 37090 }, { "epoch": 9.914484233030464, "grad_norm": 2.1364736557006836, "learning_rate": 3.6085773475147413e-08, "loss": 1.1601, "step": 37100 }, { "epoch": 9.917156600748264, "grad_norm": 2.38227915763855, "learning_rate": 3.386577796508084e-08, "loss": 1.2247, "step": 37110 }, { "epoch": 9.91982896846606, "grad_norm": 2.1709582805633545, "learning_rate": 3.171624284642461e-08, "loss": 1.1875, "step": 37120 }, { "epoch": 9.922501336183858, "grad_norm": 2.435558319091797, "learning_rate": 2.9637169634277872e-08, "loss": 1.1001, "step": 37130 }, { "epoch": 9.925173703901656, "grad_norm": 2.0772573947906494, "learning_rate": 2.7628559794057317e-08, "loss": 1.138, "step": 37140 }, { "epoch": 9.927846071619454, "grad_norm": 2.2174742221832275, "learning_rate": 2.569041474150824e-08, "loss": 1.1254, "step": 37150 }, { "epoch": 9.930518439337252, "grad_norm": 2.2710330486297607, "learning_rate": 2.3822735842726763e-08, "loss": 1.1296, "step": 37160 }, { "epoch": 9.93319080705505, "grad_norm": 2.1777970790863037, "learning_rate": 2.202552441412653e-08, "loss": 1.1039, "step": 37170 }, { "epoch": 9.935863174772848, "grad_norm": 2.194032669067383, "learning_rate": 2.0298781722460912e-08, "loss": 1.1462, "step": 37180 }, { "epoch": 9.938535542490646, "grad_norm": 2.1591246128082275, "learning_rate": 1.86425089848119e-08, "loss": 1.1566, "step": 37190 }, { "epoch": 9.941207910208444, "grad_norm": 2.110175132751465, "learning_rate": 1.705670736860121e-08, "loss": 1.138, "step": 37200 }, { "epoch": 9.943880277926242, "grad_norm": 2.1057093143463135, "learning_rate": 1.5541377991545868e-08, "loss": 1.175, "step": 37210 }, { "epoch": 9.94655264564404, "grad_norm": 2.1288132667541504, "learning_rate": 1.409652192173594e-08, "loss": 1.187, "step": 37220 }, { "epoch": 9.949225013361838, "grad_norm": 2.1421315670013428, "learning_rate": 1.2722140177567899e-08, "loss": 1.1366, "step": 37230 }, { "epoch": 9.951897381079636, "grad_norm": 2.2259390354156494, "learning_rate": 1.1418233727744642e-08, "loss": 1.1012, "step": 37240 }, { "epoch": 9.954569748797434, "grad_norm": 2.059415578842163, "learning_rate": 1.0184803491342099e-08, "loss": 1.1875, "step": 37250 }, { "epoch": 9.957242116515232, "grad_norm": 2.463301420211792, "learning_rate": 9.021850337709303e-09, "loss": 1.0934, "step": 37260 }, { "epoch": 9.95991448423303, "grad_norm": 2.4153430461883545, "learning_rate": 7.929375086557223e-09, "loss": 1.0634, "step": 37270 }, { "epoch": 9.962586851950828, "grad_norm": 2.280428886413574, "learning_rate": 6.907378507903239e-09, "loss": 1.1093, "step": 37280 }, { "epoch": 9.965259219668626, "grad_norm": 2.283224582672119, "learning_rate": 5.9558613221044615e-09, "loss": 1.1276, "step": 37290 }, { "epoch": 9.967931587386424, "grad_norm": 2.3668198585510254, "learning_rate": 5.074824199824413e-09, "loss": 1.1984, "step": 37300 }, { "epoch": 9.970603955104222, "grad_norm": 2.254871368408203, "learning_rate": 4.264267762044139e-09, "loss": 1.1044, "step": 37310 }, { "epoch": 9.97327632282202, "grad_norm": 2.074324369430542, "learning_rate": 3.524192580106611e-09, "loss": 1.145, "step": 37320 }, { "epoch": 9.975948690539818, "grad_norm": 2.271371603012085, "learning_rate": 2.8545991756168122e-09, "loss": 1.0873, "step": 37330 }, { "epoch": 9.978621058257616, "grad_norm": 2.232445001602173, "learning_rate": 2.255488020552754e-09, "loss": 1.1409, "step": 37340 }, { "epoch": 9.981293425975414, "grad_norm": 2.2682485580444336, "learning_rate": 1.726859537198866e-09, "loss": 1.1406, "step": 37350 }, { "epoch": 9.983965793693212, "grad_norm": 2.0237770080566406, "learning_rate": 1.2687140981348933e-09, "loss": 1.1185, "step": 37360 }, { "epoch": 9.98663816141101, "grad_norm": 2.0874645709991455, "learning_rate": 8.810520262914068e-10, "loss": 1.1659, "step": 37370 }, { "epoch": 9.989310529128808, "grad_norm": 2.3018386363983154, "learning_rate": 5.638735949053953e-10, "loss": 1.1434, "step": 37380 }, { "epoch": 9.991982896846606, "grad_norm": 2.2468602657318115, "learning_rate": 3.171790275424691e-10, "loss": 1.1011, "step": 37390 }, { "epoch": 9.994655264564404, "grad_norm": 2.2814855575561523, "learning_rate": 1.409684980857584e-10, "loss": 1.1761, "step": 37400 }, { "epoch": 9.997327632282202, "grad_norm": 2.0475192070007324, "learning_rate": 3.524213073591298e-11, "loss": 1.1567, "step": 37410 }, { "epoch": 10.0, "grad_norm": 2.189579963684082, "learning_rate": 0.0, "loss": 1.1511, "step": 37420 }, { "epoch": 10.0, "step": 37420, "total_flos": 1.840643671200891e+18, "train_loss": 1.5030417529482818, "train_runtime": 53386.1968, "train_samples_per_second": 2.804, "train_steps_per_second": 0.701 } ], "logging_steps": 10, "max_steps": 37420, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.840643671200891e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }