{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5892491492715407, "eval_steps": 500, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9462457463577036e-05, "grad_norm": 3.3975270929663983, "learning_rate": 2.9455081001472753e-09, "loss": 0.7583, "step": 1 }, { "epoch": 5.892491492715407e-05, "grad_norm": 3.0640388774247445, "learning_rate": 5.8910162002945506e-09, "loss": 0.8509, "step": 2 }, { "epoch": 8.83873723907311e-05, "grad_norm": 3.4068925534044143, "learning_rate": 8.836524300441827e-09, "loss": 0.7815, "step": 3 }, { "epoch": 0.00011784982985430814, "grad_norm": 3.0732441461182867, "learning_rate": 1.1782032400589101e-08, "loss": 0.7146, "step": 4 }, { "epoch": 0.0001473122873178852, "grad_norm": 3.1362673549886906, "learning_rate": 1.4727540500736378e-08, "loss": 0.8727, "step": 5 }, { "epoch": 0.0001767747447814622, "grad_norm": 3.38281076225744, "learning_rate": 1.7673048600883655e-08, "loss": 0.8965, "step": 6 }, { "epoch": 0.00020623720224503925, "grad_norm": 3.707420459343831, "learning_rate": 2.061855670103093e-08, "loss": 1.0532, "step": 7 }, { "epoch": 0.0002356996597086163, "grad_norm": 3.206089955634612, "learning_rate": 2.3564064801178202e-08, "loss": 0.8864, "step": 8 }, { "epoch": 0.00026516211717219333, "grad_norm": 3.4475612030153058, "learning_rate": 2.6509572901325482e-08, "loss": 0.8858, "step": 9 }, { "epoch": 0.0002946245746357704, "grad_norm": 3.283780339440547, "learning_rate": 2.9455081001472756e-08, "loss": 0.8685, "step": 10 }, { "epoch": 0.0003240870320993474, "grad_norm": 3.0982636190279043, "learning_rate": 3.2400589101620036e-08, "loss": 0.8165, "step": 11 }, { "epoch": 0.0003535494895629244, "grad_norm": 3.035198258448494, "learning_rate": 3.534609720176731e-08, "loss": 0.7226, "step": 12 }, { "epoch": 0.0003830119470265015, "grad_norm": 3.5345376932970116, "learning_rate": 3.8291605301914584e-08, "loss": 0.8656, "step": 13 }, { "epoch": 0.0004124744044900785, "grad_norm": 3.2998107831543955, "learning_rate": 4.123711340206186e-08, "loss": 0.6709, "step": 14 }, { "epoch": 0.00044193686195365556, "grad_norm": 3.342657640614706, "learning_rate": 4.418262150220913e-08, "loss": 0.8278, "step": 15 }, { "epoch": 0.0004713993194172326, "grad_norm": 3.4355207489960744, "learning_rate": 4.7128129602356404e-08, "loss": 0.8692, "step": 16 }, { "epoch": 0.0005008617768808096, "grad_norm": 3.6618784758208758, "learning_rate": 5.007363770250369e-08, "loss": 0.9102, "step": 17 }, { "epoch": 0.0005303242343443867, "grad_norm": 3.266906263223464, "learning_rate": 5.3019145802650965e-08, "loss": 0.9601, "step": 18 }, { "epoch": 0.0005597866918079637, "grad_norm": 3.2538398165934854, "learning_rate": 5.596465390279824e-08, "loss": 0.7954, "step": 19 }, { "epoch": 0.0005892491492715408, "grad_norm": 3.377712249749299, "learning_rate": 5.891016200294551e-08, "loss": 0.9325, "step": 20 }, { "epoch": 0.0006187116067351177, "grad_norm": 3.124676962928241, "learning_rate": 6.185567010309278e-08, "loss": 0.7961, "step": 21 }, { "epoch": 0.0006481740641986948, "grad_norm": 3.276958125146171, "learning_rate": 6.480117820324007e-08, "loss": 0.9492, "step": 22 }, { "epoch": 0.0006776365216622719, "grad_norm": 3.405407368856565, "learning_rate": 6.774668630338734e-08, "loss": 0.8213, "step": 23 }, { "epoch": 0.0007070989791258488, "grad_norm": 3.361498990940193, "learning_rate": 7.069219440353462e-08, "loss": 0.7992, "step": 24 }, { "epoch": 0.0007365614365894259, "grad_norm": 3.278256819652335, "learning_rate": 7.363770250368189e-08, "loss": 0.7205, "step": 25 }, { "epoch": 0.000766023894053003, "grad_norm": 3.9838069116736863, "learning_rate": 7.658321060382917e-08, "loss": 0.9412, "step": 26 }, { "epoch": 0.00079548635151658, "grad_norm": 3.66965221047372, "learning_rate": 7.952871870397643e-08, "loss": 0.863, "step": 27 }, { "epoch": 0.000824948808980157, "grad_norm": 3.369145812557608, "learning_rate": 8.247422680412371e-08, "loss": 0.8848, "step": 28 }, { "epoch": 0.0008544112664437341, "grad_norm": 3.2370008589107417, "learning_rate": 8.541973490427098e-08, "loss": 0.9092, "step": 29 }, { "epoch": 0.0008838737239073111, "grad_norm": 3.6467619487192415, "learning_rate": 8.836524300441826e-08, "loss": 0.8708, "step": 30 }, { "epoch": 0.0009133361813708882, "grad_norm": 3.524147424950845, "learning_rate": 9.131075110456556e-08, "loss": 0.9266, "step": 31 }, { "epoch": 0.0009427986388344652, "grad_norm": 3.1607405058327576, "learning_rate": 9.425625920471281e-08, "loss": 0.8853, "step": 32 }, { "epoch": 0.0009722610962980422, "grad_norm": 3.7031060968204654, "learning_rate": 9.72017673048601e-08, "loss": 0.9143, "step": 33 }, { "epoch": 0.0010017235537616192, "grad_norm": 3.0267259055151325, "learning_rate": 1.0014727540500738e-07, "loss": 0.7066, "step": 34 }, { "epoch": 0.0010311860112251962, "grad_norm": 3.382434353626021, "learning_rate": 1.0309278350515465e-07, "loss": 1.0015, "step": 35 }, { "epoch": 0.0010606484686887733, "grad_norm": 3.4583588973861055, "learning_rate": 1.0603829160530193e-07, "loss": 0.8954, "step": 36 }, { "epoch": 0.0010901109261523504, "grad_norm": 3.143201519376105, "learning_rate": 1.089837997054492e-07, "loss": 0.8033, "step": 37 }, { "epoch": 0.0011195733836159274, "grad_norm": 3.205074480972418, "learning_rate": 1.1192930780559648e-07, "loss": 0.9451, "step": 38 }, { "epoch": 0.0011490358410795045, "grad_norm": 3.4654542745123673, "learning_rate": 1.1487481590574376e-07, "loss": 0.8346, "step": 39 }, { "epoch": 0.0011784982985430816, "grad_norm": 3.4964156850651427, "learning_rate": 1.1782032400589102e-07, "loss": 0.973, "step": 40 }, { "epoch": 0.0012079607560066584, "grad_norm": 3.5395471907480687, "learning_rate": 1.2076583210603832e-07, "loss": 0.8733, "step": 41 }, { "epoch": 0.0012374232134702355, "grad_norm": 3.311464578574755, "learning_rate": 1.2371134020618556e-07, "loss": 0.8895, "step": 42 }, { "epoch": 0.0012668856709338126, "grad_norm": 3.341769635596914, "learning_rate": 1.2665684830633285e-07, "loss": 0.9217, "step": 43 }, { "epoch": 0.0012963481283973896, "grad_norm": 3.217379768707353, "learning_rate": 1.2960235640648015e-07, "loss": 0.8019, "step": 44 }, { "epoch": 0.0013258105858609667, "grad_norm": 3.3021942301101603, "learning_rate": 1.325478645066274e-07, "loss": 0.7762, "step": 45 }, { "epoch": 0.0013552730433245438, "grad_norm": 3.803631666044249, "learning_rate": 1.3549337260677468e-07, "loss": 0.8447, "step": 46 }, { "epoch": 0.0013847355007881208, "grad_norm": 3.409873912511111, "learning_rate": 1.3843888070692195e-07, "loss": 0.7127, "step": 47 }, { "epoch": 0.0014141979582516977, "grad_norm": 3.3149484007221393, "learning_rate": 1.4138438880706924e-07, "loss": 0.8177, "step": 48 }, { "epoch": 0.0014436604157152747, "grad_norm": 2.994055744010301, "learning_rate": 1.443298969072165e-07, "loss": 0.6121, "step": 49 }, { "epoch": 0.0014731228731788518, "grad_norm": 3.059279200048262, "learning_rate": 1.4727540500736377e-07, "loss": 0.8966, "step": 50 }, { "epoch": 0.0015025853306424289, "grad_norm": 3.78194225412784, "learning_rate": 1.5022091310751107e-07, "loss": 1.052, "step": 51 }, { "epoch": 0.001532047788106006, "grad_norm": 3.251768782642097, "learning_rate": 1.5316642120765833e-07, "loss": 0.9593, "step": 52 }, { "epoch": 0.001561510245569583, "grad_norm": 3.389365475218616, "learning_rate": 1.561119293078056e-07, "loss": 0.9093, "step": 53 }, { "epoch": 0.00159097270303316, "grad_norm": 3.21702529056747, "learning_rate": 1.5905743740795287e-07, "loss": 0.893, "step": 54 }, { "epoch": 0.001620435160496737, "grad_norm": 3.4132587014738855, "learning_rate": 1.6200294550810016e-07, "loss": 0.8733, "step": 55 }, { "epoch": 0.001649897617960314, "grad_norm": 3.624778238735922, "learning_rate": 1.6494845360824743e-07, "loss": 1.0284, "step": 56 }, { "epoch": 0.001679360075423891, "grad_norm": 3.1635269562009176, "learning_rate": 1.678939617083947e-07, "loss": 0.7891, "step": 57 }, { "epoch": 0.0017088225328874681, "grad_norm": 3.08949308102609, "learning_rate": 1.7083946980854196e-07, "loss": 0.8509, "step": 58 }, { "epoch": 0.0017382849903510452, "grad_norm": 3.174100342385756, "learning_rate": 1.7378497790868928e-07, "loss": 0.8955, "step": 59 }, { "epoch": 0.0017677474478146223, "grad_norm": 3.152191782675316, "learning_rate": 1.7673048600883652e-07, "loss": 0.9352, "step": 60 }, { "epoch": 0.0017972099052781993, "grad_norm": 2.8224571698157317, "learning_rate": 1.796759941089838e-07, "loss": 0.7839, "step": 61 }, { "epoch": 0.0018266723627417764, "grad_norm": 3.2866210584501383, "learning_rate": 1.826215022091311e-07, "loss": 0.8118, "step": 62 }, { "epoch": 0.0018561348202053532, "grad_norm": 2.851159797721307, "learning_rate": 1.8556701030927838e-07, "loss": 0.6894, "step": 63 }, { "epoch": 0.0018855972776689303, "grad_norm": 3.2122948827616136, "learning_rate": 1.8851251840942562e-07, "loss": 0.9145, "step": 64 }, { "epoch": 0.0019150597351325074, "grad_norm": 3.3488921738688777, "learning_rate": 1.9145802650957294e-07, "loss": 0.9934, "step": 65 }, { "epoch": 0.0019445221925960844, "grad_norm": 3.081091327445858, "learning_rate": 1.944035346097202e-07, "loss": 0.8334, "step": 66 }, { "epoch": 0.0019739846500596613, "grad_norm": 2.909288257340718, "learning_rate": 1.9734904270986747e-07, "loss": 0.8314, "step": 67 }, { "epoch": 0.0020034471075232384, "grad_norm": 2.804337659125734, "learning_rate": 2.0029455081001477e-07, "loss": 0.7504, "step": 68 }, { "epoch": 0.0020329095649868154, "grad_norm": 3.329510647269318, "learning_rate": 2.0324005891016203e-07, "loss": 0.8377, "step": 69 }, { "epoch": 0.0020623720224503925, "grad_norm": 3.248737926705015, "learning_rate": 2.061855670103093e-07, "loss": 0.9382, "step": 70 }, { "epoch": 0.0020918344799139696, "grad_norm": 3.005906971624085, "learning_rate": 2.0913107511045657e-07, "loss": 0.8473, "step": 71 }, { "epoch": 0.0021212969373775466, "grad_norm": 2.7500670159805973, "learning_rate": 2.1207658321060386e-07, "loss": 0.7347, "step": 72 }, { "epoch": 0.0021507593948411237, "grad_norm": 2.8114024761854606, "learning_rate": 2.1502209131075113e-07, "loss": 0.7195, "step": 73 }, { "epoch": 0.0021802218523047008, "grad_norm": 2.8608450096011833, "learning_rate": 2.179675994108984e-07, "loss": 0.7262, "step": 74 }, { "epoch": 0.002209684309768278, "grad_norm": 2.69859607915194, "learning_rate": 2.209131075110457e-07, "loss": 0.6944, "step": 75 }, { "epoch": 0.002239146767231855, "grad_norm": 3.1069756331704337, "learning_rate": 2.2385861561119295e-07, "loss": 0.8004, "step": 76 }, { "epoch": 0.002268609224695432, "grad_norm": 2.7002129893233144, "learning_rate": 2.2680412371134022e-07, "loss": 0.732, "step": 77 }, { "epoch": 0.002298071682159009, "grad_norm": 2.87447603382414, "learning_rate": 2.2974963181148751e-07, "loss": 0.8947, "step": 78 }, { "epoch": 0.002327534139622586, "grad_norm": 2.8643946599070356, "learning_rate": 2.3269513991163478e-07, "loss": 0.9564, "step": 79 }, { "epoch": 0.002356996597086163, "grad_norm": 2.8128253534414362, "learning_rate": 2.3564064801178205e-07, "loss": 0.9939, "step": 80 }, { "epoch": 0.00238645905454974, "grad_norm": 2.4876024955187503, "learning_rate": 2.385861561119293e-07, "loss": 0.7662, "step": 81 }, { "epoch": 0.002415921512013317, "grad_norm": 2.8848569904215156, "learning_rate": 2.4153166421207664e-07, "loss": 0.9659, "step": 82 }, { "epoch": 0.002445383969476894, "grad_norm": 3.0498800984186896, "learning_rate": 2.444771723122239e-07, "loss": 0.8066, "step": 83 }, { "epoch": 0.002474846426940471, "grad_norm": 2.724880051518525, "learning_rate": 2.474226804123711e-07, "loss": 0.8505, "step": 84 }, { "epoch": 0.002504308884404048, "grad_norm": 3.0458624507105405, "learning_rate": 2.5036818851251844e-07, "loss": 1.0173, "step": 85 }, { "epoch": 0.002533771341867625, "grad_norm": 2.9690493065425643, "learning_rate": 2.533136966126657e-07, "loss": 0.8831, "step": 86 }, { "epoch": 0.002563233799331202, "grad_norm": 2.5597939656101194, "learning_rate": 2.5625920471281297e-07, "loss": 0.6831, "step": 87 }, { "epoch": 0.0025926962567947793, "grad_norm": 2.842842060154296, "learning_rate": 2.592047128129603e-07, "loss": 0.953, "step": 88 }, { "epoch": 0.0026221587142583563, "grad_norm": 2.7304655788134604, "learning_rate": 2.6215022091310756e-07, "loss": 0.8414, "step": 89 }, { "epoch": 0.0026516211717219334, "grad_norm": 2.8593713851270635, "learning_rate": 2.650957290132548e-07, "loss": 0.9885, "step": 90 }, { "epoch": 0.0026810836291855105, "grad_norm": 2.5543468808316097, "learning_rate": 2.680412371134021e-07, "loss": 0.7642, "step": 91 }, { "epoch": 0.0027105460866490875, "grad_norm": 2.688348818634614, "learning_rate": 2.7098674521354936e-07, "loss": 0.884, "step": 92 }, { "epoch": 0.0027400085441126646, "grad_norm": 2.61240982751056, "learning_rate": 2.739322533136966e-07, "loss": 0.9676, "step": 93 }, { "epoch": 0.0027694710015762417, "grad_norm": 2.6863811647653018, "learning_rate": 2.768777614138439e-07, "loss": 0.8855, "step": 94 }, { "epoch": 0.0027989334590398187, "grad_norm": 2.393655542016689, "learning_rate": 2.798232695139912e-07, "loss": 0.7832, "step": 95 }, { "epoch": 0.0028283959165033954, "grad_norm": 2.7480408222867774, "learning_rate": 2.827687776141385e-07, "loss": 0.7832, "step": 96 }, { "epoch": 0.0028578583739669724, "grad_norm": 2.7073905094540867, "learning_rate": 2.8571428571428575e-07, "loss": 0.9034, "step": 97 }, { "epoch": 0.0028873208314305495, "grad_norm": 2.5963695535977784, "learning_rate": 2.88659793814433e-07, "loss": 0.8684, "step": 98 }, { "epoch": 0.0029167832888941266, "grad_norm": 2.454811649108227, "learning_rate": 2.916053019145803e-07, "loss": 0.8182, "step": 99 }, { "epoch": 0.0029462457463577036, "grad_norm": 2.650877584517891, "learning_rate": 2.9455081001472755e-07, "loss": 0.8137, "step": 100 }, { "epoch": 0.0029757082038212807, "grad_norm": 2.5188097629235595, "learning_rate": 2.974963181148748e-07, "loss": 0.8171, "step": 101 }, { "epoch": 0.0030051706612848578, "grad_norm": 2.5108575960486155, "learning_rate": 3.0044182621502213e-07, "loss": 0.8051, "step": 102 }, { "epoch": 0.003034633118748435, "grad_norm": 2.6770988615605718, "learning_rate": 3.033873343151694e-07, "loss": 0.8725, "step": 103 }, { "epoch": 0.003064095576212012, "grad_norm": 2.539550420649993, "learning_rate": 3.0633284241531667e-07, "loss": 0.7249, "step": 104 }, { "epoch": 0.003093558033675589, "grad_norm": 2.6439795913101025, "learning_rate": 3.0927835051546394e-07, "loss": 0.8273, "step": 105 }, { "epoch": 0.003123020491139166, "grad_norm": 2.4429657349964957, "learning_rate": 3.122238586156112e-07, "loss": 0.7613, "step": 106 }, { "epoch": 0.003152482948602743, "grad_norm": 2.6241572627464307, "learning_rate": 3.1516936671575847e-07, "loss": 0.9701, "step": 107 }, { "epoch": 0.00318194540606632, "grad_norm": 3.143152673430945, "learning_rate": 3.1811487481590574e-07, "loss": 0.8568, "step": 108 }, { "epoch": 0.0032114078635298972, "grad_norm": 2.633427329807624, "learning_rate": 3.21060382916053e-07, "loss": 0.9272, "step": 109 }, { "epoch": 0.003240870320993474, "grad_norm": 2.59608078636099, "learning_rate": 3.240058910162003e-07, "loss": 0.8272, "step": 110 }, { "epoch": 0.003270332778457051, "grad_norm": 2.390581394217168, "learning_rate": 3.269513991163476e-07, "loss": 0.7892, "step": 111 }, { "epoch": 0.003299795235920628, "grad_norm": 2.6674830229846194, "learning_rate": 3.2989690721649486e-07, "loss": 0.9887, "step": 112 }, { "epoch": 0.003329257693384205, "grad_norm": 2.5388403081333277, "learning_rate": 3.328424153166421e-07, "loss": 0.9313, "step": 113 }, { "epoch": 0.003358720150847782, "grad_norm": 2.2871353769406797, "learning_rate": 3.357879234167894e-07, "loss": 0.7167, "step": 114 }, { "epoch": 0.003388182608311359, "grad_norm": 2.1980802301912608, "learning_rate": 3.3873343151693666e-07, "loss": 0.6103, "step": 115 }, { "epoch": 0.0034176450657749362, "grad_norm": 2.1552180939503898, "learning_rate": 3.416789396170839e-07, "loss": 0.7364, "step": 116 }, { "epoch": 0.0034471075232385133, "grad_norm": 2.245140323546912, "learning_rate": 3.446244477172313e-07, "loss": 0.7701, "step": 117 }, { "epoch": 0.0034765699807020904, "grad_norm": 2.4797451320936816, "learning_rate": 3.4756995581737857e-07, "loss": 0.8966, "step": 118 }, { "epoch": 0.0035060324381656674, "grad_norm": 2.33144832554697, "learning_rate": 3.505154639175258e-07, "loss": 0.8032, "step": 119 }, { "epoch": 0.0035354948956292445, "grad_norm": 2.364110484804411, "learning_rate": 3.5346097201767305e-07, "loss": 0.7778, "step": 120 }, { "epoch": 0.0035649573530928216, "grad_norm": 2.3289357926843994, "learning_rate": 3.564064801178203e-07, "loss": 0.9037, "step": 121 }, { "epoch": 0.0035944198105563986, "grad_norm": 2.2230672470899853, "learning_rate": 3.593519882179676e-07, "loss": 0.6202, "step": 122 }, { "epoch": 0.0036238822680199757, "grad_norm": 2.35562396510548, "learning_rate": 3.6229749631811495e-07, "loss": 0.7794, "step": 123 }, { "epoch": 0.0036533447254835528, "grad_norm": 2.162280780527263, "learning_rate": 3.652430044182622e-07, "loss": 0.8352, "step": 124 }, { "epoch": 0.0036828071829471294, "grad_norm": 2.324931553303335, "learning_rate": 3.681885125184095e-07, "loss": 0.8384, "step": 125 }, { "epoch": 0.0037122696404107065, "grad_norm": 2.0775801358649653, "learning_rate": 3.7113402061855675e-07, "loss": 0.6765, "step": 126 }, { "epoch": 0.0037417320978742835, "grad_norm": 2.3351973507607013, "learning_rate": 3.74079528718704e-07, "loss": 0.8511, "step": 127 }, { "epoch": 0.0037711945553378606, "grad_norm": 2.659308380341243, "learning_rate": 3.7702503681885124e-07, "loss": 0.8099, "step": 128 }, { "epoch": 0.0038006570128014377, "grad_norm": 2.857316811585787, "learning_rate": 3.799705449189985e-07, "loss": 0.9222, "step": 129 }, { "epoch": 0.0038301194702650147, "grad_norm": 2.657603672471487, "learning_rate": 3.829160530191459e-07, "loss": 0.6404, "step": 130 }, { "epoch": 0.003859581927728592, "grad_norm": 2.3441494974032913, "learning_rate": 3.8586156111929314e-07, "loss": 0.8037, "step": 131 }, { "epoch": 0.003889044385192169, "grad_norm": 2.3977338633932574, "learning_rate": 3.888070692194404e-07, "loss": 0.8952, "step": 132 }, { "epoch": 0.003918506842655746, "grad_norm": 2.2306134601687404, "learning_rate": 3.917525773195877e-07, "loss": 0.7289, "step": 133 }, { "epoch": 0.003947969300119323, "grad_norm": 1.9691266464034527, "learning_rate": 3.9469808541973494e-07, "loss": 0.6898, "step": 134 }, { "epoch": 0.0039774317575829, "grad_norm": 2.153303454220631, "learning_rate": 3.976435935198822e-07, "loss": 0.7062, "step": 135 }, { "epoch": 0.004006894215046477, "grad_norm": 2.4528827378845497, "learning_rate": 4.0058910162002953e-07, "loss": 0.7271, "step": 136 }, { "epoch": 0.004036356672510054, "grad_norm": 2.183621107542539, "learning_rate": 4.035346097201768e-07, "loss": 0.7993, "step": 137 }, { "epoch": 0.004065819129973631, "grad_norm": 2.310083112755648, "learning_rate": 4.0648011782032406e-07, "loss": 0.7733, "step": 138 }, { "epoch": 0.004095281587437208, "grad_norm": 2.2436843197423224, "learning_rate": 4.0942562592047133e-07, "loss": 0.6191, "step": 139 }, { "epoch": 0.004124744044900785, "grad_norm": 1.9396699947433018, "learning_rate": 4.123711340206186e-07, "loss": 0.6114, "step": 140 }, { "epoch": 0.0041542065023643625, "grad_norm": 2.2263728388531674, "learning_rate": 4.1531664212076587e-07, "loss": 0.7073, "step": 141 }, { "epoch": 0.004183668959827939, "grad_norm": 2.373447272443055, "learning_rate": 4.1826215022091313e-07, "loss": 0.7307, "step": 142 }, { "epoch": 0.004213131417291517, "grad_norm": 2.2550389799355006, "learning_rate": 4.2120765832106045e-07, "loss": 0.8355, "step": 143 }, { "epoch": 0.004242593874755093, "grad_norm": 2.332726564347342, "learning_rate": 4.241531664212077e-07, "loss": 0.8638, "step": 144 }, { "epoch": 0.004272056332218671, "grad_norm": 2.052543486310581, "learning_rate": 4.27098674521355e-07, "loss": 0.7816, "step": 145 }, { "epoch": 0.004301518789682247, "grad_norm": 2.3918080797746724, "learning_rate": 4.3004418262150225e-07, "loss": 0.8481, "step": 146 }, { "epoch": 0.004330981247145824, "grad_norm": 2.148465502093043, "learning_rate": 4.329896907216495e-07, "loss": 0.7365, "step": 147 }, { "epoch": 0.0043604437046094015, "grad_norm": 2.185539988142009, "learning_rate": 4.359351988217968e-07, "loss": 0.6863, "step": 148 }, { "epoch": 0.004389906162072978, "grad_norm": 2.027691455405861, "learning_rate": 4.3888070692194405e-07, "loss": 0.6297, "step": 149 }, { "epoch": 0.004419368619536556, "grad_norm": 2.345051244295254, "learning_rate": 4.418262150220914e-07, "loss": 0.7246, "step": 150 }, { "epoch": 0.004448831077000132, "grad_norm": 2.0722869520297453, "learning_rate": 4.4477172312223864e-07, "loss": 0.6869, "step": 151 }, { "epoch": 0.00447829353446371, "grad_norm": 2.244771459405718, "learning_rate": 4.477172312223859e-07, "loss": 0.8402, "step": 152 }, { "epoch": 0.004507755991927286, "grad_norm": 2.22965103255027, "learning_rate": 4.506627393225332e-07, "loss": 0.8335, "step": 153 }, { "epoch": 0.004537218449390864, "grad_norm": 2.18387240252674, "learning_rate": 4.5360824742268044e-07, "loss": 0.7762, "step": 154 }, { "epoch": 0.0045666809068544405, "grad_norm": 2.174199178946872, "learning_rate": 4.565537555228277e-07, "loss": 0.7696, "step": 155 }, { "epoch": 0.004596143364318018, "grad_norm": 2.124358440903496, "learning_rate": 4.5949926362297503e-07, "loss": 0.6796, "step": 156 }, { "epoch": 0.004625605821781595, "grad_norm": 2.0965355683534646, "learning_rate": 4.624447717231223e-07, "loss": 0.8111, "step": 157 }, { "epoch": 0.004655068279245172, "grad_norm": 2.329452073411998, "learning_rate": 4.6539027982326956e-07, "loss": 0.795, "step": 158 }, { "epoch": 0.004684530736708749, "grad_norm": 2.2643082137449024, "learning_rate": 4.6833578792341683e-07, "loss": 0.7214, "step": 159 }, { "epoch": 0.004713993194172326, "grad_norm": 2.4235190586592004, "learning_rate": 4.712812960235641e-07, "loss": 0.5872, "step": 160 }, { "epoch": 0.004743455651635903, "grad_norm": 2.3645227580239188, "learning_rate": 4.7422680412371136e-07, "loss": 0.9797, "step": 161 }, { "epoch": 0.00477291810909948, "grad_norm": 2.03991363492064, "learning_rate": 4.771723122238586e-07, "loss": 0.8368, "step": 162 }, { "epoch": 0.004802380566563057, "grad_norm": 2.0858157766849414, "learning_rate": 4.80117820324006e-07, "loss": 0.8205, "step": 163 }, { "epoch": 0.004831843024026634, "grad_norm": 1.7812686992803, "learning_rate": 4.830633284241533e-07, "loss": 0.6342, "step": 164 }, { "epoch": 0.004861305481490211, "grad_norm": 2.0382092392849596, "learning_rate": 4.860088365243005e-07, "loss": 0.8348, "step": 165 }, { "epoch": 0.004890767938953788, "grad_norm": 2.1779025777714605, "learning_rate": 4.889543446244478e-07, "loss": 0.7948, "step": 166 }, { "epoch": 0.004920230396417365, "grad_norm": 1.8914201663911436, "learning_rate": 4.918998527245951e-07, "loss": 0.7393, "step": 167 }, { "epoch": 0.004949692853880942, "grad_norm": 1.974472363722072, "learning_rate": 4.948453608247422e-07, "loss": 0.7007, "step": 168 }, { "epoch": 0.0049791553113445195, "grad_norm": 2.1861441661521237, "learning_rate": 4.977908689248895e-07, "loss": 0.7932, "step": 169 }, { "epoch": 0.005008617768808096, "grad_norm": 2.0577762807620212, "learning_rate": 5.007363770250369e-07, "loss": 0.8311, "step": 170 }, { "epoch": 0.005038080226271674, "grad_norm": 2.0730000624039424, "learning_rate": 5.036818851251841e-07, "loss": 0.7848, "step": 171 }, { "epoch": 0.00506754268373525, "grad_norm": 1.8258848928799105, "learning_rate": 5.066273932253314e-07, "loss": 0.6751, "step": 172 }, { "epoch": 0.005097005141198828, "grad_norm": 2.1160839988999682, "learning_rate": 5.095729013254787e-07, "loss": 0.8266, "step": 173 }, { "epoch": 0.005126467598662404, "grad_norm": 1.9370430802760479, "learning_rate": 5.125184094256259e-07, "loss": 0.7721, "step": 174 }, { "epoch": 0.005155930056125982, "grad_norm": 1.7218352588534904, "learning_rate": 5.154639175257732e-07, "loss": 0.598, "step": 175 }, { "epoch": 0.0051853925135895585, "grad_norm": 2.2103333489873616, "learning_rate": 5.184094256259206e-07, "loss": 0.7682, "step": 176 }, { "epoch": 0.005214854971053135, "grad_norm": 1.9978891659265567, "learning_rate": 5.213549337260678e-07, "loss": 0.7134, "step": 177 }, { "epoch": 0.005244317428516713, "grad_norm": 2.1572814015264523, "learning_rate": 5.243004418262151e-07, "loss": 0.7609, "step": 178 }, { "epoch": 0.005273779885980289, "grad_norm": 2.3922072697414705, "learning_rate": 5.272459499263624e-07, "loss": 0.9017, "step": 179 }, { "epoch": 0.005303242343443867, "grad_norm": 2.070124761858788, "learning_rate": 5.301914580265096e-07, "loss": 0.6115, "step": 180 }, { "epoch": 0.005332704800907443, "grad_norm": 1.911108995538107, "learning_rate": 5.331369661266569e-07, "loss": 0.6536, "step": 181 }, { "epoch": 0.005362167258371021, "grad_norm": 2.1791814923455934, "learning_rate": 5.360824742268042e-07, "loss": 0.8967, "step": 182 }, { "epoch": 0.0053916297158345975, "grad_norm": 2.711491849586939, "learning_rate": 5.390279823269515e-07, "loss": 0.6675, "step": 183 }, { "epoch": 0.005421092173298175, "grad_norm": 2.0042717268668904, "learning_rate": 5.419734904270987e-07, "loss": 0.8211, "step": 184 }, { "epoch": 0.005450554630761752, "grad_norm": 2.1285016459341883, "learning_rate": 5.44918998527246e-07, "loss": 0.8216, "step": 185 }, { "epoch": 0.005480017088225329, "grad_norm": 2.0154626438219276, "learning_rate": 5.478645066273933e-07, "loss": 0.6837, "step": 186 }, { "epoch": 0.005509479545688906, "grad_norm": 2.4376268723086856, "learning_rate": 5.508100147275405e-07, "loss": 0.7103, "step": 187 }, { "epoch": 0.005538942003152483, "grad_norm": 1.9626841893383755, "learning_rate": 5.537555228276878e-07, "loss": 0.8746, "step": 188 }, { "epoch": 0.00556840446061606, "grad_norm": 2.001885568700744, "learning_rate": 5.567010309278352e-07, "loss": 0.8383, "step": 189 }, { "epoch": 0.0055978669180796374, "grad_norm": 2.438822488844475, "learning_rate": 5.596465390279824e-07, "loss": 0.7371, "step": 190 }, { "epoch": 0.005627329375543214, "grad_norm": 2.049320058004822, "learning_rate": 5.625920471281297e-07, "loss": 0.7927, "step": 191 }, { "epoch": 0.005656791833006791, "grad_norm": 1.9058759822342333, "learning_rate": 5.65537555228277e-07, "loss": 0.8151, "step": 192 }, { "epoch": 0.005686254290470368, "grad_norm": 2.1236468623331306, "learning_rate": 5.684830633284242e-07, "loss": 0.7744, "step": 193 }, { "epoch": 0.005715716747933945, "grad_norm": 2.412535095096943, "learning_rate": 5.714285714285715e-07, "loss": 0.848, "step": 194 }, { "epoch": 0.005745179205397522, "grad_norm": 1.8732143841344537, "learning_rate": 5.743740795287188e-07, "loss": 0.825, "step": 195 }, { "epoch": 0.005774641662861099, "grad_norm": 1.9403031745175483, "learning_rate": 5.77319587628866e-07, "loss": 0.7167, "step": 196 }, { "epoch": 0.0058041041203246765, "grad_norm": 2.131971331168311, "learning_rate": 5.802650957290133e-07, "loss": 0.5813, "step": 197 }, { "epoch": 0.005833566577788253, "grad_norm": 1.9375659480635954, "learning_rate": 5.832106038291606e-07, "loss": 0.7889, "step": 198 }, { "epoch": 0.005863029035251831, "grad_norm": 2.111041579723023, "learning_rate": 5.861561119293078e-07, "loss": 0.649, "step": 199 }, { "epoch": 0.005892491492715407, "grad_norm": 1.985169658077313, "learning_rate": 5.891016200294551e-07, "loss": 0.6807, "step": 200 }, { "epoch": 0.005921953950178985, "grad_norm": 1.8829994327594657, "learning_rate": 5.920471281296024e-07, "loss": 0.8354, "step": 201 }, { "epoch": 0.005951416407642561, "grad_norm": 1.88933824257824, "learning_rate": 5.949926362297496e-07, "loss": 0.7902, "step": 202 }, { "epoch": 0.005980878865106139, "grad_norm": 2.181819432397705, "learning_rate": 5.97938144329897e-07, "loss": 0.8283, "step": 203 }, { "epoch": 0.0060103413225697155, "grad_norm": 2.0583717254330782, "learning_rate": 6.008836524300443e-07, "loss": 0.6514, "step": 204 }, { "epoch": 0.006039803780033293, "grad_norm": 1.937908244644436, "learning_rate": 6.038291605301915e-07, "loss": 0.6615, "step": 205 }, { "epoch": 0.00606926623749687, "grad_norm": 2.105504125073992, "learning_rate": 6.067746686303388e-07, "loss": 0.6443, "step": 206 }, { "epoch": 0.006098728694960446, "grad_norm": 2.2737658288953484, "learning_rate": 6.097201767304861e-07, "loss": 0.8614, "step": 207 }, { "epoch": 0.006128191152424024, "grad_norm": 2.009703282769577, "learning_rate": 6.126656848306333e-07, "loss": 0.7853, "step": 208 }, { "epoch": 0.0061576536098876, "grad_norm": 1.784708739734312, "learning_rate": 6.156111929307806e-07, "loss": 0.6359, "step": 209 }, { "epoch": 0.006187116067351178, "grad_norm": 2.0640584146651983, "learning_rate": 6.185567010309279e-07, "loss": 0.6753, "step": 210 }, { "epoch": 0.0062165785248147545, "grad_norm": 2.1408907209210013, "learning_rate": 6.215022091310751e-07, "loss": 0.7315, "step": 211 }, { "epoch": 0.006246040982278332, "grad_norm": 1.904294189877089, "learning_rate": 6.244477172312224e-07, "loss": 0.5612, "step": 212 }, { "epoch": 0.006275503439741909, "grad_norm": 2.0147991043879188, "learning_rate": 6.273932253313697e-07, "loss": 0.7248, "step": 213 }, { "epoch": 0.006304965897205486, "grad_norm": 1.9613612128182045, "learning_rate": 6.303387334315169e-07, "loss": 0.5844, "step": 214 }, { "epoch": 0.006334428354669063, "grad_norm": 2.1124592495703363, "learning_rate": 6.332842415316642e-07, "loss": 0.8329, "step": 215 }, { "epoch": 0.00636389081213264, "grad_norm": 1.7166713714334936, "learning_rate": 6.362297496318115e-07, "loss": 0.6114, "step": 216 }, { "epoch": 0.006393353269596217, "grad_norm": 2.0173191039158067, "learning_rate": 6.391752577319587e-07, "loss": 0.8458, "step": 217 }, { "epoch": 0.0064228157270597944, "grad_norm": 1.9451598887900408, "learning_rate": 6.42120765832106e-07, "loss": 0.8092, "step": 218 }, { "epoch": 0.006452278184523371, "grad_norm": 2.0758365383235664, "learning_rate": 6.450662739322535e-07, "loss": 0.7618, "step": 219 }, { "epoch": 0.006481740641986948, "grad_norm": 2.1296620169611242, "learning_rate": 6.480117820324006e-07, "loss": 0.8024, "step": 220 }, { "epoch": 0.006511203099450525, "grad_norm": 2.478449165476901, "learning_rate": 6.509572901325479e-07, "loss": 0.8321, "step": 221 }, { "epoch": 0.006540665556914102, "grad_norm": 1.8774698812875845, "learning_rate": 6.539027982326952e-07, "loss": 0.7097, "step": 222 }, { "epoch": 0.006570128014377679, "grad_norm": 2.137454739481821, "learning_rate": 6.568483063328424e-07, "loss": 0.8196, "step": 223 }, { "epoch": 0.006599590471841256, "grad_norm": 1.9876166861254279, "learning_rate": 6.597938144329897e-07, "loss": 0.7787, "step": 224 }, { "epoch": 0.0066290529293048335, "grad_norm": 2.3672009999147696, "learning_rate": 6.62739322533137e-07, "loss": 0.8408, "step": 225 }, { "epoch": 0.00665851538676841, "grad_norm": 1.8662634126326478, "learning_rate": 6.656848306332842e-07, "loss": 0.6216, "step": 226 }, { "epoch": 0.006687977844231988, "grad_norm": 2.3243947545463524, "learning_rate": 6.686303387334315e-07, "loss": 0.8818, "step": 227 }, { "epoch": 0.006717440301695564, "grad_norm": 1.9342018536647758, "learning_rate": 6.715758468335788e-07, "loss": 0.7514, "step": 228 }, { "epoch": 0.006746902759159142, "grad_norm": 2.1916145698102034, "learning_rate": 6.74521354933726e-07, "loss": 0.7106, "step": 229 }, { "epoch": 0.006776365216622718, "grad_norm": 2.110924088483953, "learning_rate": 6.774668630338733e-07, "loss": 0.775, "step": 230 }, { "epoch": 0.006805827674086296, "grad_norm": 2.0099314885416297, "learning_rate": 6.804123711340206e-07, "loss": 0.6507, "step": 231 }, { "epoch": 0.0068352901315498725, "grad_norm": 1.972449852005018, "learning_rate": 6.833578792341679e-07, "loss": 0.5683, "step": 232 }, { "epoch": 0.00686475258901345, "grad_norm": 1.733481380755906, "learning_rate": 6.863033873343153e-07, "loss": 0.6948, "step": 233 }, { "epoch": 0.006894215046477027, "grad_norm": 1.9392050323563534, "learning_rate": 6.892488954344626e-07, "loss": 0.7178, "step": 234 }, { "epoch": 0.006923677503940603, "grad_norm": 1.967858258336101, "learning_rate": 6.921944035346099e-07, "loss": 0.7614, "step": 235 }, { "epoch": 0.006953139961404181, "grad_norm": 1.8856203750195557, "learning_rate": 6.951399116347571e-07, "loss": 0.897, "step": 236 }, { "epoch": 0.006982602418867757, "grad_norm": 2.281042909035616, "learning_rate": 6.980854197349044e-07, "loss": 0.9155, "step": 237 }, { "epoch": 0.007012064876331335, "grad_norm": 1.8765167032550698, "learning_rate": 7.010309278350516e-07, "loss": 0.6449, "step": 238 }, { "epoch": 0.0070415273337949115, "grad_norm": 1.8594255554566022, "learning_rate": 7.039764359351988e-07, "loss": 0.6689, "step": 239 }, { "epoch": 0.007070989791258489, "grad_norm": 2.2250012969462647, "learning_rate": 7.069219440353461e-07, "loss": 0.7058, "step": 240 }, { "epoch": 0.007100452248722066, "grad_norm": 2.099084468502132, "learning_rate": 7.098674521354934e-07, "loss": 0.7579, "step": 241 }, { "epoch": 0.007129914706185643, "grad_norm": 1.870206620031082, "learning_rate": 7.128129602356406e-07, "loss": 0.6119, "step": 242 }, { "epoch": 0.00715937716364922, "grad_norm": 1.9634119879132463, "learning_rate": 7.157584683357879e-07, "loss": 0.7134, "step": 243 }, { "epoch": 0.007188839621112797, "grad_norm": 1.8738888000854184, "learning_rate": 7.187039764359352e-07, "loss": 0.699, "step": 244 }, { "epoch": 0.007218302078576374, "grad_norm": 2.105832547314225, "learning_rate": 7.216494845360824e-07, "loss": 0.6551, "step": 245 }, { "epoch": 0.007247764536039951, "grad_norm": 1.9020891230534553, "learning_rate": 7.245949926362299e-07, "loss": 0.6145, "step": 246 }, { "epoch": 0.007277226993503528, "grad_norm": 1.986567535309637, "learning_rate": 7.275405007363772e-07, "loss": 0.7228, "step": 247 }, { "epoch": 0.0073066894509671056, "grad_norm": 1.984070105558074, "learning_rate": 7.304860088365244e-07, "loss": 0.606, "step": 248 }, { "epoch": 0.007336151908430682, "grad_norm": 2.219996544448319, "learning_rate": 7.334315169366717e-07, "loss": 0.8883, "step": 249 }, { "epoch": 0.007365614365894259, "grad_norm": 2.072399097830252, "learning_rate": 7.36377025036819e-07, "loss": 0.7104, "step": 250 }, { "epoch": 0.007395076823357836, "grad_norm": 2.108330698099853, "learning_rate": 7.393225331369662e-07, "loss": 0.6945, "step": 251 }, { "epoch": 0.007424539280821413, "grad_norm": 2.215938787316289, "learning_rate": 7.422680412371135e-07, "loss": 0.7168, "step": 252 }, { "epoch": 0.0074540017382849905, "grad_norm": 2.2778097396775925, "learning_rate": 7.452135493372608e-07, "loss": 0.8017, "step": 253 }, { "epoch": 0.007483464195748567, "grad_norm": 2.050251053225639, "learning_rate": 7.48159057437408e-07, "loss": 0.6411, "step": 254 }, { "epoch": 0.007512926653212145, "grad_norm": 2.037516455896947, "learning_rate": 7.511045655375553e-07, "loss": 0.7641, "step": 255 }, { "epoch": 0.007542389110675721, "grad_norm": 2.1121039613748613, "learning_rate": 7.540500736377025e-07, "loss": 0.6918, "step": 256 }, { "epoch": 0.007571851568139299, "grad_norm": 2.1976801255474165, "learning_rate": 7.569955817378497e-07, "loss": 0.7744, "step": 257 }, { "epoch": 0.007601314025602875, "grad_norm": 2.0164859970283695, "learning_rate": 7.59941089837997e-07, "loss": 0.8101, "step": 258 }, { "epoch": 0.007630776483066453, "grad_norm": 2.5547166444454463, "learning_rate": 7.628865979381445e-07, "loss": 0.7613, "step": 259 }, { "epoch": 0.0076602389405300295, "grad_norm": 2.1470468626304484, "learning_rate": 7.658321060382918e-07, "loss": 0.7307, "step": 260 }, { "epoch": 0.007689701397993607, "grad_norm": 2.020323935174846, "learning_rate": 7.68777614138439e-07, "loss": 0.6732, "step": 261 }, { "epoch": 0.007719163855457184, "grad_norm": 1.9516194640679503, "learning_rate": 7.717231222385863e-07, "loss": 0.7294, "step": 262 }, { "epoch": 0.007748626312920761, "grad_norm": 1.916897557495991, "learning_rate": 7.746686303387336e-07, "loss": 0.7889, "step": 263 }, { "epoch": 0.007778088770384338, "grad_norm": 2.110773995555955, "learning_rate": 7.776141384388808e-07, "loss": 0.7585, "step": 264 }, { "epoch": 0.007807551227847914, "grad_norm": 2.378225027467549, "learning_rate": 7.805596465390281e-07, "loss": 0.6016, "step": 265 }, { "epoch": 0.007837013685311492, "grad_norm": 2.260107379490066, "learning_rate": 7.835051546391754e-07, "loss": 0.6614, "step": 266 }, { "epoch": 0.007866476142775069, "grad_norm": 2.1502346168207596, "learning_rate": 7.864506627393226e-07, "loss": 0.6877, "step": 267 }, { "epoch": 0.007895938600238645, "grad_norm": 1.8783185494891856, "learning_rate": 7.893961708394699e-07, "loss": 0.6676, "step": 268 }, { "epoch": 0.007925401057702224, "grad_norm": 1.852373739278623, "learning_rate": 7.923416789396172e-07, "loss": 0.6801, "step": 269 }, { "epoch": 0.0079548635151658, "grad_norm": 2.0545352370605334, "learning_rate": 7.952871870397644e-07, "loss": 0.7102, "step": 270 }, { "epoch": 0.007984325972629377, "grad_norm": 2.2082410992889265, "learning_rate": 7.982326951399117e-07, "loss": 0.7814, "step": 271 }, { "epoch": 0.008013788430092953, "grad_norm": 2.150484382621371, "learning_rate": 8.011782032400591e-07, "loss": 0.8834, "step": 272 }, { "epoch": 0.008043250887556532, "grad_norm": 1.9314693128506295, "learning_rate": 8.041237113402063e-07, "loss": 0.6677, "step": 273 }, { "epoch": 0.008072713345020108, "grad_norm": 2.095052471076663, "learning_rate": 8.070692194403536e-07, "loss": 0.9139, "step": 274 }, { "epoch": 0.008102175802483685, "grad_norm": 1.914828874352904, "learning_rate": 8.100147275405009e-07, "loss": 0.7783, "step": 275 }, { "epoch": 0.008131638259947262, "grad_norm": 1.907792300224663, "learning_rate": 8.129602356406481e-07, "loss": 0.7267, "step": 276 }, { "epoch": 0.00816110071741084, "grad_norm": 2.0493998038838748, "learning_rate": 8.159057437407954e-07, "loss": 0.6371, "step": 277 }, { "epoch": 0.008190563174874417, "grad_norm": 1.8640115362756735, "learning_rate": 8.188512518409427e-07, "loss": 0.7465, "step": 278 }, { "epoch": 0.008220025632337993, "grad_norm": 2.2071411094239295, "learning_rate": 8.217967599410899e-07, "loss": 0.8168, "step": 279 }, { "epoch": 0.00824948808980157, "grad_norm": 2.01535276602148, "learning_rate": 8.247422680412372e-07, "loss": 0.6662, "step": 280 }, { "epoch": 0.008278950547265147, "grad_norm": 1.9579516280967033, "learning_rate": 8.276877761413845e-07, "loss": 0.844, "step": 281 }, { "epoch": 0.008308413004728725, "grad_norm": 1.8146640536891754, "learning_rate": 8.306332842415317e-07, "loss": 0.6956, "step": 282 }, { "epoch": 0.008337875462192302, "grad_norm": 1.818811626979173, "learning_rate": 8.33578792341679e-07, "loss": 0.6577, "step": 283 }, { "epoch": 0.008367337919655878, "grad_norm": 2.390257085916166, "learning_rate": 8.365243004418263e-07, "loss": 0.7035, "step": 284 }, { "epoch": 0.008396800377119455, "grad_norm": 1.8416469641692466, "learning_rate": 8.394698085419735e-07, "loss": 0.5852, "step": 285 }, { "epoch": 0.008426262834583033, "grad_norm": 2.0401429789373307, "learning_rate": 8.424153166421209e-07, "loss": 0.7615, "step": 286 }, { "epoch": 0.00845572529204661, "grad_norm": 2.133400933706827, "learning_rate": 8.453608247422682e-07, "loss": 0.7579, "step": 287 }, { "epoch": 0.008485187749510186, "grad_norm": 1.9821873295517631, "learning_rate": 8.483063328424154e-07, "loss": 0.5632, "step": 288 }, { "epoch": 0.008514650206973763, "grad_norm": 2.021394939171959, "learning_rate": 8.512518409425627e-07, "loss": 0.7226, "step": 289 }, { "epoch": 0.008544112664437341, "grad_norm": 2.148304677878911, "learning_rate": 8.5419734904271e-07, "loss": 0.5824, "step": 290 }, { "epoch": 0.008573575121900918, "grad_norm": 1.8250642847069323, "learning_rate": 8.571428571428572e-07, "loss": 0.6117, "step": 291 }, { "epoch": 0.008603037579364495, "grad_norm": 1.778198466288403, "learning_rate": 8.600883652430045e-07, "loss": 0.6956, "step": 292 }, { "epoch": 0.008632500036828071, "grad_norm": 1.8861836790935527, "learning_rate": 8.630338733431518e-07, "loss": 0.569, "step": 293 }, { "epoch": 0.008661962494291648, "grad_norm": 2.049104655483396, "learning_rate": 8.65979381443299e-07, "loss": 0.6852, "step": 294 }, { "epoch": 0.008691424951755226, "grad_norm": 2.188877547931799, "learning_rate": 8.689248895434463e-07, "loss": 0.6664, "step": 295 }, { "epoch": 0.008720887409218803, "grad_norm": 2.1273998498527655, "learning_rate": 8.718703976435936e-07, "loss": 0.6248, "step": 296 }, { "epoch": 0.00875034986668238, "grad_norm": 1.872913739136276, "learning_rate": 8.748159057437408e-07, "loss": 0.5794, "step": 297 }, { "epoch": 0.008779812324145956, "grad_norm": 2.009805844460052, "learning_rate": 8.777614138438881e-07, "loss": 0.6854, "step": 298 }, { "epoch": 0.008809274781609535, "grad_norm": 1.939787615947096, "learning_rate": 8.807069219440355e-07, "loss": 0.7828, "step": 299 }, { "epoch": 0.008838737239073111, "grad_norm": 1.7873254404974641, "learning_rate": 8.836524300441827e-07, "loss": 0.6816, "step": 300 }, { "epoch": 0.008868199696536688, "grad_norm": 1.8186583471612823, "learning_rate": 8.8659793814433e-07, "loss": 0.6407, "step": 301 }, { "epoch": 0.008897662154000265, "grad_norm": 2.000454782659242, "learning_rate": 8.895434462444773e-07, "loss": 0.6813, "step": 302 }, { "epoch": 0.008927124611463843, "grad_norm": 1.9392225991517285, "learning_rate": 8.924889543446246e-07, "loss": 0.9196, "step": 303 }, { "epoch": 0.00895658706892742, "grad_norm": 2.2360491449388182, "learning_rate": 8.954344624447718e-07, "loss": 0.6435, "step": 304 }, { "epoch": 0.008986049526390996, "grad_norm": 2.2221158855500676, "learning_rate": 8.983799705449191e-07, "loss": 0.8235, "step": 305 }, { "epoch": 0.009015511983854573, "grad_norm": 1.932585894448636, "learning_rate": 9.013254786450664e-07, "loss": 0.5903, "step": 306 }, { "epoch": 0.009044974441318151, "grad_norm": 2.043184014204871, "learning_rate": 9.042709867452136e-07, "loss": 0.6947, "step": 307 }, { "epoch": 0.009074436898781728, "grad_norm": 2.143524629486347, "learning_rate": 9.072164948453609e-07, "loss": 0.7999, "step": 308 }, { "epoch": 0.009103899356245304, "grad_norm": 2.1815726408027016, "learning_rate": 9.101620029455082e-07, "loss": 0.7352, "step": 309 }, { "epoch": 0.009133361813708881, "grad_norm": 1.8900497140339982, "learning_rate": 9.131075110456554e-07, "loss": 0.835, "step": 310 }, { "epoch": 0.009162824271172458, "grad_norm": 1.9528579181074002, "learning_rate": 9.160530191458027e-07, "loss": 0.7521, "step": 311 }, { "epoch": 0.009192286728636036, "grad_norm": 1.9145697666494088, "learning_rate": 9.189985272459501e-07, "loss": 0.8368, "step": 312 }, { "epoch": 0.009221749186099613, "grad_norm": 1.9649979977298762, "learning_rate": 9.219440353460973e-07, "loss": 0.7507, "step": 313 }, { "epoch": 0.00925121164356319, "grad_norm": 1.8771917429376908, "learning_rate": 9.248895434462446e-07, "loss": 0.8472, "step": 314 }, { "epoch": 0.009280674101026766, "grad_norm": 1.848811724379296, "learning_rate": 9.278350515463919e-07, "loss": 0.7735, "step": 315 }, { "epoch": 0.009310136558490344, "grad_norm": 1.7205177550909185, "learning_rate": 9.307805596465391e-07, "loss": 0.4982, "step": 316 }, { "epoch": 0.009339599015953921, "grad_norm": 2.086224244855351, "learning_rate": 9.337260677466864e-07, "loss": 0.7955, "step": 317 }, { "epoch": 0.009369061473417498, "grad_norm": 2.0284202827918243, "learning_rate": 9.366715758468337e-07, "loss": 0.7849, "step": 318 }, { "epoch": 0.009398523930881074, "grad_norm": 2.228879536515546, "learning_rate": 9.396170839469809e-07, "loss": 0.5426, "step": 319 }, { "epoch": 0.009427986388344653, "grad_norm": 2.40641282979687, "learning_rate": 9.425625920471282e-07, "loss": 0.7839, "step": 320 }, { "epoch": 0.00945744884580823, "grad_norm": 2.367779441567426, "learning_rate": 9.455081001472755e-07, "loss": 0.9607, "step": 321 }, { "epoch": 0.009486911303271806, "grad_norm": 2.3896839725356265, "learning_rate": 9.484536082474227e-07, "loss": 0.477, "step": 322 }, { "epoch": 0.009516373760735383, "grad_norm": 2.0687241188215393, "learning_rate": 9.5139911634757e-07, "loss": 0.7592, "step": 323 }, { "epoch": 0.00954583621819896, "grad_norm": 2.1048417150356156, "learning_rate": 9.543446244477173e-07, "loss": 0.7113, "step": 324 }, { "epoch": 0.009575298675662538, "grad_norm": 1.9198560405459335, "learning_rate": 9.572901325478646e-07, "loss": 0.8338, "step": 325 }, { "epoch": 0.009604761133126114, "grad_norm": 2.189262386569016, "learning_rate": 9.60235640648012e-07, "loss": 0.7949, "step": 326 }, { "epoch": 0.00963422359058969, "grad_norm": 1.9960389555830225, "learning_rate": 9.631811487481592e-07, "loss": 0.7461, "step": 327 }, { "epoch": 0.009663686048053267, "grad_norm": 1.819281666643158, "learning_rate": 9.661266568483065e-07, "loss": 0.6603, "step": 328 }, { "epoch": 0.009693148505516846, "grad_norm": 1.9374394559267645, "learning_rate": 9.690721649484537e-07, "loss": 0.6278, "step": 329 }, { "epoch": 0.009722610962980422, "grad_norm": 2.1201861298564033, "learning_rate": 9.72017673048601e-07, "loss": 0.8082, "step": 330 }, { "epoch": 0.009752073420443999, "grad_norm": 2.2333748329253202, "learning_rate": 9.749631811487482e-07, "loss": 0.6519, "step": 331 }, { "epoch": 0.009781535877907576, "grad_norm": 1.9407745886732948, "learning_rate": 9.779086892488956e-07, "loss": 0.6659, "step": 332 }, { "epoch": 0.009810998335371154, "grad_norm": 1.6432663585878415, "learning_rate": 9.808541973490428e-07, "loss": 0.5044, "step": 333 }, { "epoch": 0.00984046079283473, "grad_norm": 1.9514889170557665, "learning_rate": 9.837997054491901e-07, "loss": 0.827, "step": 334 }, { "epoch": 0.009869923250298307, "grad_norm": 2.056784573876092, "learning_rate": 9.867452135493373e-07, "loss": 0.7812, "step": 335 }, { "epoch": 0.009899385707761884, "grad_norm": 2.1227683564779913, "learning_rate": 9.896907216494845e-07, "loss": 0.6097, "step": 336 }, { "epoch": 0.009928848165225462, "grad_norm": 2.2401353469031724, "learning_rate": 9.926362297496318e-07, "loss": 0.8581, "step": 337 }, { "epoch": 0.009958310622689039, "grad_norm": 2.1942853708469747, "learning_rate": 9.95581737849779e-07, "loss": 0.6264, "step": 338 }, { "epoch": 0.009987773080152616, "grad_norm": 1.8780691188918897, "learning_rate": 9.985272459499266e-07, "loss": 0.5656, "step": 339 }, { "epoch": 0.010017235537616192, "grad_norm": 2.302028814894412, "learning_rate": 1.0014727540500737e-06, "loss": 0.8139, "step": 340 }, { "epoch": 0.010046697995079769, "grad_norm": 1.861941985617829, "learning_rate": 1.0044182621502211e-06, "loss": 0.6644, "step": 341 }, { "epoch": 0.010076160452543347, "grad_norm": 1.8531296082781725, "learning_rate": 1.0073637702503683e-06, "loss": 0.7474, "step": 342 }, { "epoch": 0.010105622910006924, "grad_norm": 2.039025193022249, "learning_rate": 1.0103092783505157e-06, "loss": 0.768, "step": 343 }, { "epoch": 0.0101350853674705, "grad_norm": 1.9304130925501437, "learning_rate": 1.0132547864506628e-06, "loss": 0.5249, "step": 344 }, { "epoch": 0.010164547824934077, "grad_norm": 2.0868301713403685, "learning_rate": 1.0162002945508102e-06, "loss": 0.7327, "step": 345 }, { "epoch": 0.010194010282397655, "grad_norm": 2.111891571910081, "learning_rate": 1.0191458026509573e-06, "loss": 0.6505, "step": 346 }, { "epoch": 0.010223472739861232, "grad_norm": 2.068352539240744, "learning_rate": 1.0220913107511047e-06, "loss": 0.584, "step": 347 }, { "epoch": 0.010252935197324809, "grad_norm": 1.827122783366161, "learning_rate": 1.0250368188512519e-06, "loss": 0.7086, "step": 348 }, { "epoch": 0.010282397654788385, "grad_norm": 1.7879647429326135, "learning_rate": 1.0279823269513993e-06, "loss": 0.6288, "step": 349 }, { "epoch": 0.010311860112251964, "grad_norm": 1.8040487475650395, "learning_rate": 1.0309278350515464e-06, "loss": 0.6586, "step": 350 }, { "epoch": 0.01034132256971554, "grad_norm": 1.834853375460971, "learning_rate": 1.0338733431516938e-06, "loss": 0.6636, "step": 351 }, { "epoch": 0.010370785027179117, "grad_norm": 1.7881401623926518, "learning_rate": 1.0368188512518412e-06, "loss": 0.5737, "step": 352 }, { "epoch": 0.010400247484642694, "grad_norm": 2.0267893283382334, "learning_rate": 1.0397643593519883e-06, "loss": 0.7102, "step": 353 }, { "epoch": 0.01042970994210627, "grad_norm": 1.7454267194582271, "learning_rate": 1.0427098674521357e-06, "loss": 0.6549, "step": 354 }, { "epoch": 0.010459172399569849, "grad_norm": 1.807969140053665, "learning_rate": 1.0456553755522829e-06, "loss": 0.7155, "step": 355 }, { "epoch": 0.010488634857033425, "grad_norm": 1.9533024641145547, "learning_rate": 1.0486008836524302e-06, "loss": 0.7128, "step": 356 }, { "epoch": 0.010518097314497002, "grad_norm": 2.139111069835842, "learning_rate": 1.0515463917525774e-06, "loss": 0.7004, "step": 357 }, { "epoch": 0.010547559771960579, "grad_norm": 1.7738147363528116, "learning_rate": 1.0544918998527248e-06, "loss": 0.6179, "step": 358 }, { "epoch": 0.010577022229424157, "grad_norm": 1.8282619275114669, "learning_rate": 1.057437407952872e-06, "loss": 0.6021, "step": 359 }, { "epoch": 0.010606484686887734, "grad_norm": 1.9674963592033403, "learning_rate": 1.0603829160530193e-06, "loss": 0.6751, "step": 360 }, { "epoch": 0.01063594714435131, "grad_norm": 1.8485064443898338, "learning_rate": 1.0633284241531665e-06, "loss": 0.58, "step": 361 }, { "epoch": 0.010665409601814887, "grad_norm": 2.264121347488417, "learning_rate": 1.0662739322533138e-06, "loss": 0.7039, "step": 362 }, { "epoch": 0.010694872059278465, "grad_norm": 2.1659382174461213, "learning_rate": 1.069219440353461e-06, "loss": 0.6903, "step": 363 }, { "epoch": 0.010724334516742042, "grad_norm": 2.011004158692836, "learning_rate": 1.0721649484536084e-06, "loss": 0.7068, "step": 364 }, { "epoch": 0.010753796974205618, "grad_norm": 2.2009716441063096, "learning_rate": 1.0751104565537557e-06, "loss": 0.8441, "step": 365 }, { "epoch": 0.010783259431669195, "grad_norm": 2.2269031022310584, "learning_rate": 1.078055964653903e-06, "loss": 0.9536, "step": 366 }, { "epoch": 0.010812721889132772, "grad_norm": 1.9214404290246856, "learning_rate": 1.0810014727540503e-06, "loss": 0.7803, "step": 367 }, { "epoch": 0.01084218434659635, "grad_norm": 1.930223176241039, "learning_rate": 1.0839469808541974e-06, "loss": 0.8135, "step": 368 }, { "epoch": 0.010871646804059927, "grad_norm": 1.9946997426319026, "learning_rate": 1.0868924889543448e-06, "loss": 0.7947, "step": 369 }, { "epoch": 0.010901109261523503, "grad_norm": 1.918265866764682, "learning_rate": 1.089837997054492e-06, "loss": 0.6325, "step": 370 }, { "epoch": 0.01093057171898708, "grad_norm": 1.8821760299678565, "learning_rate": 1.0927835051546393e-06, "loss": 0.6811, "step": 371 }, { "epoch": 0.010960034176450658, "grad_norm": 2.029340985077854, "learning_rate": 1.0957290132547865e-06, "loss": 0.8034, "step": 372 }, { "epoch": 0.010989496633914235, "grad_norm": 2.1978039253620105, "learning_rate": 1.0986745213549339e-06, "loss": 0.5884, "step": 373 }, { "epoch": 0.011018959091377812, "grad_norm": 2.006314973007293, "learning_rate": 1.101620029455081e-06, "loss": 0.7602, "step": 374 }, { "epoch": 0.011048421548841388, "grad_norm": 2.0043643781826037, "learning_rate": 1.1045655375552284e-06, "loss": 0.6883, "step": 375 }, { "epoch": 0.011077884006304967, "grad_norm": 1.7943062164250272, "learning_rate": 1.1075110456553756e-06, "loss": 0.6382, "step": 376 }, { "epoch": 0.011107346463768543, "grad_norm": 1.804130393964889, "learning_rate": 1.110456553755523e-06, "loss": 0.7427, "step": 377 }, { "epoch": 0.01113680892123212, "grad_norm": 2.0947879309023714, "learning_rate": 1.1134020618556703e-06, "loss": 0.5379, "step": 378 }, { "epoch": 0.011166271378695697, "grad_norm": 2.1697164035837773, "learning_rate": 1.1163475699558175e-06, "loss": 0.6941, "step": 379 }, { "epoch": 0.011195733836159275, "grad_norm": 1.7125411094333298, "learning_rate": 1.1192930780559649e-06, "loss": 0.6862, "step": 380 }, { "epoch": 0.011225196293622852, "grad_norm": 2.224672274135788, "learning_rate": 1.122238586156112e-06, "loss": 0.7874, "step": 381 }, { "epoch": 0.011254658751086428, "grad_norm": 2.1585299995350287, "learning_rate": 1.1251840942562594e-06, "loss": 0.6955, "step": 382 }, { "epoch": 0.011284121208550005, "grad_norm": 1.9875240074520002, "learning_rate": 1.1281296023564065e-06, "loss": 0.803, "step": 383 }, { "epoch": 0.011313583666013581, "grad_norm": 1.9422171045338898, "learning_rate": 1.131075110456554e-06, "loss": 0.7251, "step": 384 }, { "epoch": 0.01134304612347716, "grad_norm": 2.0723686718114767, "learning_rate": 1.134020618556701e-06, "loss": 0.5433, "step": 385 }, { "epoch": 0.011372508580940736, "grad_norm": 1.7999341665770234, "learning_rate": 1.1369661266568485e-06, "loss": 0.6896, "step": 386 }, { "epoch": 0.011401971038404313, "grad_norm": 1.998620680490772, "learning_rate": 1.1399116347569956e-06, "loss": 0.6771, "step": 387 }, { "epoch": 0.01143143349586789, "grad_norm": 1.9991347309012597, "learning_rate": 1.142857142857143e-06, "loss": 0.7044, "step": 388 }, { "epoch": 0.011460895953331468, "grad_norm": 1.9979775437728762, "learning_rate": 1.1458026509572901e-06, "loss": 0.7259, "step": 389 }, { "epoch": 0.011490358410795045, "grad_norm": 1.8491776800426882, "learning_rate": 1.1487481590574375e-06, "loss": 0.5741, "step": 390 }, { "epoch": 0.011519820868258621, "grad_norm": 1.963474884077475, "learning_rate": 1.1516936671575847e-06, "loss": 0.6729, "step": 391 }, { "epoch": 0.011549283325722198, "grad_norm": 1.9454352326233402, "learning_rate": 1.154639175257732e-06, "loss": 0.7184, "step": 392 }, { "epoch": 0.011578745783185776, "grad_norm": 2.079139820535558, "learning_rate": 1.1575846833578794e-06, "loss": 0.454, "step": 393 }, { "epoch": 0.011608208240649353, "grad_norm": 1.8664242160858597, "learning_rate": 1.1605301914580266e-06, "loss": 0.8008, "step": 394 }, { "epoch": 0.01163767069811293, "grad_norm": 1.812623327183686, "learning_rate": 1.163475699558174e-06, "loss": 0.6989, "step": 395 }, { "epoch": 0.011667133155576506, "grad_norm": 2.1485471220965877, "learning_rate": 1.1664212076583211e-06, "loss": 0.7004, "step": 396 }, { "epoch": 0.011696595613040083, "grad_norm": 2.0994538836880143, "learning_rate": 1.1693667157584685e-06, "loss": 0.8145, "step": 397 }, { "epoch": 0.011726058070503661, "grad_norm": 2.0957894592699695, "learning_rate": 1.1723122238586157e-06, "loss": 0.6358, "step": 398 }, { "epoch": 0.011755520527967238, "grad_norm": 2.2338690777741794, "learning_rate": 1.175257731958763e-06, "loss": 0.695, "step": 399 }, { "epoch": 0.011784982985430814, "grad_norm": 1.9391729750210285, "learning_rate": 1.1782032400589102e-06, "loss": 0.577, "step": 400 }, { "epoch": 0.011814445442894391, "grad_norm": 1.9624721860508019, "learning_rate": 1.1811487481590576e-06, "loss": 0.7409, "step": 401 }, { "epoch": 0.01184390790035797, "grad_norm": 1.9899431209998166, "learning_rate": 1.1840942562592047e-06, "loss": 0.5083, "step": 402 }, { "epoch": 0.011873370357821546, "grad_norm": 1.7585889929416165, "learning_rate": 1.187039764359352e-06, "loss": 0.632, "step": 403 }, { "epoch": 0.011902832815285123, "grad_norm": 2.0608140910683277, "learning_rate": 1.1899852724594993e-06, "loss": 0.8455, "step": 404 }, { "epoch": 0.0119322952727487, "grad_norm": 1.9378896598910476, "learning_rate": 1.1929307805596466e-06, "loss": 0.5878, "step": 405 }, { "epoch": 0.011961757730212278, "grad_norm": 1.8952983129738266, "learning_rate": 1.195876288659794e-06, "loss": 0.7066, "step": 406 }, { "epoch": 0.011991220187675854, "grad_norm": 1.790177559143386, "learning_rate": 1.1988217967599412e-06, "loss": 0.5524, "step": 407 }, { "epoch": 0.012020682645139431, "grad_norm": 2.077650288593039, "learning_rate": 1.2017673048600885e-06, "loss": 0.6778, "step": 408 }, { "epoch": 0.012050145102603008, "grad_norm": 2.1195362465618635, "learning_rate": 1.2047128129602357e-06, "loss": 0.6589, "step": 409 }, { "epoch": 0.012079607560066586, "grad_norm": 1.8250922704083754, "learning_rate": 1.207658321060383e-06, "loss": 0.588, "step": 410 }, { "epoch": 0.012109070017530163, "grad_norm": 2.008579591785247, "learning_rate": 1.2106038291605302e-06, "loss": 0.6602, "step": 411 }, { "epoch": 0.01213853247499374, "grad_norm": 2.077401808641877, "learning_rate": 1.2135493372606776e-06, "loss": 0.6536, "step": 412 }, { "epoch": 0.012167994932457316, "grad_norm": 1.9205198621632247, "learning_rate": 1.2164948453608248e-06, "loss": 0.6714, "step": 413 }, { "epoch": 0.012197457389920893, "grad_norm": 1.8584190334149933, "learning_rate": 1.2194403534609721e-06, "loss": 0.5846, "step": 414 }, { "epoch": 0.012226919847384471, "grad_norm": 2.203074995662405, "learning_rate": 1.2223858615611193e-06, "loss": 0.9206, "step": 415 }, { "epoch": 0.012256382304848048, "grad_norm": 1.7476958780697769, "learning_rate": 1.2253313696612667e-06, "loss": 0.5481, "step": 416 }, { "epoch": 0.012285844762311624, "grad_norm": 2.034749981087228, "learning_rate": 1.2282768777614138e-06, "loss": 0.6346, "step": 417 }, { "epoch": 0.0123153072197752, "grad_norm": 1.8954194585240987, "learning_rate": 1.2312223858615612e-06, "loss": 0.5506, "step": 418 }, { "epoch": 0.01234476967723878, "grad_norm": 2.102530273469792, "learning_rate": 1.2341678939617086e-06, "loss": 0.6941, "step": 419 }, { "epoch": 0.012374232134702356, "grad_norm": 2.1948871737579947, "learning_rate": 1.2371134020618557e-06, "loss": 0.7035, "step": 420 }, { "epoch": 0.012403694592165932, "grad_norm": 1.9707138130809592, "learning_rate": 1.2400589101620031e-06, "loss": 0.7677, "step": 421 }, { "epoch": 0.012433157049629509, "grad_norm": 1.9879681171326737, "learning_rate": 1.2430044182621503e-06, "loss": 0.7998, "step": 422 }, { "epoch": 0.012462619507093087, "grad_norm": 1.9936721484618964, "learning_rate": 1.2459499263622976e-06, "loss": 0.603, "step": 423 }, { "epoch": 0.012492081964556664, "grad_norm": 2.2189205390155506, "learning_rate": 1.2488954344624448e-06, "loss": 0.7177, "step": 424 }, { "epoch": 0.01252154442202024, "grad_norm": 2.0820223420401693, "learning_rate": 1.2518409425625922e-06, "loss": 0.6096, "step": 425 }, { "epoch": 0.012551006879483817, "grad_norm": 1.9006414329238484, "learning_rate": 1.2547864506627393e-06, "loss": 0.7658, "step": 426 }, { "epoch": 0.012580469336947394, "grad_norm": 1.9654051677445825, "learning_rate": 1.2577319587628867e-06, "loss": 0.5379, "step": 427 }, { "epoch": 0.012609931794410972, "grad_norm": 1.9689930679759469, "learning_rate": 1.2606774668630339e-06, "loss": 0.8184, "step": 428 }, { "epoch": 0.012639394251874549, "grad_norm": 1.9335522584553297, "learning_rate": 1.2636229749631813e-06, "loss": 0.6925, "step": 429 }, { "epoch": 0.012668856709338126, "grad_norm": 1.8839572137407667, "learning_rate": 1.2665684830633284e-06, "loss": 0.7016, "step": 430 }, { "epoch": 0.012698319166801702, "grad_norm": 1.8763368473243385, "learning_rate": 1.2695139911634758e-06, "loss": 0.7259, "step": 431 }, { "epoch": 0.01272778162426528, "grad_norm": 2.0762453739259867, "learning_rate": 1.272459499263623e-06, "loss": 0.5023, "step": 432 }, { "epoch": 0.012757244081728857, "grad_norm": 1.9281103904486994, "learning_rate": 1.2754050073637703e-06, "loss": 0.864, "step": 433 }, { "epoch": 0.012786706539192434, "grad_norm": 1.7466679618827823, "learning_rate": 1.2783505154639175e-06, "loss": 0.6022, "step": 434 }, { "epoch": 0.01281616899665601, "grad_norm": 1.873825546659215, "learning_rate": 1.2812960235640649e-06, "loss": 0.6746, "step": 435 }, { "epoch": 0.012845631454119589, "grad_norm": 2.1190924444927117, "learning_rate": 1.284241531664212e-06, "loss": 0.839, "step": 436 }, { "epoch": 0.012875093911583166, "grad_norm": 1.938934526126422, "learning_rate": 1.2871870397643594e-06, "loss": 0.5884, "step": 437 }, { "epoch": 0.012904556369046742, "grad_norm": 1.9828226102188364, "learning_rate": 1.290132547864507e-06, "loss": 0.6656, "step": 438 }, { "epoch": 0.012934018826510319, "grad_norm": 2.215806054347702, "learning_rate": 1.2930780559646541e-06, "loss": 0.6089, "step": 439 }, { "epoch": 0.012963481283973895, "grad_norm": 2.0100202885376666, "learning_rate": 1.2960235640648013e-06, "loss": 0.5742, "step": 440 }, { "epoch": 0.012992943741437474, "grad_norm": 2.158846600853426, "learning_rate": 1.2989690721649487e-06, "loss": 0.5988, "step": 441 }, { "epoch": 0.01302240619890105, "grad_norm": 1.9617729912760307, "learning_rate": 1.3019145802650958e-06, "loss": 0.71, "step": 442 }, { "epoch": 0.013051868656364627, "grad_norm": 1.8933083621173872, "learning_rate": 1.3048600883652432e-06, "loss": 0.6367, "step": 443 }, { "epoch": 0.013081331113828204, "grad_norm": 1.8084126725267844, "learning_rate": 1.3078055964653904e-06, "loss": 0.7032, "step": 444 }, { "epoch": 0.013110793571291782, "grad_norm": 1.877064268711968, "learning_rate": 1.3107511045655377e-06, "loss": 0.6964, "step": 445 }, { "epoch": 0.013140256028755359, "grad_norm": 1.8891242647558106, "learning_rate": 1.313696612665685e-06, "loss": 0.5256, "step": 446 }, { "epoch": 0.013169718486218935, "grad_norm": 1.928767997636866, "learning_rate": 1.3166421207658323e-06, "loss": 0.7201, "step": 447 }, { "epoch": 0.013199180943682512, "grad_norm": 1.8463095053262704, "learning_rate": 1.3195876288659794e-06, "loss": 0.5957, "step": 448 }, { "epoch": 0.01322864340114609, "grad_norm": 2.0017015065500368, "learning_rate": 1.3225331369661268e-06, "loss": 0.6469, "step": 449 }, { "epoch": 0.013258105858609667, "grad_norm": 2.0180595546477527, "learning_rate": 1.325478645066274e-06, "loss": 0.5214, "step": 450 }, { "epoch": 0.013287568316073244, "grad_norm": 1.8306505091638954, "learning_rate": 1.3284241531664213e-06, "loss": 0.649, "step": 451 }, { "epoch": 0.01331703077353682, "grad_norm": 2.0864239562283595, "learning_rate": 1.3313696612665685e-06, "loss": 0.7421, "step": 452 }, { "epoch": 0.013346493231000399, "grad_norm": 2.053867439597934, "learning_rate": 1.3343151693667159e-06, "loss": 0.5466, "step": 453 }, { "epoch": 0.013375955688463975, "grad_norm": 1.8738076581553837, "learning_rate": 1.337260677466863e-06, "loss": 0.605, "step": 454 }, { "epoch": 0.013405418145927552, "grad_norm": 2.1333042842340744, "learning_rate": 1.3402061855670104e-06, "loss": 0.7329, "step": 455 }, { "epoch": 0.013434880603391128, "grad_norm": 1.904743073573424, "learning_rate": 1.3431516936671576e-06, "loss": 0.824, "step": 456 }, { "epoch": 0.013464343060854705, "grad_norm": 1.9549076694891507, "learning_rate": 1.346097201767305e-06, "loss": 0.8559, "step": 457 }, { "epoch": 0.013493805518318283, "grad_norm": 2.088690592669801, "learning_rate": 1.349042709867452e-06, "loss": 0.7764, "step": 458 }, { "epoch": 0.01352326797578186, "grad_norm": 2.0446607402136507, "learning_rate": 1.3519882179675995e-06, "loss": 0.6016, "step": 459 }, { "epoch": 0.013552730433245437, "grad_norm": 1.97466696300013, "learning_rate": 1.3549337260677466e-06, "loss": 0.6897, "step": 460 }, { "epoch": 0.013582192890709013, "grad_norm": 1.849797591166829, "learning_rate": 1.357879234167894e-06, "loss": 0.6943, "step": 461 }, { "epoch": 0.013611655348172592, "grad_norm": 2.080239660451618, "learning_rate": 1.3608247422680412e-06, "loss": 0.6845, "step": 462 }, { "epoch": 0.013641117805636168, "grad_norm": 1.6444558320343814, "learning_rate": 1.3637702503681885e-06, "loss": 0.5707, "step": 463 }, { "epoch": 0.013670580263099745, "grad_norm": 2.156553420331648, "learning_rate": 1.3667157584683357e-06, "loss": 0.4397, "step": 464 }, { "epoch": 0.013700042720563322, "grad_norm": 2.1192948328795347, "learning_rate": 1.3696612665684833e-06, "loss": 0.7218, "step": 465 }, { "epoch": 0.0137295051780269, "grad_norm": 2.300195101058383, "learning_rate": 1.3726067746686307e-06, "loss": 0.6872, "step": 466 }, { "epoch": 0.013758967635490477, "grad_norm": 1.92854783266479, "learning_rate": 1.3755522827687778e-06, "loss": 0.6876, "step": 467 }, { "epoch": 0.013788430092954053, "grad_norm": 2.0196422172101807, "learning_rate": 1.3784977908689252e-06, "loss": 0.4602, "step": 468 }, { "epoch": 0.01381789255041763, "grad_norm": 1.9879671456252102, "learning_rate": 1.3814432989690724e-06, "loss": 0.749, "step": 469 }, { "epoch": 0.013847355007881207, "grad_norm": 1.9661388169339205, "learning_rate": 1.3843888070692197e-06, "loss": 0.779, "step": 470 }, { "epoch": 0.013876817465344785, "grad_norm": 1.930778872989141, "learning_rate": 1.3873343151693669e-06, "loss": 0.7245, "step": 471 }, { "epoch": 0.013906279922808362, "grad_norm": 2.1827984412521872, "learning_rate": 1.3902798232695143e-06, "loss": 0.6835, "step": 472 }, { "epoch": 0.013935742380271938, "grad_norm": 1.8687016121423712, "learning_rate": 1.3932253313696614e-06, "loss": 0.7087, "step": 473 }, { "epoch": 0.013965204837735515, "grad_norm": 1.9917258532893622, "learning_rate": 1.3961708394698088e-06, "loss": 0.7081, "step": 474 }, { "epoch": 0.013994667295199093, "grad_norm": 2.096910344257487, "learning_rate": 1.399116347569956e-06, "loss": 0.7245, "step": 475 }, { "epoch": 0.01402412975266267, "grad_norm": 1.7796893749272005, "learning_rate": 1.4020618556701031e-06, "loss": 0.6166, "step": 476 }, { "epoch": 0.014053592210126246, "grad_norm": 1.8883828527838575, "learning_rate": 1.4050073637702505e-06, "loss": 0.702, "step": 477 }, { "epoch": 0.014083054667589823, "grad_norm": 1.9785950283604585, "learning_rate": 1.4079528718703977e-06, "loss": 0.6398, "step": 478 }, { "epoch": 0.014112517125053401, "grad_norm": 1.9028770348590824, "learning_rate": 1.410898379970545e-06, "loss": 0.6587, "step": 479 }, { "epoch": 0.014141979582516978, "grad_norm": 1.880367015207762, "learning_rate": 1.4138438880706922e-06, "loss": 0.7105, "step": 480 }, { "epoch": 0.014171442039980555, "grad_norm": 1.9496946051813253, "learning_rate": 1.4167893961708396e-06, "loss": 0.7805, "step": 481 }, { "epoch": 0.014200904497444131, "grad_norm": 1.842452281477552, "learning_rate": 1.4197349042709867e-06, "loss": 0.722, "step": 482 }, { "epoch": 0.01423036695490771, "grad_norm": 1.9687296184580392, "learning_rate": 1.422680412371134e-06, "loss": 0.5558, "step": 483 }, { "epoch": 0.014259829412371286, "grad_norm": 1.781024309235228, "learning_rate": 1.4256259204712813e-06, "loss": 0.5083, "step": 484 }, { "epoch": 0.014289291869834863, "grad_norm": 1.8597325180051905, "learning_rate": 1.4285714285714286e-06, "loss": 0.7589, "step": 485 }, { "epoch": 0.01431875432729844, "grad_norm": 2.0400372260878368, "learning_rate": 1.4315169366715758e-06, "loss": 0.6195, "step": 486 }, { "epoch": 0.014348216784762016, "grad_norm": 2.0427582121483043, "learning_rate": 1.4344624447717232e-06, "loss": 0.7232, "step": 487 }, { "epoch": 0.014377679242225595, "grad_norm": 1.851663250467893, "learning_rate": 1.4374079528718703e-06, "loss": 0.7116, "step": 488 }, { "epoch": 0.014407141699689171, "grad_norm": 2.360541393476528, "learning_rate": 1.4403534609720177e-06, "loss": 0.6238, "step": 489 }, { "epoch": 0.014436604157152748, "grad_norm": 1.8927653246340972, "learning_rate": 1.4432989690721649e-06, "loss": 0.5191, "step": 490 }, { "epoch": 0.014466066614616324, "grad_norm": 2.0041717147807248, "learning_rate": 1.4462444771723124e-06, "loss": 0.632, "step": 491 }, { "epoch": 0.014495529072079903, "grad_norm": 1.9773236917082666, "learning_rate": 1.4491899852724598e-06, "loss": 0.6522, "step": 492 }, { "epoch": 0.01452499152954348, "grad_norm": 2.2684566063886153, "learning_rate": 1.452135493372607e-06, "loss": 0.549, "step": 493 }, { "epoch": 0.014554453987007056, "grad_norm": 1.9664704331072755, "learning_rate": 1.4550810014727543e-06, "loss": 0.5464, "step": 494 }, { "epoch": 0.014583916444470633, "grad_norm": 2.0988421197327387, "learning_rate": 1.4580265095729015e-06, "loss": 0.6199, "step": 495 }, { "epoch": 0.014613378901934211, "grad_norm": 1.9505229968852662, "learning_rate": 1.4609720176730489e-06, "loss": 0.5323, "step": 496 }, { "epoch": 0.014642841359397788, "grad_norm": 2.2084342285234952, "learning_rate": 1.463917525773196e-06, "loss": 0.777, "step": 497 }, { "epoch": 0.014672303816861364, "grad_norm": 2.1451100015702815, "learning_rate": 1.4668630338733434e-06, "loss": 0.7534, "step": 498 }, { "epoch": 0.014701766274324941, "grad_norm": 1.8001319511357115, "learning_rate": 1.4698085419734906e-06, "loss": 0.6219, "step": 499 }, { "epoch": 0.014731228731788518, "grad_norm": 1.94629502757718, "learning_rate": 1.472754050073638e-06, "loss": 0.6092, "step": 500 }, { "epoch": 0.014760691189252096, "grad_norm": 2.2057355881901324, "learning_rate": 1.4756995581737851e-06, "loss": 0.6168, "step": 501 }, { "epoch": 0.014790153646715673, "grad_norm": 1.6427346940269103, "learning_rate": 1.4786450662739325e-06, "loss": 0.5142, "step": 502 }, { "epoch": 0.01481961610417925, "grad_norm": 1.7751539659280429, "learning_rate": 1.4815905743740796e-06, "loss": 0.5201, "step": 503 }, { "epoch": 0.014849078561642826, "grad_norm": 2.0100443911587944, "learning_rate": 1.484536082474227e-06, "loss": 0.7252, "step": 504 }, { "epoch": 0.014878541019106404, "grad_norm": 1.8643837017193314, "learning_rate": 1.4874815905743742e-06, "loss": 0.5812, "step": 505 }, { "epoch": 0.014908003476569981, "grad_norm": 2.1839247431010604, "learning_rate": 1.4904270986745216e-06, "loss": 0.7188, "step": 506 }, { "epoch": 0.014937465934033558, "grad_norm": 2.06341702245163, "learning_rate": 1.4933726067746687e-06, "loss": 0.5469, "step": 507 }, { "epoch": 0.014966928391497134, "grad_norm": 2.4857807528109728, "learning_rate": 1.496318114874816e-06, "loss": 0.7753, "step": 508 }, { "epoch": 0.014996390848960713, "grad_norm": 1.9193385017165188, "learning_rate": 1.4992636229749632e-06, "loss": 0.7381, "step": 509 }, { "epoch": 0.01502585330642429, "grad_norm": 1.780462756964605, "learning_rate": 1.5022091310751106e-06, "loss": 0.4983, "step": 510 }, { "epoch": 0.015055315763887866, "grad_norm": 1.6696476864407024, "learning_rate": 1.5051546391752578e-06, "loss": 0.5886, "step": 511 }, { "epoch": 0.015084778221351442, "grad_norm": 2.0083804714076847, "learning_rate": 1.508100147275405e-06, "loss": 0.6913, "step": 512 }, { "epoch": 0.01511424067881502, "grad_norm": 1.9374491765196864, "learning_rate": 1.5110456553755523e-06, "loss": 0.6364, "step": 513 }, { "epoch": 0.015143703136278597, "grad_norm": 2.107147133251536, "learning_rate": 1.5139911634756995e-06, "loss": 0.6123, "step": 514 }, { "epoch": 0.015173165593742174, "grad_norm": 1.8157442130423107, "learning_rate": 1.5169366715758468e-06, "loss": 0.5513, "step": 515 }, { "epoch": 0.01520262805120575, "grad_norm": 2.050337524345592, "learning_rate": 1.519882179675994e-06, "loss": 0.6378, "step": 516 }, { "epoch": 0.015232090508669327, "grad_norm": 2.013795399645252, "learning_rate": 1.5228276877761414e-06, "loss": 0.6788, "step": 517 }, { "epoch": 0.015261552966132906, "grad_norm": 1.954399440047036, "learning_rate": 1.525773195876289e-06, "loss": 0.5754, "step": 518 }, { "epoch": 0.015291015423596482, "grad_norm": 2.019263474030876, "learning_rate": 1.5287187039764361e-06, "loss": 0.6569, "step": 519 }, { "epoch": 0.015320477881060059, "grad_norm": 2.1201062745690944, "learning_rate": 1.5316642120765835e-06, "loss": 0.5269, "step": 520 }, { "epoch": 0.015349940338523636, "grad_norm": 1.9419250721152173, "learning_rate": 1.5346097201767307e-06, "loss": 0.5506, "step": 521 }, { "epoch": 0.015379402795987214, "grad_norm": 1.785825711112377, "learning_rate": 1.537555228276878e-06, "loss": 0.6279, "step": 522 }, { "epoch": 0.01540886525345079, "grad_norm": 1.8293586409539389, "learning_rate": 1.5405007363770252e-06, "loss": 0.6661, "step": 523 }, { "epoch": 0.015438327710914367, "grad_norm": 1.9771575717316288, "learning_rate": 1.5434462444771726e-06, "loss": 0.7222, "step": 524 }, { "epoch": 0.015467790168377944, "grad_norm": 1.6942468926557013, "learning_rate": 1.5463917525773197e-06, "loss": 0.4809, "step": 525 }, { "epoch": 0.015497252625841522, "grad_norm": 1.9851815345585966, "learning_rate": 1.549337260677467e-06, "loss": 0.6938, "step": 526 }, { "epoch": 0.015526715083305099, "grad_norm": 2.1274713902782576, "learning_rate": 1.5522827687776143e-06, "loss": 0.7334, "step": 527 }, { "epoch": 0.015556177540768676, "grad_norm": 1.9851019956686868, "learning_rate": 1.5552282768777616e-06, "loss": 0.7107, "step": 528 }, { "epoch": 0.015585639998232252, "grad_norm": 2.106524258548117, "learning_rate": 1.5581737849779088e-06, "loss": 0.8288, "step": 529 }, { "epoch": 0.015615102455695829, "grad_norm": 1.8989848562745353, "learning_rate": 1.5611192930780562e-06, "loss": 0.6391, "step": 530 }, { "epoch": 0.015644564913159405, "grad_norm": 1.9289627294458334, "learning_rate": 1.5640648011782033e-06, "loss": 0.7222, "step": 531 }, { "epoch": 0.015674027370622984, "grad_norm": 1.8756624682362162, "learning_rate": 1.5670103092783507e-06, "loss": 0.6788, "step": 532 }, { "epoch": 0.015703489828086562, "grad_norm": 1.7138436546173716, "learning_rate": 1.5699558173784979e-06, "loss": 0.6395, "step": 533 }, { "epoch": 0.015732952285550137, "grad_norm": 1.742923230877165, "learning_rate": 1.5729013254786452e-06, "loss": 0.603, "step": 534 }, { "epoch": 0.015762414743013715, "grad_norm": 1.7337682656709497, "learning_rate": 1.5758468335787924e-06, "loss": 0.5845, "step": 535 }, { "epoch": 0.01579187720047729, "grad_norm": 2.001609720062542, "learning_rate": 1.5787923416789398e-06, "loss": 0.6409, "step": 536 }, { "epoch": 0.01582133965794087, "grad_norm": 1.7824719864182403, "learning_rate": 1.581737849779087e-06, "loss": 0.4217, "step": 537 }, { "epoch": 0.015850802115404447, "grad_norm": 2.0082199582204554, "learning_rate": 1.5846833578792343e-06, "loss": 0.5861, "step": 538 }, { "epoch": 0.015880264572868022, "grad_norm": 1.9279601558179928, "learning_rate": 1.5876288659793815e-06, "loss": 0.6493, "step": 539 }, { "epoch": 0.0159097270303316, "grad_norm": 1.7576326012616905, "learning_rate": 1.5905743740795288e-06, "loss": 0.6109, "step": 540 }, { "epoch": 0.01593918948779518, "grad_norm": 1.963617177720186, "learning_rate": 1.593519882179676e-06, "loss": 0.5656, "step": 541 }, { "epoch": 0.015968651945258754, "grad_norm": 1.8271902367420307, "learning_rate": 1.5964653902798234e-06, "loss": 0.4288, "step": 542 }, { "epoch": 0.015998114402722332, "grad_norm": 2.053400502918421, "learning_rate": 1.5994108983799705e-06, "loss": 0.5384, "step": 543 }, { "epoch": 0.016027576860185907, "grad_norm": 2.4294623750469957, "learning_rate": 1.6023564064801181e-06, "loss": 0.7252, "step": 544 }, { "epoch": 0.016057039317649485, "grad_norm": 2.0627350984830395, "learning_rate": 1.6053019145802653e-06, "loss": 0.7452, "step": 545 }, { "epoch": 0.016086501775113064, "grad_norm": 2.095465728894966, "learning_rate": 1.6082474226804127e-06, "loss": 0.6143, "step": 546 }, { "epoch": 0.01611596423257664, "grad_norm": 1.7734713373878743, "learning_rate": 1.6111929307805598e-06, "loss": 0.5657, "step": 547 }, { "epoch": 0.016145426690040217, "grad_norm": 1.815067507085739, "learning_rate": 1.6141384388807072e-06, "loss": 0.6322, "step": 548 }, { "epoch": 0.016174889147503792, "grad_norm": 1.9195318496572522, "learning_rate": 1.6170839469808544e-06, "loss": 0.6033, "step": 549 }, { "epoch": 0.01620435160496737, "grad_norm": 1.72965936212517, "learning_rate": 1.6200294550810017e-06, "loss": 0.5137, "step": 550 }, { "epoch": 0.01623381406243095, "grad_norm": 2.0632793527785256, "learning_rate": 1.6229749631811489e-06, "loss": 0.5711, "step": 551 }, { "epoch": 0.016263276519894523, "grad_norm": 1.7961230981115113, "learning_rate": 1.6259204712812963e-06, "loss": 0.5804, "step": 552 }, { "epoch": 0.016292738977358102, "grad_norm": 1.9686788594005862, "learning_rate": 1.6288659793814434e-06, "loss": 0.5205, "step": 553 }, { "epoch": 0.01632220143482168, "grad_norm": 1.7839607444810588, "learning_rate": 1.6318114874815908e-06, "loss": 0.7015, "step": 554 }, { "epoch": 0.016351663892285255, "grad_norm": 2.0427602671233345, "learning_rate": 1.634756995581738e-06, "loss": 0.7087, "step": 555 }, { "epoch": 0.016381126349748833, "grad_norm": 2.0200475392385377, "learning_rate": 1.6377025036818853e-06, "loss": 0.8105, "step": 556 }, { "epoch": 0.01641058880721241, "grad_norm": 2.1915656014991756, "learning_rate": 1.6406480117820325e-06, "loss": 0.673, "step": 557 }, { "epoch": 0.016440051264675987, "grad_norm": 2.070957484392669, "learning_rate": 1.6435935198821799e-06, "loss": 0.6022, "step": 558 }, { "epoch": 0.016469513722139565, "grad_norm": 1.89074550427448, "learning_rate": 1.646539027982327e-06, "loss": 0.685, "step": 559 }, { "epoch": 0.01649897617960314, "grad_norm": 1.977558291865041, "learning_rate": 1.6494845360824744e-06, "loss": 0.5096, "step": 560 }, { "epoch": 0.01652843863706672, "grad_norm": 2.015609044203737, "learning_rate": 1.6524300441826216e-06, "loss": 0.5374, "step": 561 }, { "epoch": 0.016557901094530293, "grad_norm": 1.9689523627169052, "learning_rate": 1.655375552282769e-06, "loss": 0.7842, "step": 562 }, { "epoch": 0.01658736355199387, "grad_norm": 1.9803148156903334, "learning_rate": 1.658321060382916e-06, "loss": 0.6067, "step": 563 }, { "epoch": 0.01661682600945745, "grad_norm": 1.9436784087754306, "learning_rate": 1.6612665684830635e-06, "loss": 0.6518, "step": 564 }, { "epoch": 0.016646288466921025, "grad_norm": 2.0692208939976786, "learning_rate": 1.6642120765832106e-06, "loss": 0.5814, "step": 565 }, { "epoch": 0.016675750924384603, "grad_norm": 2.257006628075569, "learning_rate": 1.667157584683358e-06, "loss": 0.7098, "step": 566 }, { "epoch": 0.01670521338184818, "grad_norm": 1.9035204700333908, "learning_rate": 1.6701030927835052e-06, "loss": 0.5154, "step": 567 }, { "epoch": 0.016734675839311756, "grad_norm": 1.9535780668677203, "learning_rate": 1.6730486008836525e-06, "loss": 0.5979, "step": 568 }, { "epoch": 0.016764138296775335, "grad_norm": 1.8458253797212874, "learning_rate": 1.6759941089837997e-06, "loss": 0.6506, "step": 569 }, { "epoch": 0.01679360075423891, "grad_norm": 2.089395289413484, "learning_rate": 1.678939617083947e-06, "loss": 0.5848, "step": 570 }, { "epoch": 0.016823063211702488, "grad_norm": 2.0249373127171784, "learning_rate": 1.6818851251840944e-06, "loss": 0.6018, "step": 571 }, { "epoch": 0.016852525669166066, "grad_norm": 1.8186679721621652, "learning_rate": 1.6848306332842418e-06, "loss": 0.6365, "step": 572 }, { "epoch": 0.01688198812662964, "grad_norm": 1.906038889111751, "learning_rate": 1.687776141384389e-06, "loss": 0.7371, "step": 573 }, { "epoch": 0.01691145058409322, "grad_norm": 1.8952524040397207, "learning_rate": 1.6907216494845363e-06, "loss": 0.7983, "step": 574 }, { "epoch": 0.016940913041556795, "grad_norm": 2.103475280624237, "learning_rate": 1.6936671575846835e-06, "loss": 0.6714, "step": 575 }, { "epoch": 0.016970375499020373, "grad_norm": 1.9616806303916796, "learning_rate": 1.6966126656848309e-06, "loss": 0.7124, "step": 576 }, { "epoch": 0.01699983795648395, "grad_norm": 1.942048874348961, "learning_rate": 1.699558173784978e-06, "loss": 0.5252, "step": 577 }, { "epoch": 0.017029300413947526, "grad_norm": 1.9082057331831057, "learning_rate": 1.7025036818851254e-06, "loss": 0.7482, "step": 578 }, { "epoch": 0.017058762871411105, "grad_norm": 1.8126701117873856, "learning_rate": 1.7054491899852726e-06, "loss": 0.6565, "step": 579 }, { "epoch": 0.017088225328874683, "grad_norm": 1.9943745056416808, "learning_rate": 1.70839469808542e-06, "loss": 0.5232, "step": 580 }, { "epoch": 0.017117687786338258, "grad_norm": 1.8372794097039251, "learning_rate": 1.7113402061855671e-06, "loss": 0.7056, "step": 581 }, { "epoch": 0.017147150243801836, "grad_norm": 1.960901057618469, "learning_rate": 1.7142857142857145e-06, "loss": 0.8304, "step": 582 }, { "epoch": 0.01717661270126541, "grad_norm": 2.0814891984797983, "learning_rate": 1.7172312223858616e-06, "loss": 0.7637, "step": 583 }, { "epoch": 0.01720607515872899, "grad_norm": 1.8665544638423062, "learning_rate": 1.720176730486009e-06, "loss": 0.6091, "step": 584 }, { "epoch": 0.017235537616192568, "grad_norm": 1.7807532665736256, "learning_rate": 1.7231222385861562e-06, "loss": 0.6985, "step": 585 }, { "epoch": 0.017265000073656143, "grad_norm": 2.2464011168020583, "learning_rate": 1.7260677466863035e-06, "loss": 0.7514, "step": 586 }, { "epoch": 0.01729446253111972, "grad_norm": 1.8010633870304995, "learning_rate": 1.7290132547864507e-06, "loss": 0.6516, "step": 587 }, { "epoch": 0.017323924988583296, "grad_norm": 1.8819364126019844, "learning_rate": 1.731958762886598e-06, "loss": 0.5643, "step": 588 }, { "epoch": 0.017353387446046874, "grad_norm": 1.9597570025753337, "learning_rate": 1.7349042709867452e-06, "loss": 0.631, "step": 589 }, { "epoch": 0.017382849903510453, "grad_norm": 2.2007930998475076, "learning_rate": 1.7378497790868926e-06, "loss": 0.5931, "step": 590 }, { "epoch": 0.017412312360974028, "grad_norm": 2.028107997868743, "learning_rate": 1.7407952871870398e-06, "loss": 0.7036, "step": 591 }, { "epoch": 0.017441774818437606, "grad_norm": 2.122764935811526, "learning_rate": 1.7437407952871872e-06, "loss": 0.5268, "step": 592 }, { "epoch": 0.017471237275901184, "grad_norm": 2.069053399283462, "learning_rate": 1.7466863033873343e-06, "loss": 0.5681, "step": 593 }, { "epoch": 0.01750069973336476, "grad_norm": 2.171352807673751, "learning_rate": 1.7496318114874817e-06, "loss": 0.6328, "step": 594 }, { "epoch": 0.017530162190828338, "grad_norm": 2.051716631318637, "learning_rate": 1.7525773195876288e-06, "loss": 0.5467, "step": 595 }, { "epoch": 0.017559624648291913, "grad_norm": 1.9906611532450667, "learning_rate": 1.7555228276877762e-06, "loss": 0.962, "step": 596 }, { "epoch": 0.01758908710575549, "grad_norm": 2.009127221526757, "learning_rate": 1.7584683357879236e-06, "loss": 0.7843, "step": 597 }, { "epoch": 0.01761854956321907, "grad_norm": 1.8885413913622742, "learning_rate": 1.761413843888071e-06, "loss": 0.6214, "step": 598 }, { "epoch": 0.017648012020682644, "grad_norm": 2.017926749837632, "learning_rate": 1.7643593519882181e-06, "loss": 0.6492, "step": 599 }, { "epoch": 0.017677474478146223, "grad_norm": 2.0459115677033703, "learning_rate": 1.7673048600883655e-06, "loss": 0.5653, "step": 600 }, { "epoch": 0.017706936935609797, "grad_norm": 1.9333937501706437, "learning_rate": 1.7702503681885127e-06, "loss": 0.6165, "step": 601 }, { "epoch": 0.017736399393073376, "grad_norm": 1.66893151521534, "learning_rate": 1.77319587628866e-06, "loss": 0.5204, "step": 602 }, { "epoch": 0.017765861850536954, "grad_norm": 2.1305701557045578, "learning_rate": 1.7761413843888072e-06, "loss": 0.6449, "step": 603 }, { "epoch": 0.01779532430800053, "grad_norm": 1.9107351324685018, "learning_rate": 1.7790868924889546e-06, "loss": 0.7103, "step": 604 }, { "epoch": 0.017824786765464107, "grad_norm": 1.9920616694065074, "learning_rate": 1.7820324005891017e-06, "loss": 0.7262, "step": 605 }, { "epoch": 0.017854249222927686, "grad_norm": 1.9795273693233941, "learning_rate": 1.784977908689249e-06, "loss": 0.575, "step": 606 }, { "epoch": 0.01788371168039126, "grad_norm": 1.930494804460241, "learning_rate": 1.7879234167893963e-06, "loss": 0.4716, "step": 607 }, { "epoch": 0.01791317413785484, "grad_norm": 2.062554579936836, "learning_rate": 1.7908689248895436e-06, "loss": 0.8751, "step": 608 }, { "epoch": 0.017942636595318414, "grad_norm": 1.9226603657871664, "learning_rate": 1.7938144329896908e-06, "loss": 0.6347, "step": 609 }, { "epoch": 0.017972099052781992, "grad_norm": 2.067394369589317, "learning_rate": 1.7967599410898382e-06, "loss": 0.7383, "step": 610 }, { "epoch": 0.01800156151024557, "grad_norm": 1.89305050820442, "learning_rate": 1.7997054491899853e-06, "loss": 0.6752, "step": 611 }, { "epoch": 0.018031023967709146, "grad_norm": 2.529597018556632, "learning_rate": 1.8026509572901327e-06, "loss": 0.8127, "step": 612 }, { "epoch": 0.018060486425172724, "grad_norm": 2.127313229080787, "learning_rate": 1.8055964653902799e-06, "loss": 0.5437, "step": 613 }, { "epoch": 0.018089948882636302, "grad_norm": 1.8315797897142667, "learning_rate": 1.8085419734904272e-06, "loss": 0.6101, "step": 614 }, { "epoch": 0.018119411340099877, "grad_norm": 1.9740586748316369, "learning_rate": 1.8114874815905744e-06, "loss": 0.7716, "step": 615 }, { "epoch": 0.018148873797563456, "grad_norm": 1.9895652218376145, "learning_rate": 1.8144329896907218e-06, "loss": 0.8147, "step": 616 }, { "epoch": 0.01817833625502703, "grad_norm": 2.030839020665043, "learning_rate": 1.817378497790869e-06, "loss": 0.6811, "step": 617 }, { "epoch": 0.01820779871249061, "grad_norm": 1.8608735866965243, "learning_rate": 1.8203240058910163e-06, "loss": 0.6175, "step": 618 }, { "epoch": 0.018237261169954187, "grad_norm": 1.8609370026543597, "learning_rate": 1.8232695139911635e-06, "loss": 0.5431, "step": 619 }, { "epoch": 0.018266723627417762, "grad_norm": 2.202097135113315, "learning_rate": 1.8262150220913108e-06, "loss": 0.5666, "step": 620 }, { "epoch": 0.01829618608488134, "grad_norm": 1.9046199706382858, "learning_rate": 1.829160530191458e-06, "loss": 0.5548, "step": 621 }, { "epoch": 0.018325648542344915, "grad_norm": 1.8174798005519843, "learning_rate": 1.8321060382916054e-06, "loss": 0.5584, "step": 622 }, { "epoch": 0.018355110999808494, "grad_norm": 1.7661774497842921, "learning_rate": 1.8350515463917525e-06, "loss": 0.6155, "step": 623 }, { "epoch": 0.018384573457272072, "grad_norm": 1.8600090473528517, "learning_rate": 1.8379970544919001e-06, "loss": 0.5058, "step": 624 }, { "epoch": 0.018414035914735647, "grad_norm": 1.920328289340628, "learning_rate": 1.8409425625920475e-06, "loss": 0.6537, "step": 625 }, { "epoch": 0.018443498372199225, "grad_norm": 1.8508101232676313, "learning_rate": 1.8438880706921947e-06, "loss": 0.629, "step": 626 }, { "epoch": 0.018472960829662804, "grad_norm": 1.7753277559272251, "learning_rate": 1.846833578792342e-06, "loss": 0.5047, "step": 627 }, { "epoch": 0.01850242328712638, "grad_norm": 2.0806571631479125, "learning_rate": 1.8497790868924892e-06, "loss": 0.5603, "step": 628 }, { "epoch": 0.018531885744589957, "grad_norm": 1.7372484422551209, "learning_rate": 1.8527245949926366e-06, "loss": 0.5132, "step": 629 }, { "epoch": 0.018561348202053532, "grad_norm": 2.097356008571614, "learning_rate": 1.8556701030927837e-06, "loss": 0.7272, "step": 630 }, { "epoch": 0.01859081065951711, "grad_norm": 2.001790423243973, "learning_rate": 1.858615611192931e-06, "loss": 0.7285, "step": 631 }, { "epoch": 0.01862027311698069, "grad_norm": 1.928085023373647, "learning_rate": 1.8615611192930783e-06, "loss": 0.6516, "step": 632 }, { "epoch": 0.018649735574444264, "grad_norm": 1.954543106551111, "learning_rate": 1.8645066273932254e-06, "loss": 0.6883, "step": 633 }, { "epoch": 0.018679198031907842, "grad_norm": 2.0483842157232948, "learning_rate": 1.8674521354933728e-06, "loss": 0.6853, "step": 634 }, { "epoch": 0.018708660489371417, "grad_norm": 2.004112965364135, "learning_rate": 1.87039764359352e-06, "loss": 0.5113, "step": 635 }, { "epoch": 0.018738122946834995, "grad_norm": 1.7637832001659017, "learning_rate": 1.8733431516936673e-06, "loss": 0.6182, "step": 636 }, { "epoch": 0.018767585404298574, "grad_norm": 2.0799805167047674, "learning_rate": 1.8762886597938145e-06, "loss": 0.5608, "step": 637 }, { "epoch": 0.01879704786176215, "grad_norm": 1.925222262846493, "learning_rate": 1.8792341678939619e-06, "loss": 0.6503, "step": 638 }, { "epoch": 0.018826510319225727, "grad_norm": 1.9387582246501476, "learning_rate": 1.882179675994109e-06, "loss": 0.4701, "step": 639 }, { "epoch": 0.018855972776689305, "grad_norm": 2.35893254199786, "learning_rate": 1.8851251840942564e-06, "loss": 0.5845, "step": 640 }, { "epoch": 0.01888543523415288, "grad_norm": 1.8658400810637166, "learning_rate": 1.8880706921944036e-06, "loss": 0.6932, "step": 641 }, { "epoch": 0.01891489769161646, "grad_norm": 1.910944392809792, "learning_rate": 1.891016200294551e-06, "loss": 0.461, "step": 642 }, { "epoch": 0.018944360149080033, "grad_norm": 1.9049795252809738, "learning_rate": 1.893961708394698e-06, "loss": 0.5707, "step": 643 }, { "epoch": 0.018973822606543612, "grad_norm": 1.989033400550009, "learning_rate": 1.8969072164948455e-06, "loss": 0.6108, "step": 644 }, { "epoch": 0.01900328506400719, "grad_norm": 1.9723883142683023, "learning_rate": 1.8998527245949926e-06, "loss": 0.667, "step": 645 }, { "epoch": 0.019032747521470765, "grad_norm": 1.7852279068378982, "learning_rate": 1.90279823269514e-06, "loss": 0.6788, "step": 646 }, { "epoch": 0.019062209978934343, "grad_norm": 1.948659240957841, "learning_rate": 1.9057437407952872e-06, "loss": 0.5215, "step": 647 }, { "epoch": 0.01909167243639792, "grad_norm": 2.0927020772689198, "learning_rate": 1.9086892488954345e-06, "loss": 0.7397, "step": 648 }, { "epoch": 0.019121134893861497, "grad_norm": 1.8531813742596555, "learning_rate": 1.9116347569955817e-06, "loss": 0.5222, "step": 649 }, { "epoch": 0.019150597351325075, "grad_norm": 2.3815365819309364, "learning_rate": 1.9145802650957293e-06, "loss": 0.8203, "step": 650 }, { "epoch": 0.01918005980878865, "grad_norm": 2.3084785231704723, "learning_rate": 1.9175257731958764e-06, "loss": 0.7391, "step": 651 }, { "epoch": 0.01920952226625223, "grad_norm": 2.122351534503438, "learning_rate": 1.920471281296024e-06, "loss": 0.6707, "step": 652 }, { "epoch": 0.019238984723715807, "grad_norm": 2.0070450953304624, "learning_rate": 1.923416789396171e-06, "loss": 0.7222, "step": 653 }, { "epoch": 0.01926844718117938, "grad_norm": 2.1288083834890856, "learning_rate": 1.9263622974963183e-06, "loss": 0.6179, "step": 654 }, { "epoch": 0.01929790963864296, "grad_norm": 1.9414949457843222, "learning_rate": 1.9293078055964655e-06, "loss": 0.625, "step": 655 }, { "epoch": 0.019327372096106535, "grad_norm": 1.922066673713756, "learning_rate": 1.932253313696613e-06, "loss": 0.6412, "step": 656 }, { "epoch": 0.019356834553570113, "grad_norm": 1.9853512267467155, "learning_rate": 1.9351988217967602e-06, "loss": 0.555, "step": 657 }, { "epoch": 0.01938629701103369, "grad_norm": 1.8369605536884737, "learning_rate": 1.9381443298969074e-06, "loss": 0.6846, "step": 658 }, { "epoch": 0.019415759468497266, "grad_norm": 1.93245825852344, "learning_rate": 1.9410898379970546e-06, "loss": 0.6845, "step": 659 }, { "epoch": 0.019445221925960845, "grad_norm": 1.820424848163725, "learning_rate": 1.944035346097202e-06, "loss": 0.5341, "step": 660 }, { "epoch": 0.01947468438342442, "grad_norm": 1.8361827288348815, "learning_rate": 1.9469808541973493e-06, "loss": 0.5911, "step": 661 }, { "epoch": 0.019504146840887998, "grad_norm": 1.8433927589680712, "learning_rate": 1.9499263622974965e-06, "loss": 0.5608, "step": 662 }, { "epoch": 0.019533609298351576, "grad_norm": 1.9376633761919853, "learning_rate": 1.9528718703976436e-06, "loss": 0.7816, "step": 663 }, { "epoch": 0.01956307175581515, "grad_norm": 2.014389777712104, "learning_rate": 1.9558173784977912e-06, "loss": 0.4679, "step": 664 }, { "epoch": 0.01959253421327873, "grad_norm": 1.9653145917220072, "learning_rate": 1.9587628865979384e-06, "loss": 0.6064, "step": 665 }, { "epoch": 0.019621996670742308, "grad_norm": 2.1914460245428717, "learning_rate": 1.9617083946980855e-06, "loss": 0.7007, "step": 666 }, { "epoch": 0.019651459128205883, "grad_norm": 1.8560036088722125, "learning_rate": 1.9646539027982327e-06, "loss": 0.7274, "step": 667 }, { "epoch": 0.01968092158566946, "grad_norm": 2.001294405579344, "learning_rate": 1.9675994108983803e-06, "loss": 0.5875, "step": 668 }, { "epoch": 0.019710384043133036, "grad_norm": 1.9077073126521495, "learning_rate": 1.9705449189985275e-06, "loss": 0.6239, "step": 669 }, { "epoch": 0.019739846500596615, "grad_norm": 2.095293228606563, "learning_rate": 1.9734904270986746e-06, "loss": 0.599, "step": 670 }, { "epoch": 0.019769308958060193, "grad_norm": 2.015509488023608, "learning_rate": 1.9764359351988218e-06, "loss": 0.5379, "step": 671 }, { "epoch": 0.019798771415523768, "grad_norm": 1.948305318156256, "learning_rate": 1.979381443298969e-06, "loss": 0.5917, "step": 672 }, { "epoch": 0.019828233872987346, "grad_norm": 1.9694516621322495, "learning_rate": 1.9823269513991165e-06, "loss": 0.693, "step": 673 }, { "epoch": 0.019857696330450925, "grad_norm": 1.9629997047389796, "learning_rate": 1.9852724594992637e-06, "loss": 0.4566, "step": 674 }, { "epoch": 0.0198871587879145, "grad_norm": 2.0922909767043847, "learning_rate": 1.988217967599411e-06, "loss": 0.6514, "step": 675 }, { "epoch": 0.019916621245378078, "grad_norm": 2.014684042909192, "learning_rate": 1.991163475699558e-06, "loss": 0.678, "step": 676 }, { "epoch": 0.019946083702841653, "grad_norm": 1.8995295796213165, "learning_rate": 1.9941089837997056e-06, "loss": 0.6547, "step": 677 }, { "epoch": 0.01997554616030523, "grad_norm": 1.7781248083826546, "learning_rate": 1.997054491899853e-06, "loss": 0.6992, "step": 678 }, { "epoch": 0.02000500861776881, "grad_norm": 1.9847563864166196, "learning_rate": 2.0000000000000003e-06, "loss": 0.6139, "step": 679 }, { "epoch": 0.020034471075232384, "grad_norm": 1.9882590651185488, "learning_rate": 2.0029455081001475e-06, "loss": 0.4586, "step": 680 }, { "epoch": 0.020063933532695963, "grad_norm": 2.1785043198921086, "learning_rate": 2.0058910162002947e-06, "loss": 0.8393, "step": 681 }, { "epoch": 0.020093395990159538, "grad_norm": 2.066811631948016, "learning_rate": 2.0088365243004422e-06, "loss": 0.6314, "step": 682 }, { "epoch": 0.020122858447623116, "grad_norm": 1.8108936475623385, "learning_rate": 2.0117820324005894e-06, "loss": 0.5815, "step": 683 }, { "epoch": 0.020152320905086694, "grad_norm": 1.9886921011864795, "learning_rate": 2.0147275405007366e-06, "loss": 0.5565, "step": 684 }, { "epoch": 0.02018178336255027, "grad_norm": 2.2057448444846735, "learning_rate": 2.0176730486008837e-06, "loss": 0.5684, "step": 685 }, { "epoch": 0.020211245820013848, "grad_norm": 2.0529665304309854, "learning_rate": 2.0206185567010313e-06, "loss": 0.6379, "step": 686 }, { "epoch": 0.020240708277477426, "grad_norm": 2.04659991809095, "learning_rate": 2.0235640648011785e-06, "loss": 0.6638, "step": 687 }, { "epoch": 0.020270170734941, "grad_norm": 1.777157739388691, "learning_rate": 2.0265095729013256e-06, "loss": 0.6486, "step": 688 }, { "epoch": 0.02029963319240458, "grad_norm": 1.8332438940172402, "learning_rate": 2.029455081001473e-06, "loss": 0.586, "step": 689 }, { "epoch": 0.020329095649868154, "grad_norm": 1.9501945147629611, "learning_rate": 2.0324005891016204e-06, "loss": 0.6403, "step": 690 }, { "epoch": 0.020358558107331733, "grad_norm": 2.376200045716969, "learning_rate": 2.0353460972017675e-06, "loss": 0.6714, "step": 691 }, { "epoch": 0.02038802056479531, "grad_norm": 2.097652960783795, "learning_rate": 2.0382916053019147e-06, "loss": 0.6251, "step": 692 }, { "epoch": 0.020417483022258886, "grad_norm": 1.8579286084615847, "learning_rate": 2.041237113402062e-06, "loss": 0.6531, "step": 693 }, { "epoch": 0.020446945479722464, "grad_norm": 2.080470610401799, "learning_rate": 2.0441826215022094e-06, "loss": 0.6355, "step": 694 }, { "epoch": 0.02047640793718604, "grad_norm": 1.9852933151973926, "learning_rate": 2.0471281296023566e-06, "loss": 0.6119, "step": 695 }, { "epoch": 0.020505870394649617, "grad_norm": 2.1288544179063478, "learning_rate": 2.0500736377025038e-06, "loss": 0.5439, "step": 696 }, { "epoch": 0.020535332852113196, "grad_norm": 1.7736134538192563, "learning_rate": 2.053019145802651e-06, "loss": 0.5892, "step": 697 }, { "epoch": 0.02056479530957677, "grad_norm": 1.8455133131605352, "learning_rate": 2.0559646539027985e-06, "loss": 0.6623, "step": 698 }, { "epoch": 0.02059425776704035, "grad_norm": 2.1121120402046354, "learning_rate": 2.0589101620029457e-06, "loss": 0.6828, "step": 699 }, { "epoch": 0.020623720224503928, "grad_norm": 2.1691486780990443, "learning_rate": 2.061855670103093e-06, "loss": 0.59, "step": 700 }, { "epoch": 0.020653182681967502, "grad_norm": 2.3652659514552226, "learning_rate": 2.06480117820324e-06, "loss": 0.5492, "step": 701 }, { "epoch": 0.02068264513943108, "grad_norm": 2.1243088157332197, "learning_rate": 2.0677466863033876e-06, "loss": 0.6971, "step": 702 }, { "epoch": 0.020712107596894656, "grad_norm": 2.0463967208617357, "learning_rate": 2.0706921944035347e-06, "loss": 0.6138, "step": 703 }, { "epoch": 0.020741570054358234, "grad_norm": 2.103943241344562, "learning_rate": 2.0736377025036823e-06, "loss": 0.7473, "step": 704 }, { "epoch": 0.020771032511821812, "grad_norm": 2.147741841125541, "learning_rate": 2.0765832106038295e-06, "loss": 0.6011, "step": 705 }, { "epoch": 0.020800494969285387, "grad_norm": 1.828808438401294, "learning_rate": 2.0795287187039766e-06, "loss": 0.6258, "step": 706 }, { "epoch": 0.020829957426748966, "grad_norm": 1.9182736693343345, "learning_rate": 2.082474226804124e-06, "loss": 0.6051, "step": 707 }, { "epoch": 0.02085941988421254, "grad_norm": 1.8091011798644157, "learning_rate": 2.0854197349042714e-06, "loss": 0.4927, "step": 708 }, { "epoch": 0.02088888234167612, "grad_norm": 1.8357530983637909, "learning_rate": 2.0883652430044186e-06, "loss": 0.5874, "step": 709 }, { "epoch": 0.020918344799139697, "grad_norm": 2.090526937461394, "learning_rate": 2.0913107511045657e-06, "loss": 0.5751, "step": 710 }, { "epoch": 0.020947807256603272, "grad_norm": 1.7879633998321693, "learning_rate": 2.094256259204713e-06, "loss": 0.5796, "step": 711 }, { "epoch": 0.02097726971406685, "grad_norm": 1.744306678756968, "learning_rate": 2.0972017673048605e-06, "loss": 0.5743, "step": 712 }, { "epoch": 0.02100673217153043, "grad_norm": 1.9764475499643717, "learning_rate": 2.1001472754050076e-06, "loss": 0.4983, "step": 713 }, { "epoch": 0.021036194628994004, "grad_norm": 1.910321161290648, "learning_rate": 2.1030927835051548e-06, "loss": 0.6154, "step": 714 }, { "epoch": 0.021065657086457582, "grad_norm": 1.9153592648075517, "learning_rate": 2.106038291605302e-06, "loss": 0.6074, "step": 715 }, { "epoch": 0.021095119543921157, "grad_norm": 1.913711251497642, "learning_rate": 2.1089837997054495e-06, "loss": 0.5445, "step": 716 }, { "epoch": 0.021124582001384735, "grad_norm": 1.784180805925093, "learning_rate": 2.1119293078055967e-06, "loss": 0.5679, "step": 717 }, { "epoch": 0.021154044458848314, "grad_norm": 1.8145842378176427, "learning_rate": 2.114874815905744e-06, "loss": 0.6318, "step": 718 }, { "epoch": 0.02118350691631189, "grad_norm": 2.004608970755499, "learning_rate": 2.117820324005891e-06, "loss": 0.4542, "step": 719 }, { "epoch": 0.021212969373775467, "grad_norm": 2.170409882644035, "learning_rate": 2.1207658321060386e-06, "loss": 0.6461, "step": 720 }, { "epoch": 0.021242431831239042, "grad_norm": 2.0597836073583555, "learning_rate": 2.1237113402061858e-06, "loss": 0.5699, "step": 721 }, { "epoch": 0.02127189428870262, "grad_norm": 1.7575102501087714, "learning_rate": 2.126656848306333e-06, "loss": 0.5189, "step": 722 }, { "epoch": 0.0213013567461662, "grad_norm": 1.9233228400902895, "learning_rate": 2.12960235640648e-06, "loss": 0.6227, "step": 723 }, { "epoch": 0.021330819203629774, "grad_norm": 1.9563253900119622, "learning_rate": 2.1325478645066277e-06, "loss": 0.5641, "step": 724 }, { "epoch": 0.021360281661093352, "grad_norm": 1.7795988762900916, "learning_rate": 2.135493372606775e-06, "loss": 0.5723, "step": 725 }, { "epoch": 0.02138974411855693, "grad_norm": 1.9747839814001291, "learning_rate": 2.138438880706922e-06, "loss": 0.6111, "step": 726 }, { "epoch": 0.021419206576020505, "grad_norm": 2.3902541878971504, "learning_rate": 2.141384388807069e-06, "loss": 0.5686, "step": 727 }, { "epoch": 0.021448669033484084, "grad_norm": 1.6531436095904384, "learning_rate": 2.1443298969072167e-06, "loss": 0.4856, "step": 728 }, { "epoch": 0.02147813149094766, "grad_norm": 1.9438431276845896, "learning_rate": 2.147275405007364e-06, "loss": 0.4252, "step": 729 }, { "epoch": 0.021507593948411237, "grad_norm": 1.842279535843941, "learning_rate": 2.1502209131075115e-06, "loss": 0.5973, "step": 730 }, { "epoch": 0.021537056405874815, "grad_norm": 1.947203386617615, "learning_rate": 2.1531664212076586e-06, "loss": 0.7497, "step": 731 }, { "epoch": 0.02156651886333839, "grad_norm": 1.8762047290209272, "learning_rate": 2.156111929307806e-06, "loss": 0.5273, "step": 732 }, { "epoch": 0.02159598132080197, "grad_norm": 1.8590948333376558, "learning_rate": 2.159057437407953e-06, "loss": 0.5327, "step": 733 }, { "epoch": 0.021625443778265543, "grad_norm": 1.8020537624383786, "learning_rate": 2.1620029455081005e-06, "loss": 0.5729, "step": 734 }, { "epoch": 0.021654906235729122, "grad_norm": 1.868703843299661, "learning_rate": 2.1649484536082477e-06, "loss": 0.4975, "step": 735 }, { "epoch": 0.0216843686931927, "grad_norm": 1.9509699951269523, "learning_rate": 2.167893961708395e-06, "loss": 0.7365, "step": 736 }, { "epoch": 0.021713831150656275, "grad_norm": 2.0060661934411046, "learning_rate": 2.170839469808542e-06, "loss": 0.5758, "step": 737 }, { "epoch": 0.021743293608119853, "grad_norm": 1.924582520840817, "learning_rate": 2.1737849779086896e-06, "loss": 0.7433, "step": 738 }, { "epoch": 0.021772756065583432, "grad_norm": 2.294858601978259, "learning_rate": 2.1767304860088368e-06, "loss": 0.6626, "step": 739 }, { "epoch": 0.021802218523047007, "grad_norm": 1.959475611678914, "learning_rate": 2.179675994108984e-06, "loss": 0.7343, "step": 740 }, { "epoch": 0.021831680980510585, "grad_norm": 1.778493801757602, "learning_rate": 2.182621502209131e-06, "loss": 0.6074, "step": 741 }, { "epoch": 0.02186114343797416, "grad_norm": 1.8903190494092965, "learning_rate": 2.1855670103092787e-06, "loss": 0.4498, "step": 742 }, { "epoch": 0.02189060589543774, "grad_norm": 2.0557948058427837, "learning_rate": 2.188512518409426e-06, "loss": 0.7224, "step": 743 }, { "epoch": 0.021920068352901317, "grad_norm": 1.9055228350443858, "learning_rate": 2.191458026509573e-06, "loss": 0.6689, "step": 744 }, { "epoch": 0.02194953081036489, "grad_norm": 2.175965275372262, "learning_rate": 2.19440353460972e-06, "loss": 0.7258, "step": 745 }, { "epoch": 0.02197899326782847, "grad_norm": 1.9889674410930587, "learning_rate": 2.1973490427098678e-06, "loss": 0.608, "step": 746 }, { "epoch": 0.02200845572529205, "grad_norm": 1.9847976218798316, "learning_rate": 2.200294550810015e-06, "loss": 0.6296, "step": 747 }, { "epoch": 0.022037918182755623, "grad_norm": 1.9450070792718026, "learning_rate": 2.203240058910162e-06, "loss": 0.5959, "step": 748 }, { "epoch": 0.0220673806402192, "grad_norm": 1.977258446563219, "learning_rate": 2.2061855670103092e-06, "loss": 0.6837, "step": 749 }, { "epoch": 0.022096843097682776, "grad_norm": 2.083336326365424, "learning_rate": 2.209131075110457e-06, "loss": 0.7082, "step": 750 }, { "epoch": 0.022126305555146355, "grad_norm": 2.2142913087425526, "learning_rate": 2.212076583210604e-06, "loss": 0.7144, "step": 751 }, { "epoch": 0.022155768012609933, "grad_norm": 2.390593143089231, "learning_rate": 2.215022091310751e-06, "loss": 0.6044, "step": 752 }, { "epoch": 0.022185230470073508, "grad_norm": 1.7948701553628907, "learning_rate": 2.2179675994108983e-06, "loss": 0.6739, "step": 753 }, { "epoch": 0.022214692927537086, "grad_norm": 1.972390254710042, "learning_rate": 2.220913107511046e-06, "loss": 0.7337, "step": 754 }, { "epoch": 0.02224415538500066, "grad_norm": 1.9855274705179897, "learning_rate": 2.223858615611193e-06, "loss": 0.7042, "step": 755 }, { "epoch": 0.02227361784246424, "grad_norm": 1.9770018334663348, "learning_rate": 2.2268041237113406e-06, "loss": 0.6311, "step": 756 }, { "epoch": 0.022303080299927818, "grad_norm": 1.9559752989616326, "learning_rate": 2.229749631811488e-06, "loss": 0.524, "step": 757 }, { "epoch": 0.022332542757391393, "grad_norm": 1.8693127822053799, "learning_rate": 2.232695139911635e-06, "loss": 0.6008, "step": 758 }, { "epoch": 0.02236200521485497, "grad_norm": 2.045047197602606, "learning_rate": 2.235640648011782e-06, "loss": 0.5308, "step": 759 }, { "epoch": 0.02239146767231855, "grad_norm": 1.8251324418185255, "learning_rate": 2.2385861561119297e-06, "loss": 0.6874, "step": 760 }, { "epoch": 0.022420930129782125, "grad_norm": 1.8165860262375864, "learning_rate": 2.241531664212077e-06, "loss": 0.4409, "step": 761 }, { "epoch": 0.022450392587245703, "grad_norm": 1.9821357745914345, "learning_rate": 2.244477172312224e-06, "loss": 0.3853, "step": 762 }, { "epoch": 0.022479855044709278, "grad_norm": 1.8734308570094498, "learning_rate": 2.247422680412371e-06, "loss": 0.5364, "step": 763 }, { "epoch": 0.022509317502172856, "grad_norm": 1.841570524990013, "learning_rate": 2.2503681885125188e-06, "loss": 0.6077, "step": 764 }, { "epoch": 0.022538779959636435, "grad_norm": 1.813802004960827, "learning_rate": 2.253313696612666e-06, "loss": 0.7215, "step": 765 }, { "epoch": 0.02256824241710001, "grad_norm": 1.8764450796126402, "learning_rate": 2.256259204712813e-06, "loss": 0.6015, "step": 766 }, { "epoch": 0.022597704874563588, "grad_norm": 2.004302873229497, "learning_rate": 2.2592047128129603e-06, "loss": 0.6547, "step": 767 }, { "epoch": 0.022627167332027163, "grad_norm": 1.743463613300445, "learning_rate": 2.262150220913108e-06, "loss": 0.4436, "step": 768 }, { "epoch": 0.02265662978949074, "grad_norm": 2.02967850502819, "learning_rate": 2.265095729013255e-06, "loss": 0.7466, "step": 769 }, { "epoch": 0.02268609224695432, "grad_norm": 1.6649015957453757, "learning_rate": 2.268041237113402e-06, "loss": 0.5379, "step": 770 }, { "epoch": 0.022715554704417894, "grad_norm": 1.8313359522683246, "learning_rate": 2.2709867452135493e-06, "loss": 0.6622, "step": 771 }, { "epoch": 0.022745017161881473, "grad_norm": 1.6672655420620857, "learning_rate": 2.273932253313697e-06, "loss": 0.4602, "step": 772 }, { "epoch": 0.02277447961934505, "grad_norm": 2.0869143797841048, "learning_rate": 2.276877761413844e-06, "loss": 0.5538, "step": 773 }, { "epoch": 0.022803942076808626, "grad_norm": 1.982726265439794, "learning_rate": 2.2798232695139912e-06, "loss": 0.736, "step": 774 }, { "epoch": 0.022833404534272204, "grad_norm": 1.80941543799498, "learning_rate": 2.2827687776141384e-06, "loss": 0.6003, "step": 775 }, { "epoch": 0.02286286699173578, "grad_norm": 1.9297116402118377, "learning_rate": 2.285714285714286e-06, "loss": 0.5893, "step": 776 }, { "epoch": 0.022892329449199358, "grad_norm": 1.894164185617672, "learning_rate": 2.288659793814433e-06, "loss": 0.5197, "step": 777 }, { "epoch": 0.022921791906662936, "grad_norm": 1.9554743637806289, "learning_rate": 2.2916053019145803e-06, "loss": 0.5763, "step": 778 }, { "epoch": 0.02295125436412651, "grad_norm": 1.8747831171981588, "learning_rate": 2.2945508100147275e-06, "loss": 0.6065, "step": 779 }, { "epoch": 0.02298071682159009, "grad_norm": 2.0554850034845216, "learning_rate": 2.297496318114875e-06, "loss": 0.6341, "step": 780 }, { "epoch": 0.023010179279053664, "grad_norm": 1.99663062963476, "learning_rate": 2.300441826215022e-06, "loss": 0.6375, "step": 781 }, { "epoch": 0.023039641736517243, "grad_norm": 1.9240892388640534, "learning_rate": 2.3033873343151694e-06, "loss": 0.473, "step": 782 }, { "epoch": 0.02306910419398082, "grad_norm": 2.05412212921259, "learning_rate": 2.306332842415317e-06, "loss": 0.6409, "step": 783 }, { "epoch": 0.023098566651444396, "grad_norm": 2.158431725150559, "learning_rate": 2.309278350515464e-06, "loss": 0.8308, "step": 784 }, { "epoch": 0.023128029108907974, "grad_norm": 1.968890213222344, "learning_rate": 2.3122238586156117e-06, "loss": 0.5897, "step": 785 }, { "epoch": 0.023157491566371553, "grad_norm": 1.9463428677887706, "learning_rate": 2.315169366715759e-06, "loss": 0.7745, "step": 786 }, { "epoch": 0.023186954023835128, "grad_norm": 2.0712724522356187, "learning_rate": 2.318114874815906e-06, "loss": 0.5141, "step": 787 }, { "epoch": 0.023216416481298706, "grad_norm": 1.9899084067510235, "learning_rate": 2.321060382916053e-06, "loss": 0.7169, "step": 788 }, { "epoch": 0.02324587893876228, "grad_norm": 1.8790874353912854, "learning_rate": 2.3240058910162008e-06, "loss": 0.5718, "step": 789 }, { "epoch": 0.02327534139622586, "grad_norm": 2.1399560826178976, "learning_rate": 2.326951399116348e-06, "loss": 0.6099, "step": 790 }, { "epoch": 0.023304803853689438, "grad_norm": 1.9956953114458698, "learning_rate": 2.329896907216495e-06, "loss": 0.6524, "step": 791 }, { "epoch": 0.023334266311153012, "grad_norm": 1.9121017778484566, "learning_rate": 2.3328424153166422e-06, "loss": 0.6995, "step": 792 }, { "epoch": 0.02336372876861659, "grad_norm": 1.795542005175802, "learning_rate": 2.3357879234167894e-06, "loss": 0.5843, "step": 793 }, { "epoch": 0.023393191226080166, "grad_norm": 1.855725186361052, "learning_rate": 2.338733431516937e-06, "loss": 0.6492, "step": 794 }, { "epoch": 0.023422653683543744, "grad_norm": 1.8713438904677502, "learning_rate": 2.341678939617084e-06, "loss": 0.4629, "step": 795 }, { "epoch": 0.023452116141007322, "grad_norm": 1.9594344531848238, "learning_rate": 2.3446244477172313e-06, "loss": 0.5972, "step": 796 }, { "epoch": 0.023481578598470897, "grad_norm": 2.1392371779211365, "learning_rate": 2.3475699558173785e-06, "loss": 0.7043, "step": 797 }, { "epoch": 0.023511041055934476, "grad_norm": 2.0134037551422748, "learning_rate": 2.350515463917526e-06, "loss": 0.771, "step": 798 }, { "epoch": 0.023540503513398054, "grad_norm": 1.9392133326420635, "learning_rate": 2.3534609720176732e-06, "loss": 0.742, "step": 799 }, { "epoch": 0.02356996597086163, "grad_norm": 1.9360889842893685, "learning_rate": 2.3564064801178204e-06, "loss": 0.6688, "step": 800 }, { "epoch": 0.023599428428325207, "grad_norm": 2.0537120541826774, "learning_rate": 2.3593519882179675e-06, "loss": 0.6598, "step": 801 }, { "epoch": 0.023628890885788782, "grad_norm": 2.1412066111044847, "learning_rate": 2.362297496318115e-06, "loss": 0.4678, "step": 802 }, { "epoch": 0.02365835334325236, "grad_norm": 1.9489444765328194, "learning_rate": 2.3652430044182623e-06, "loss": 0.527, "step": 803 }, { "epoch": 0.02368781580071594, "grad_norm": 2.1562942718986386, "learning_rate": 2.3681885125184095e-06, "loss": 0.6335, "step": 804 }, { "epoch": 0.023717278258179514, "grad_norm": 2.084755295765894, "learning_rate": 2.3711340206185566e-06, "loss": 0.8054, "step": 805 }, { "epoch": 0.023746740715643092, "grad_norm": 2.0983714953326698, "learning_rate": 2.374079528718704e-06, "loss": 0.6065, "step": 806 }, { "epoch": 0.023776203173106667, "grad_norm": 2.1212065316077853, "learning_rate": 2.3770250368188514e-06, "loss": 0.7527, "step": 807 }, { "epoch": 0.023805665630570245, "grad_norm": 2.111229302336915, "learning_rate": 2.3799705449189985e-06, "loss": 0.6184, "step": 808 }, { "epoch": 0.023835128088033824, "grad_norm": 1.691738837380502, "learning_rate": 2.382916053019146e-06, "loss": 0.4099, "step": 809 }, { "epoch": 0.0238645905454974, "grad_norm": 1.8918807907942858, "learning_rate": 2.3858615611192933e-06, "loss": 0.504, "step": 810 }, { "epoch": 0.023894053002960977, "grad_norm": 1.9646351088192946, "learning_rate": 2.388807069219441e-06, "loss": 0.5813, "step": 811 }, { "epoch": 0.023923515460424555, "grad_norm": 2.1020170068286514, "learning_rate": 2.391752577319588e-06, "loss": 0.5976, "step": 812 }, { "epoch": 0.02395297791788813, "grad_norm": 1.9522209794691803, "learning_rate": 2.394698085419735e-06, "loss": 0.6625, "step": 813 }, { "epoch": 0.02398244037535171, "grad_norm": 1.9224107963048451, "learning_rate": 2.3976435935198823e-06, "loss": 0.5993, "step": 814 }, { "epoch": 0.024011902832815284, "grad_norm": 1.7556526161968686, "learning_rate": 2.40058910162003e-06, "loss": 0.7022, "step": 815 }, { "epoch": 0.024041365290278862, "grad_norm": 1.8765536468331039, "learning_rate": 2.403534609720177e-06, "loss": 0.7265, "step": 816 }, { "epoch": 0.02407082774774244, "grad_norm": 1.6117990967602667, "learning_rate": 2.4064801178203242e-06, "loss": 0.4362, "step": 817 }, { "epoch": 0.024100290205206015, "grad_norm": 2.251776559959521, "learning_rate": 2.4094256259204714e-06, "loss": 0.5391, "step": 818 }, { "epoch": 0.024129752662669594, "grad_norm": 1.9135080704748988, "learning_rate": 2.412371134020619e-06, "loss": 0.595, "step": 819 }, { "epoch": 0.024159215120133172, "grad_norm": 1.8473826955680135, "learning_rate": 2.415316642120766e-06, "loss": 0.7242, "step": 820 }, { "epoch": 0.024188677577596747, "grad_norm": 2.0071261435096166, "learning_rate": 2.4182621502209133e-06, "loss": 0.7531, "step": 821 }, { "epoch": 0.024218140035060325, "grad_norm": 2.0502229391053377, "learning_rate": 2.4212076583210605e-06, "loss": 0.6396, "step": 822 }, { "epoch": 0.0242476024925239, "grad_norm": 2.067691128603722, "learning_rate": 2.424153166421208e-06, "loss": 0.6835, "step": 823 }, { "epoch": 0.02427706494998748, "grad_norm": 1.703186646373656, "learning_rate": 2.4270986745213552e-06, "loss": 0.5782, "step": 824 }, { "epoch": 0.024306527407451057, "grad_norm": 2.219533771339116, "learning_rate": 2.4300441826215024e-06, "loss": 0.7595, "step": 825 }, { "epoch": 0.024335989864914632, "grad_norm": 1.849338312646917, "learning_rate": 2.4329896907216495e-06, "loss": 0.4664, "step": 826 }, { "epoch": 0.02436545232237821, "grad_norm": 1.9094218920931085, "learning_rate": 2.435935198821797e-06, "loss": 0.7491, "step": 827 }, { "epoch": 0.024394914779841785, "grad_norm": 1.928086379625141, "learning_rate": 2.4388807069219443e-06, "loss": 0.6606, "step": 828 }, { "epoch": 0.024424377237305363, "grad_norm": 1.9551808144908365, "learning_rate": 2.4418262150220914e-06, "loss": 0.6296, "step": 829 }, { "epoch": 0.024453839694768942, "grad_norm": 2.048015538537753, "learning_rate": 2.4447717231222386e-06, "loss": 0.6501, "step": 830 }, { "epoch": 0.024483302152232517, "grad_norm": 2.076159950770384, "learning_rate": 2.4477172312223858e-06, "loss": 0.6646, "step": 831 }, { "epoch": 0.024512764609696095, "grad_norm": 2.1636213324395213, "learning_rate": 2.4506627393225333e-06, "loss": 0.3949, "step": 832 }, { "epoch": 0.024542227067159673, "grad_norm": 1.8112769228861876, "learning_rate": 2.4536082474226805e-06, "loss": 0.5425, "step": 833 }, { "epoch": 0.02457168952462325, "grad_norm": 2.1579461868731493, "learning_rate": 2.4565537555228277e-06, "loss": 0.626, "step": 834 }, { "epoch": 0.024601151982086827, "grad_norm": 1.9016957461619939, "learning_rate": 2.459499263622975e-06, "loss": 0.6118, "step": 835 }, { "epoch": 0.0246306144395504, "grad_norm": 1.9664998408484742, "learning_rate": 2.4624447717231224e-06, "loss": 0.7158, "step": 836 }, { "epoch": 0.02466007689701398, "grad_norm": 1.9003778047670687, "learning_rate": 2.46539027982327e-06, "loss": 0.6158, "step": 837 }, { "epoch": 0.02468953935447756, "grad_norm": 1.9729352354433884, "learning_rate": 2.468335787923417e-06, "loss": 0.6776, "step": 838 }, { "epoch": 0.024719001811941133, "grad_norm": 1.9694128182065191, "learning_rate": 2.4712812960235643e-06, "loss": 0.6491, "step": 839 }, { "epoch": 0.02474846426940471, "grad_norm": 1.8947011471872388, "learning_rate": 2.4742268041237115e-06, "loss": 0.5681, "step": 840 }, { "epoch": 0.024777926726868287, "grad_norm": 1.9482724838057301, "learning_rate": 2.477172312223859e-06, "loss": 0.8129, "step": 841 }, { "epoch": 0.024807389184331865, "grad_norm": 2.040213277969708, "learning_rate": 2.4801178203240062e-06, "loss": 0.653, "step": 842 }, { "epoch": 0.024836851641795443, "grad_norm": 1.96515620361694, "learning_rate": 2.4830633284241534e-06, "loss": 0.7761, "step": 843 }, { "epoch": 0.024866314099259018, "grad_norm": 1.6436736964711465, "learning_rate": 2.4860088365243006e-06, "loss": 0.469, "step": 844 }, { "epoch": 0.024895776556722597, "grad_norm": 1.7973042104873218, "learning_rate": 2.488954344624448e-06, "loss": 0.6047, "step": 845 }, { "epoch": 0.024925239014186175, "grad_norm": 1.853424327713698, "learning_rate": 2.4918998527245953e-06, "loss": 0.6801, "step": 846 }, { "epoch": 0.02495470147164975, "grad_norm": 1.9385645637273403, "learning_rate": 2.4948453608247425e-06, "loss": 0.5603, "step": 847 }, { "epoch": 0.024984163929113328, "grad_norm": 1.8984701009025218, "learning_rate": 2.4977908689248896e-06, "loss": 0.6267, "step": 848 }, { "epoch": 0.025013626386576903, "grad_norm": 1.8512598709349972, "learning_rate": 2.500736377025037e-06, "loss": 0.5747, "step": 849 }, { "epoch": 0.02504308884404048, "grad_norm": 1.6745967607817542, "learning_rate": 2.5036818851251844e-06, "loss": 0.5481, "step": 850 }, { "epoch": 0.02507255130150406, "grad_norm": 1.7964093920999396, "learning_rate": 2.5066273932253315e-06, "loss": 0.6519, "step": 851 }, { "epoch": 0.025102013758967635, "grad_norm": 1.8041170458121925, "learning_rate": 2.5095729013254787e-06, "loss": 0.6218, "step": 852 }, { "epoch": 0.025131476216431213, "grad_norm": 1.793420752513756, "learning_rate": 2.5125184094256263e-06, "loss": 0.5689, "step": 853 }, { "epoch": 0.025160938673894788, "grad_norm": 1.8809872104448577, "learning_rate": 2.5154639175257734e-06, "loss": 0.6698, "step": 854 }, { "epoch": 0.025190401131358366, "grad_norm": 2.0610599517926484, "learning_rate": 2.5184094256259206e-06, "loss": 0.7061, "step": 855 }, { "epoch": 0.025219863588821945, "grad_norm": 1.8643623427730276, "learning_rate": 2.5213549337260678e-06, "loss": 0.6065, "step": 856 }, { "epoch": 0.02524932604628552, "grad_norm": 1.8275425141725943, "learning_rate": 2.5243004418262153e-06, "loss": 0.6058, "step": 857 }, { "epoch": 0.025278788503749098, "grad_norm": 2.129065251938754, "learning_rate": 2.5272459499263625e-06, "loss": 0.5751, "step": 858 }, { "epoch": 0.025308250961212676, "grad_norm": 2.0852402533982892, "learning_rate": 2.5301914580265097e-06, "loss": 0.6056, "step": 859 }, { "epoch": 0.02533771341867625, "grad_norm": 2.3305968223044173, "learning_rate": 2.533136966126657e-06, "loss": 0.6888, "step": 860 }, { "epoch": 0.02536717587613983, "grad_norm": 2.246670089605027, "learning_rate": 2.5360824742268044e-06, "loss": 0.6562, "step": 861 }, { "epoch": 0.025396638333603404, "grad_norm": 2.0280911515939946, "learning_rate": 2.5390279823269516e-06, "loss": 0.7872, "step": 862 }, { "epoch": 0.025426100791066983, "grad_norm": 2.068312103453192, "learning_rate": 2.5419734904270987e-06, "loss": 0.8721, "step": 863 }, { "epoch": 0.02545556324853056, "grad_norm": 2.0914345894706248, "learning_rate": 2.544918998527246e-06, "loss": 0.5771, "step": 864 }, { "epoch": 0.025485025705994136, "grad_norm": 2.049333188209842, "learning_rate": 2.547864506627393e-06, "loss": 0.6467, "step": 865 }, { "epoch": 0.025514488163457714, "grad_norm": 1.939915406969431, "learning_rate": 2.5508100147275406e-06, "loss": 0.744, "step": 866 }, { "epoch": 0.02554395062092129, "grad_norm": 1.8831474380648818, "learning_rate": 2.553755522827688e-06, "loss": 0.6335, "step": 867 }, { "epoch": 0.025573413078384868, "grad_norm": 1.9876779972338912, "learning_rate": 2.556701030927835e-06, "loss": 0.8338, "step": 868 }, { "epoch": 0.025602875535848446, "grad_norm": 2.8085635785966625, "learning_rate": 2.559646539027982e-06, "loss": 0.7121, "step": 869 }, { "epoch": 0.02563233799331202, "grad_norm": 1.7596468104323935, "learning_rate": 2.5625920471281297e-06, "loss": 0.6742, "step": 870 }, { "epoch": 0.0256618004507756, "grad_norm": 1.9346975069142114, "learning_rate": 2.565537555228277e-06, "loss": 0.6502, "step": 871 }, { "epoch": 0.025691262908239178, "grad_norm": 1.987414663608112, "learning_rate": 2.568483063328424e-06, "loss": 0.6802, "step": 872 }, { "epoch": 0.025720725365702753, "grad_norm": 1.9678736151836809, "learning_rate": 2.571428571428571e-06, "loss": 0.766, "step": 873 }, { "epoch": 0.02575018782316633, "grad_norm": 1.8229330133317307, "learning_rate": 2.5743740795287188e-06, "loss": 0.6132, "step": 874 }, { "epoch": 0.025779650280629906, "grad_norm": 2.0265947182851307, "learning_rate": 2.577319587628866e-06, "loss": 0.5226, "step": 875 }, { "epoch": 0.025809112738093484, "grad_norm": 1.9444525349147608, "learning_rate": 2.580265095729014e-06, "loss": 0.5812, "step": 876 }, { "epoch": 0.025838575195557063, "grad_norm": 2.0134754161468553, "learning_rate": 2.583210603829161e-06, "loss": 0.6888, "step": 877 }, { "epoch": 0.025868037653020638, "grad_norm": 2.025990173663596, "learning_rate": 2.5861561119293083e-06, "loss": 0.6813, "step": 878 }, { "epoch": 0.025897500110484216, "grad_norm": 1.997105798936219, "learning_rate": 2.5891016200294554e-06, "loss": 0.6209, "step": 879 }, { "epoch": 0.02592696256794779, "grad_norm": 2.0296889250814814, "learning_rate": 2.5920471281296026e-06, "loss": 0.7482, "step": 880 }, { "epoch": 0.02595642502541137, "grad_norm": 1.9321100987675797, "learning_rate": 2.59499263622975e-06, "loss": 0.5821, "step": 881 }, { "epoch": 0.025985887482874948, "grad_norm": 1.9448836171396517, "learning_rate": 2.5979381443298973e-06, "loss": 0.5849, "step": 882 }, { "epoch": 0.026015349940338522, "grad_norm": 1.9465293945005269, "learning_rate": 2.6008836524300445e-06, "loss": 0.4986, "step": 883 }, { "epoch": 0.0260448123978021, "grad_norm": 1.9273173888141586, "learning_rate": 2.6038291605301917e-06, "loss": 0.5741, "step": 884 }, { "epoch": 0.02607427485526568, "grad_norm": 1.9881485517447213, "learning_rate": 2.6067746686303392e-06, "loss": 0.4739, "step": 885 }, { "epoch": 0.026103737312729254, "grad_norm": 1.923402431113159, "learning_rate": 2.6097201767304864e-06, "loss": 0.6531, "step": 886 }, { "epoch": 0.026133199770192832, "grad_norm": 1.9130674980558755, "learning_rate": 2.6126656848306336e-06, "loss": 0.6048, "step": 887 }, { "epoch": 0.026162662227656407, "grad_norm": 1.9639204972437774, "learning_rate": 2.6156111929307807e-06, "loss": 0.7672, "step": 888 }, { "epoch": 0.026192124685119986, "grad_norm": 1.9565982828304804, "learning_rate": 2.6185567010309283e-06, "loss": 0.5794, "step": 889 }, { "epoch": 0.026221587142583564, "grad_norm": 2.3947357975619252, "learning_rate": 2.6215022091310755e-06, "loss": 0.6672, "step": 890 }, { "epoch": 0.02625104960004714, "grad_norm": 1.7749495768939716, "learning_rate": 2.6244477172312226e-06, "loss": 0.6524, "step": 891 }, { "epoch": 0.026280512057510717, "grad_norm": 1.928673147091872, "learning_rate": 2.62739322533137e-06, "loss": 0.7333, "step": 892 }, { "epoch": 0.026309974514974296, "grad_norm": 1.8901443837004228, "learning_rate": 2.6303387334315174e-06, "loss": 0.7525, "step": 893 }, { "epoch": 0.02633943697243787, "grad_norm": 1.8053016315943675, "learning_rate": 2.6332842415316645e-06, "loss": 0.5636, "step": 894 }, { "epoch": 0.02636889942990145, "grad_norm": 1.9447750073754448, "learning_rate": 2.6362297496318117e-06, "loss": 0.5648, "step": 895 }, { "epoch": 0.026398361887365024, "grad_norm": 2.1405342980306785, "learning_rate": 2.639175257731959e-06, "loss": 0.6407, "step": 896 }, { "epoch": 0.026427824344828602, "grad_norm": 1.7875185953140078, "learning_rate": 2.6421207658321064e-06, "loss": 0.6371, "step": 897 }, { "epoch": 0.02645728680229218, "grad_norm": 2.017275388753311, "learning_rate": 2.6450662739322536e-06, "loss": 0.6844, "step": 898 }, { "epoch": 0.026486749259755756, "grad_norm": 1.9948246189110657, "learning_rate": 2.6480117820324008e-06, "loss": 0.5336, "step": 899 }, { "epoch": 0.026516211717219334, "grad_norm": 2.120308804466343, "learning_rate": 2.650957290132548e-06, "loss": 0.6338, "step": 900 }, { "epoch": 0.02654567417468291, "grad_norm": 1.8377606258137982, "learning_rate": 2.6539027982326955e-06, "loss": 0.6222, "step": 901 }, { "epoch": 0.026575136632146487, "grad_norm": 2.1375393431853817, "learning_rate": 2.6568483063328427e-06, "loss": 0.6449, "step": 902 }, { "epoch": 0.026604599089610066, "grad_norm": 1.95207763783171, "learning_rate": 2.65979381443299e-06, "loss": 0.743, "step": 903 }, { "epoch": 0.02663406154707364, "grad_norm": 1.9569829773808214, "learning_rate": 2.662739322533137e-06, "loss": 0.5024, "step": 904 }, { "epoch": 0.02666352400453722, "grad_norm": 1.9296284681819287, "learning_rate": 2.6656848306332846e-06, "loss": 0.7148, "step": 905 }, { "epoch": 0.026692986462000797, "grad_norm": 1.995537821595474, "learning_rate": 2.6686303387334317e-06, "loss": 0.5849, "step": 906 }, { "epoch": 0.026722448919464372, "grad_norm": 1.918033044866021, "learning_rate": 2.671575846833579e-06, "loss": 0.5875, "step": 907 }, { "epoch": 0.02675191137692795, "grad_norm": 2.1357897068337897, "learning_rate": 2.674521354933726e-06, "loss": 0.6318, "step": 908 }, { "epoch": 0.026781373834391525, "grad_norm": 1.929473218805285, "learning_rate": 2.6774668630338737e-06, "loss": 0.4962, "step": 909 }, { "epoch": 0.026810836291855104, "grad_norm": 1.9366937939241196, "learning_rate": 2.680412371134021e-06, "loss": 0.5881, "step": 910 }, { "epoch": 0.026840298749318682, "grad_norm": 1.624755689452008, "learning_rate": 2.683357879234168e-06, "loss": 0.4811, "step": 911 }, { "epoch": 0.026869761206782257, "grad_norm": 2.011229466967333, "learning_rate": 2.686303387334315e-06, "loss": 0.5253, "step": 912 }, { "epoch": 0.026899223664245835, "grad_norm": 1.71601127572005, "learning_rate": 2.6892488954344627e-06, "loss": 0.4101, "step": 913 }, { "epoch": 0.02692868612170941, "grad_norm": 2.035527933764594, "learning_rate": 2.69219440353461e-06, "loss": 0.8137, "step": 914 }, { "epoch": 0.02695814857917299, "grad_norm": 1.95427900455173, "learning_rate": 2.695139911634757e-06, "loss": 0.6668, "step": 915 }, { "epoch": 0.026987611036636567, "grad_norm": 1.9155617525295427, "learning_rate": 2.698085419734904e-06, "loss": 0.6626, "step": 916 }, { "epoch": 0.027017073494100142, "grad_norm": 2.163596700412166, "learning_rate": 2.7010309278350518e-06, "loss": 0.4569, "step": 917 }, { "epoch": 0.02704653595156372, "grad_norm": 2.0224213265801225, "learning_rate": 2.703976435935199e-06, "loss": 0.6679, "step": 918 }, { "epoch": 0.0270759984090273, "grad_norm": 2.1582472658659344, "learning_rate": 2.706921944035346e-06, "loss": 0.542, "step": 919 }, { "epoch": 0.027105460866490873, "grad_norm": 2.0500456684390853, "learning_rate": 2.7098674521354933e-06, "loss": 0.7306, "step": 920 }, { "epoch": 0.027134923323954452, "grad_norm": 1.984269359590217, "learning_rate": 2.712812960235641e-06, "loss": 0.5755, "step": 921 }, { "epoch": 0.027164385781418027, "grad_norm": 2.0591622551401407, "learning_rate": 2.715758468335788e-06, "loss": 0.6136, "step": 922 }, { "epoch": 0.027193848238881605, "grad_norm": 2.106764372056272, "learning_rate": 2.718703976435935e-06, "loss": 0.5207, "step": 923 }, { "epoch": 0.027223310696345183, "grad_norm": 2.057972142902106, "learning_rate": 2.7216494845360823e-06, "loss": 0.6138, "step": 924 }, { "epoch": 0.02725277315380876, "grad_norm": 1.921981418551688, "learning_rate": 2.72459499263623e-06, "loss": 0.5554, "step": 925 }, { "epoch": 0.027282235611272337, "grad_norm": 1.757788291861664, "learning_rate": 2.727540500736377e-06, "loss": 0.6454, "step": 926 }, { "epoch": 0.02731169806873591, "grad_norm": 1.8041502325005587, "learning_rate": 2.7304860088365242e-06, "loss": 0.6169, "step": 927 }, { "epoch": 0.02734116052619949, "grad_norm": 1.7539841100724063, "learning_rate": 2.7334315169366714e-06, "loss": 0.5489, "step": 928 }, { "epoch": 0.02737062298366307, "grad_norm": 2.0293157330378158, "learning_rate": 2.7363770250368194e-06, "loss": 0.5899, "step": 929 }, { "epoch": 0.027400085441126643, "grad_norm": 2.0711346736620846, "learning_rate": 2.7393225331369666e-06, "loss": 0.618, "step": 930 }, { "epoch": 0.02742954789859022, "grad_norm": 1.8683191292665706, "learning_rate": 2.7422680412371137e-06, "loss": 0.3964, "step": 931 }, { "epoch": 0.0274590103560538, "grad_norm": 2.100625237324164, "learning_rate": 2.7452135493372613e-06, "loss": 0.4588, "step": 932 }, { "epoch": 0.027488472813517375, "grad_norm": 1.8029284202774214, "learning_rate": 2.7481590574374085e-06, "loss": 0.5585, "step": 933 }, { "epoch": 0.027517935270980953, "grad_norm": 2.017190333078256, "learning_rate": 2.7511045655375556e-06, "loss": 0.6666, "step": 934 }, { "epoch": 0.027547397728444528, "grad_norm": 1.805208862774722, "learning_rate": 2.754050073637703e-06, "loss": 0.4986, "step": 935 }, { "epoch": 0.027576860185908107, "grad_norm": 2.2738609978643405, "learning_rate": 2.7569955817378504e-06, "loss": 0.5957, "step": 936 }, { "epoch": 0.027606322643371685, "grad_norm": 2.083834757724951, "learning_rate": 2.7599410898379976e-06, "loss": 0.8596, "step": 937 }, { "epoch": 0.02763578510083526, "grad_norm": 1.8481118439038837, "learning_rate": 2.7628865979381447e-06, "loss": 0.6758, "step": 938 }, { "epoch": 0.027665247558298838, "grad_norm": 1.84137364595685, "learning_rate": 2.765832106038292e-06, "loss": 0.6418, "step": 939 }, { "epoch": 0.027694710015762413, "grad_norm": 1.8353835617654586, "learning_rate": 2.7687776141384395e-06, "loss": 0.4781, "step": 940 }, { "epoch": 0.02772417247322599, "grad_norm": 1.9369727372818766, "learning_rate": 2.7717231222385866e-06, "loss": 0.6418, "step": 941 }, { "epoch": 0.02775363493068957, "grad_norm": 1.9298404430510554, "learning_rate": 2.7746686303387338e-06, "loss": 0.6496, "step": 942 }, { "epoch": 0.027783097388153145, "grad_norm": 1.729002312954835, "learning_rate": 2.777614138438881e-06, "loss": 0.6469, "step": 943 }, { "epoch": 0.027812559845616723, "grad_norm": 1.8588697829487308, "learning_rate": 2.7805596465390285e-06, "loss": 0.6027, "step": 944 }, { "epoch": 0.0278420223030803, "grad_norm": 1.7278680023371695, "learning_rate": 2.7835051546391757e-06, "loss": 0.536, "step": 945 }, { "epoch": 0.027871484760543876, "grad_norm": 1.9821922733235686, "learning_rate": 2.786450662739323e-06, "loss": 0.6543, "step": 946 }, { "epoch": 0.027900947218007455, "grad_norm": 1.868599885058187, "learning_rate": 2.78939617083947e-06, "loss": 0.6566, "step": 947 }, { "epoch": 0.02793040967547103, "grad_norm": 1.9865864612648576, "learning_rate": 2.7923416789396176e-06, "loss": 0.6181, "step": 948 }, { "epoch": 0.027959872132934608, "grad_norm": 2.10187363268658, "learning_rate": 2.7952871870397648e-06, "loss": 0.7319, "step": 949 }, { "epoch": 0.027989334590398186, "grad_norm": 1.9010039124619418, "learning_rate": 2.798232695139912e-06, "loss": 0.7236, "step": 950 }, { "epoch": 0.02801879704786176, "grad_norm": 1.7101588537023789, "learning_rate": 2.801178203240059e-06, "loss": 0.4837, "step": 951 }, { "epoch": 0.02804825950532534, "grad_norm": 1.9575954203007004, "learning_rate": 2.8041237113402062e-06, "loss": 0.7107, "step": 952 }, { "epoch": 0.028077721962788914, "grad_norm": 2.2359256403253323, "learning_rate": 2.807069219440354e-06, "loss": 0.6184, "step": 953 }, { "epoch": 0.028107184420252493, "grad_norm": 2.0338926507658903, "learning_rate": 2.810014727540501e-06, "loss": 0.4861, "step": 954 }, { "epoch": 0.02813664687771607, "grad_norm": 2.0440470874674412, "learning_rate": 2.812960235640648e-06, "loss": 0.4722, "step": 955 }, { "epoch": 0.028166109335179646, "grad_norm": 1.8977479619876647, "learning_rate": 2.8159057437407953e-06, "loss": 0.7787, "step": 956 }, { "epoch": 0.028195571792643224, "grad_norm": 2.397186543638935, "learning_rate": 2.818851251840943e-06, "loss": 0.6947, "step": 957 }, { "epoch": 0.028225034250106803, "grad_norm": 1.7778781508539496, "learning_rate": 2.82179675994109e-06, "loss": 0.4342, "step": 958 }, { "epoch": 0.028254496707570378, "grad_norm": 2.32066113599605, "learning_rate": 2.8247422680412372e-06, "loss": 0.5858, "step": 959 }, { "epoch": 0.028283959165033956, "grad_norm": 1.7767057099576626, "learning_rate": 2.8276877761413844e-06, "loss": 0.5204, "step": 960 }, { "epoch": 0.02831342162249753, "grad_norm": 1.8183923110336646, "learning_rate": 2.830633284241532e-06, "loss": 0.5763, "step": 961 }, { "epoch": 0.02834288407996111, "grad_norm": 1.7922119110885693, "learning_rate": 2.833578792341679e-06, "loss": 0.5646, "step": 962 }, { "epoch": 0.028372346537424688, "grad_norm": 2.325881480876016, "learning_rate": 2.8365243004418263e-06, "loss": 0.6912, "step": 963 }, { "epoch": 0.028401808994888263, "grad_norm": 1.9529031104280548, "learning_rate": 2.8394698085419734e-06, "loss": 0.4932, "step": 964 }, { "epoch": 0.02843127145235184, "grad_norm": 1.9081100569012888, "learning_rate": 2.842415316642121e-06, "loss": 0.545, "step": 965 }, { "epoch": 0.02846073390981542, "grad_norm": 1.9931785725638476, "learning_rate": 2.845360824742268e-06, "loss": 0.6124, "step": 966 }, { "epoch": 0.028490196367278994, "grad_norm": 1.9559098077084138, "learning_rate": 2.8483063328424153e-06, "loss": 0.6081, "step": 967 }, { "epoch": 0.028519658824742573, "grad_norm": 1.9008205337045483, "learning_rate": 2.8512518409425625e-06, "loss": 0.6032, "step": 968 }, { "epoch": 0.028549121282206148, "grad_norm": 2.142628375955106, "learning_rate": 2.85419734904271e-06, "loss": 0.6157, "step": 969 }, { "epoch": 0.028578583739669726, "grad_norm": 1.9566504655051715, "learning_rate": 2.8571428571428573e-06, "loss": 0.6169, "step": 970 }, { "epoch": 0.028608046197133304, "grad_norm": 1.9843398637566545, "learning_rate": 2.8600883652430044e-06, "loss": 0.6078, "step": 971 }, { "epoch": 0.02863750865459688, "grad_norm": 1.8043992228847603, "learning_rate": 2.8630338733431516e-06, "loss": 0.71, "step": 972 }, { "epoch": 0.028666971112060458, "grad_norm": 1.8795257464091772, "learning_rate": 2.865979381443299e-06, "loss": 0.7144, "step": 973 }, { "epoch": 0.028696433569524032, "grad_norm": 1.8924570995113164, "learning_rate": 2.8689248895434463e-06, "loss": 0.7011, "step": 974 }, { "epoch": 0.02872589602698761, "grad_norm": 2.0042566363684156, "learning_rate": 2.8718703976435935e-06, "loss": 0.7365, "step": 975 }, { "epoch": 0.02875535848445119, "grad_norm": 1.9126020827635448, "learning_rate": 2.8748159057437406e-06, "loss": 0.6392, "step": 976 }, { "epoch": 0.028784820941914764, "grad_norm": 1.782436395782303, "learning_rate": 2.8777614138438882e-06, "loss": 0.6217, "step": 977 }, { "epoch": 0.028814283399378342, "grad_norm": 1.9971069921277493, "learning_rate": 2.8807069219440354e-06, "loss": 0.6263, "step": 978 }, { "epoch": 0.02884374585684192, "grad_norm": 2.1510347216936596, "learning_rate": 2.8836524300441826e-06, "loss": 0.5877, "step": 979 }, { "epoch": 0.028873208314305496, "grad_norm": 1.9090715137015288, "learning_rate": 2.8865979381443297e-06, "loss": 0.5751, "step": 980 }, { "epoch": 0.028902670771769074, "grad_norm": 2.1386321298896283, "learning_rate": 2.8895434462444773e-06, "loss": 0.622, "step": 981 }, { "epoch": 0.02893213322923265, "grad_norm": 1.789661101072673, "learning_rate": 2.892488954344625e-06, "loss": 0.6127, "step": 982 }, { "epoch": 0.028961595686696227, "grad_norm": 1.8236837579988543, "learning_rate": 2.895434462444772e-06, "loss": 0.6099, "step": 983 }, { "epoch": 0.028991058144159806, "grad_norm": 1.9588202065323759, "learning_rate": 2.8983799705449196e-06, "loss": 0.663, "step": 984 }, { "epoch": 0.02902052060162338, "grad_norm": 1.8386289934501079, "learning_rate": 2.901325478645067e-06, "loss": 0.6494, "step": 985 }, { "epoch": 0.02904998305908696, "grad_norm": 1.8131412938707256, "learning_rate": 2.904270986745214e-06, "loss": 0.6429, "step": 986 }, { "epoch": 0.029079445516550534, "grad_norm": 1.8600620055481563, "learning_rate": 2.907216494845361e-06, "loss": 0.5783, "step": 987 }, { "epoch": 0.029108907974014112, "grad_norm": 1.9998402522268104, "learning_rate": 2.9101620029455087e-06, "loss": 0.6876, "step": 988 }, { "epoch": 0.02913837043147769, "grad_norm": 2.0495904344800357, "learning_rate": 2.913107511045656e-06, "loss": 0.6463, "step": 989 }, { "epoch": 0.029167832888941266, "grad_norm": 1.883283739522029, "learning_rate": 2.916053019145803e-06, "loss": 0.7105, "step": 990 }, { "epoch": 0.029197295346404844, "grad_norm": 1.7962089356971176, "learning_rate": 2.91899852724595e-06, "loss": 0.695, "step": 991 }, { "epoch": 0.029226757803868422, "grad_norm": 2.285369263498404, "learning_rate": 2.9219440353460978e-06, "loss": 0.617, "step": 992 }, { "epoch": 0.029256220261331997, "grad_norm": 1.7922530458888022, "learning_rate": 2.924889543446245e-06, "loss": 0.6329, "step": 993 }, { "epoch": 0.029285682718795576, "grad_norm": 1.86446287996413, "learning_rate": 2.927835051546392e-06, "loss": 0.6951, "step": 994 }, { "epoch": 0.02931514517625915, "grad_norm": 1.868207005714559, "learning_rate": 2.9307805596465392e-06, "loss": 0.5228, "step": 995 }, { "epoch": 0.02934460763372273, "grad_norm": 2.073558294288017, "learning_rate": 2.933726067746687e-06, "loss": 0.5949, "step": 996 }, { "epoch": 0.029374070091186307, "grad_norm": 2.1193626560075107, "learning_rate": 2.936671575846834e-06, "loss": 0.5604, "step": 997 }, { "epoch": 0.029403532548649882, "grad_norm": 1.9185007447793239, "learning_rate": 2.939617083946981e-06, "loss": 0.5496, "step": 998 }, { "epoch": 0.02943299500611346, "grad_norm": 2.00911706237742, "learning_rate": 2.9425625920471283e-06, "loss": 0.7336, "step": 999 }, { "epoch": 0.029462457463577035, "grad_norm": 2.1464943671327004, "learning_rate": 2.945508100147276e-06, "loss": 0.6908, "step": 1000 }, { "epoch": 0.029491919921040614, "grad_norm": 1.8084277829761657, "learning_rate": 2.948453608247423e-06, "loss": 0.5525, "step": 1001 }, { "epoch": 0.029521382378504192, "grad_norm": 2.2408203148888264, "learning_rate": 2.9513991163475702e-06, "loss": 0.6655, "step": 1002 }, { "epoch": 0.029550844835967767, "grad_norm": 2.061821043424049, "learning_rate": 2.9543446244477174e-06, "loss": 0.6838, "step": 1003 }, { "epoch": 0.029580307293431345, "grad_norm": 2.065333970615678, "learning_rate": 2.957290132547865e-06, "loss": 0.5171, "step": 1004 }, { "epoch": 0.029609769750894924, "grad_norm": 2.114449331978395, "learning_rate": 2.960235640648012e-06, "loss": 0.6917, "step": 1005 }, { "epoch": 0.0296392322083585, "grad_norm": 2.2178098913956994, "learning_rate": 2.9631811487481593e-06, "loss": 0.5443, "step": 1006 }, { "epoch": 0.029668694665822077, "grad_norm": 1.9654865992272958, "learning_rate": 2.9661266568483065e-06, "loss": 0.5528, "step": 1007 }, { "epoch": 0.029698157123285652, "grad_norm": 2.215139022153567, "learning_rate": 2.969072164948454e-06, "loss": 0.7325, "step": 1008 }, { "epoch": 0.02972761958074923, "grad_norm": 1.6893821677061136, "learning_rate": 2.972017673048601e-06, "loss": 0.5576, "step": 1009 }, { "epoch": 0.02975708203821281, "grad_norm": 1.889101001593798, "learning_rate": 2.9749631811487484e-06, "loss": 0.6266, "step": 1010 }, { "epoch": 0.029786544495676383, "grad_norm": 1.9124065989650825, "learning_rate": 2.9779086892488955e-06, "loss": 0.6263, "step": 1011 }, { "epoch": 0.029816006953139962, "grad_norm": 1.796346297940469, "learning_rate": 2.980854197349043e-06, "loss": 0.5014, "step": 1012 }, { "epoch": 0.029845469410603537, "grad_norm": 1.8059769470661327, "learning_rate": 2.9837997054491903e-06, "loss": 0.6865, "step": 1013 }, { "epoch": 0.029874931868067115, "grad_norm": 1.8168327980898302, "learning_rate": 2.9867452135493374e-06, "loss": 0.44, "step": 1014 }, { "epoch": 0.029904394325530693, "grad_norm": 1.8674620797378114, "learning_rate": 2.9896907216494846e-06, "loss": 0.5519, "step": 1015 }, { "epoch": 0.02993385678299427, "grad_norm": 1.8405544896136028, "learning_rate": 2.992636229749632e-06, "loss": 0.6312, "step": 1016 }, { "epoch": 0.029963319240457847, "grad_norm": 2.0901923292277904, "learning_rate": 2.9955817378497793e-06, "loss": 0.7622, "step": 1017 }, { "epoch": 0.029992781697921425, "grad_norm": 1.9148936704759492, "learning_rate": 2.9985272459499265e-06, "loss": 0.6478, "step": 1018 }, { "epoch": 0.030022244155385, "grad_norm": 2.0663364866878573, "learning_rate": 3.0014727540500737e-06, "loss": 0.5307, "step": 1019 }, { "epoch": 0.03005170661284858, "grad_norm": 1.936610228325766, "learning_rate": 3.0044182621502212e-06, "loss": 0.6758, "step": 1020 }, { "epoch": 0.030081169070312153, "grad_norm": 1.7498685572437604, "learning_rate": 3.0073637702503684e-06, "loss": 0.5628, "step": 1021 }, { "epoch": 0.03011063152777573, "grad_norm": 1.9990455930073805, "learning_rate": 3.0103092783505156e-06, "loss": 0.729, "step": 1022 }, { "epoch": 0.03014009398523931, "grad_norm": 1.8925671753961808, "learning_rate": 3.0132547864506627e-06, "loss": 0.6926, "step": 1023 }, { "epoch": 0.030169556442702885, "grad_norm": 1.891606075442412, "learning_rate": 3.01620029455081e-06, "loss": 0.4403, "step": 1024 }, { "epoch": 0.030199018900166463, "grad_norm": 1.7633085700084443, "learning_rate": 3.0191458026509575e-06, "loss": 0.4694, "step": 1025 }, { "epoch": 0.03022848135763004, "grad_norm": 2.0611587434757923, "learning_rate": 3.0220913107511046e-06, "loss": 0.7375, "step": 1026 }, { "epoch": 0.030257943815093617, "grad_norm": 1.8022744281261027, "learning_rate": 3.025036818851252e-06, "loss": 0.5208, "step": 1027 }, { "epoch": 0.030287406272557195, "grad_norm": 1.8846143188791897, "learning_rate": 3.027982326951399e-06, "loss": 0.5587, "step": 1028 }, { "epoch": 0.03031686873002077, "grad_norm": 2.0177275537750337, "learning_rate": 3.0309278350515465e-06, "loss": 0.6016, "step": 1029 }, { "epoch": 0.030346331187484348, "grad_norm": 1.7945088144063126, "learning_rate": 3.0338733431516937e-06, "loss": 0.4728, "step": 1030 }, { "epoch": 0.030375793644947927, "grad_norm": 1.968728106881806, "learning_rate": 3.036818851251841e-06, "loss": 0.6911, "step": 1031 }, { "epoch": 0.0304052561024115, "grad_norm": 1.7440237842241815, "learning_rate": 3.039764359351988e-06, "loss": 0.4924, "step": 1032 }, { "epoch": 0.03043471855987508, "grad_norm": 2.0496958601243738, "learning_rate": 3.0427098674521356e-06, "loss": 0.8316, "step": 1033 }, { "epoch": 0.030464181017338655, "grad_norm": 1.9119989411502327, "learning_rate": 3.0456553755522828e-06, "loss": 0.5805, "step": 1034 }, { "epoch": 0.030493643474802233, "grad_norm": 2.331030796976505, "learning_rate": 3.0486008836524308e-06, "loss": 0.6483, "step": 1035 }, { "epoch": 0.03052310593226581, "grad_norm": 2.326534829131155, "learning_rate": 3.051546391752578e-06, "loss": 0.5664, "step": 1036 }, { "epoch": 0.030552568389729386, "grad_norm": 1.8844065749602974, "learning_rate": 3.054491899852725e-06, "loss": 0.5449, "step": 1037 }, { "epoch": 0.030582030847192965, "grad_norm": 2.0451205313378638, "learning_rate": 3.0574374079528723e-06, "loss": 0.5862, "step": 1038 }, { "epoch": 0.030611493304656543, "grad_norm": 1.6632846868569124, "learning_rate": 3.0603829160530194e-06, "loss": 0.5972, "step": 1039 }, { "epoch": 0.030640955762120118, "grad_norm": 1.969083465752837, "learning_rate": 3.063328424153167e-06, "loss": 0.4806, "step": 1040 }, { "epoch": 0.030670418219583696, "grad_norm": 1.7753646257137063, "learning_rate": 3.066273932253314e-06, "loss": 0.6173, "step": 1041 }, { "epoch": 0.03069988067704727, "grad_norm": 2.0837180017960657, "learning_rate": 3.0692194403534613e-06, "loss": 0.7547, "step": 1042 }, { "epoch": 0.03072934313451085, "grad_norm": 2.173390763053591, "learning_rate": 3.0721649484536085e-06, "loss": 0.5404, "step": 1043 }, { "epoch": 0.030758805591974428, "grad_norm": 2.1594048369651078, "learning_rate": 3.075110456553756e-06, "loss": 0.6753, "step": 1044 }, { "epoch": 0.030788268049438003, "grad_norm": 1.8259633119008936, "learning_rate": 3.0780559646539032e-06, "loss": 0.6496, "step": 1045 }, { "epoch": 0.03081773050690158, "grad_norm": 1.587745516321115, "learning_rate": 3.0810014727540504e-06, "loss": 0.3855, "step": 1046 }, { "epoch": 0.030847192964365156, "grad_norm": 2.0593248932947805, "learning_rate": 3.0839469808541976e-06, "loss": 0.6144, "step": 1047 }, { "epoch": 0.030876655421828735, "grad_norm": 1.7341937972088253, "learning_rate": 3.086892488954345e-06, "loss": 0.5118, "step": 1048 }, { "epoch": 0.030906117879292313, "grad_norm": 1.8453175842205274, "learning_rate": 3.0898379970544923e-06, "loss": 0.58, "step": 1049 }, { "epoch": 0.030935580336755888, "grad_norm": 2.079012569684654, "learning_rate": 3.0927835051546395e-06, "loss": 0.6821, "step": 1050 }, { "epoch": 0.030965042794219466, "grad_norm": 2.033920280658036, "learning_rate": 3.0957290132547866e-06, "loss": 0.536, "step": 1051 }, { "epoch": 0.030994505251683045, "grad_norm": 2.0588851503039534, "learning_rate": 3.098674521354934e-06, "loss": 0.5659, "step": 1052 }, { "epoch": 0.03102396770914662, "grad_norm": 1.851835624889025, "learning_rate": 3.1016200294550814e-06, "loss": 0.5152, "step": 1053 }, { "epoch": 0.031053430166610198, "grad_norm": 2.0578769423974457, "learning_rate": 3.1045655375552285e-06, "loss": 0.6428, "step": 1054 }, { "epoch": 0.031082892624073773, "grad_norm": 1.9150849328833002, "learning_rate": 3.1075110456553757e-06, "loss": 0.7137, "step": 1055 }, { "epoch": 0.03111235508153735, "grad_norm": 1.913046362450495, "learning_rate": 3.1104565537555233e-06, "loss": 0.5462, "step": 1056 }, { "epoch": 0.03114181753900093, "grad_norm": 2.130069140779409, "learning_rate": 3.1134020618556704e-06, "loss": 0.5714, "step": 1057 }, { "epoch": 0.031171279996464504, "grad_norm": 1.7688781231475281, "learning_rate": 3.1163475699558176e-06, "loss": 0.559, "step": 1058 }, { "epoch": 0.031200742453928083, "grad_norm": 2.2004790127018583, "learning_rate": 3.1192930780559648e-06, "loss": 0.7105, "step": 1059 }, { "epoch": 0.031230204911391658, "grad_norm": 1.9379245296628431, "learning_rate": 3.1222385861561123e-06, "loss": 0.6072, "step": 1060 }, { "epoch": 0.031259667368855236, "grad_norm": 1.7567784264712958, "learning_rate": 3.1251840942562595e-06, "loss": 0.5996, "step": 1061 }, { "epoch": 0.03128912982631881, "grad_norm": 1.6935769553348805, "learning_rate": 3.1281296023564067e-06, "loss": 0.5387, "step": 1062 }, { "epoch": 0.03131859228378239, "grad_norm": 2.004674073865155, "learning_rate": 3.131075110456554e-06, "loss": 0.6617, "step": 1063 }, { "epoch": 0.03134805474124597, "grad_norm": 1.8980457611972186, "learning_rate": 3.1340206185567014e-06, "loss": 0.6401, "step": 1064 }, { "epoch": 0.03137751719870954, "grad_norm": 1.9107392626650344, "learning_rate": 3.1369661266568486e-06, "loss": 0.6722, "step": 1065 }, { "epoch": 0.031406979656173124, "grad_norm": 2.037233826312134, "learning_rate": 3.1399116347569957e-06, "loss": 0.573, "step": 1066 }, { "epoch": 0.0314364421136367, "grad_norm": 1.8965110795300524, "learning_rate": 3.142857142857143e-06, "loss": 0.6655, "step": 1067 }, { "epoch": 0.031465904571100274, "grad_norm": 1.8523880824015984, "learning_rate": 3.1458026509572905e-06, "loss": 0.6491, "step": 1068 }, { "epoch": 0.031495367028563856, "grad_norm": 1.8901536640735002, "learning_rate": 3.1487481590574376e-06, "loss": 0.6307, "step": 1069 }, { "epoch": 0.03152482948602743, "grad_norm": 2.007308704859418, "learning_rate": 3.151693667157585e-06, "loss": 0.5149, "step": 1070 }, { "epoch": 0.031554291943491006, "grad_norm": 1.9368920041617472, "learning_rate": 3.154639175257732e-06, "loss": 0.536, "step": 1071 }, { "epoch": 0.03158375440095458, "grad_norm": 2.029665185200516, "learning_rate": 3.1575846833578795e-06, "loss": 0.5199, "step": 1072 }, { "epoch": 0.03161321685841816, "grad_norm": 2.1053133714641303, "learning_rate": 3.1605301914580267e-06, "loss": 0.7618, "step": 1073 }, { "epoch": 0.03164267931588174, "grad_norm": 2.244905655992437, "learning_rate": 3.163475699558174e-06, "loss": 0.5149, "step": 1074 }, { "epoch": 0.03167214177334531, "grad_norm": 2.0270752343837737, "learning_rate": 3.166421207658321e-06, "loss": 0.5476, "step": 1075 }, { "epoch": 0.031701604230808894, "grad_norm": 1.825458160173553, "learning_rate": 3.1693667157584686e-06, "loss": 0.5951, "step": 1076 }, { "epoch": 0.03173106668827247, "grad_norm": 1.9218856746236863, "learning_rate": 3.1723122238586158e-06, "loss": 0.7213, "step": 1077 }, { "epoch": 0.031760529145736044, "grad_norm": 1.7106452766020888, "learning_rate": 3.175257731958763e-06, "loss": 0.5591, "step": 1078 }, { "epoch": 0.031789991603199626, "grad_norm": 1.8580419267482953, "learning_rate": 3.17820324005891e-06, "loss": 0.7311, "step": 1079 }, { "epoch": 0.0318194540606632, "grad_norm": 1.966494290572987, "learning_rate": 3.1811487481590577e-06, "loss": 0.6629, "step": 1080 }, { "epoch": 0.031848916518126776, "grad_norm": 1.7055440315905355, "learning_rate": 3.184094256259205e-06, "loss": 0.5987, "step": 1081 }, { "epoch": 0.03187837897559036, "grad_norm": 2.0014295487642264, "learning_rate": 3.187039764359352e-06, "loss": 0.6753, "step": 1082 }, { "epoch": 0.03190784143305393, "grad_norm": 2.1618093487257775, "learning_rate": 3.189985272459499e-06, "loss": 0.6766, "step": 1083 }, { "epoch": 0.03193730389051751, "grad_norm": 1.6252405915721022, "learning_rate": 3.1929307805596468e-06, "loss": 0.5735, "step": 1084 }, { "epoch": 0.03196676634798108, "grad_norm": 1.8170838051097398, "learning_rate": 3.195876288659794e-06, "loss": 0.6663, "step": 1085 }, { "epoch": 0.031996228805444664, "grad_norm": 1.908742880677786, "learning_rate": 3.198821796759941e-06, "loss": 0.5976, "step": 1086 }, { "epoch": 0.03202569126290824, "grad_norm": 1.8857295708053494, "learning_rate": 3.2017673048600882e-06, "loss": 0.5623, "step": 1087 }, { "epoch": 0.032055153720371814, "grad_norm": 1.8070073072780286, "learning_rate": 3.2047128129602362e-06, "loss": 0.5033, "step": 1088 }, { "epoch": 0.032084616177835396, "grad_norm": 1.926842359553863, "learning_rate": 3.2076583210603834e-06, "loss": 0.6401, "step": 1089 }, { "epoch": 0.03211407863529897, "grad_norm": 2.025195449141991, "learning_rate": 3.2106038291605306e-06, "loss": 0.5963, "step": 1090 }, { "epoch": 0.032143541092762545, "grad_norm": 1.862425174000326, "learning_rate": 3.213549337260678e-06, "loss": 0.6989, "step": 1091 }, { "epoch": 0.03217300355022613, "grad_norm": 2.2042693114029883, "learning_rate": 3.2164948453608253e-06, "loss": 0.6756, "step": 1092 }, { "epoch": 0.0322024660076897, "grad_norm": 1.8556286856718922, "learning_rate": 3.2194403534609725e-06, "loss": 0.6533, "step": 1093 }, { "epoch": 0.03223192846515328, "grad_norm": 2.0232444394974523, "learning_rate": 3.2223858615611196e-06, "loss": 0.4586, "step": 1094 }, { "epoch": 0.03226139092261686, "grad_norm": 1.8573966508053759, "learning_rate": 3.2253313696612672e-06, "loss": 0.5152, "step": 1095 }, { "epoch": 0.032290853380080434, "grad_norm": 1.9920639976482544, "learning_rate": 3.2282768777614144e-06, "loss": 0.6633, "step": 1096 }, { "epoch": 0.03232031583754401, "grad_norm": 1.8637726663877463, "learning_rate": 3.2312223858615615e-06, "loss": 0.6906, "step": 1097 }, { "epoch": 0.032349778295007584, "grad_norm": 2.0722376828450777, "learning_rate": 3.2341678939617087e-06, "loss": 0.4779, "step": 1098 }, { "epoch": 0.032379240752471165, "grad_norm": 1.9052904905683625, "learning_rate": 3.2371134020618563e-06, "loss": 0.6258, "step": 1099 }, { "epoch": 0.03240870320993474, "grad_norm": 1.8762769374088701, "learning_rate": 3.2400589101620034e-06, "loss": 0.5918, "step": 1100 }, { "epoch": 0.032438165667398315, "grad_norm": 1.783162301577244, "learning_rate": 3.2430044182621506e-06, "loss": 0.5311, "step": 1101 }, { "epoch": 0.0324676281248619, "grad_norm": 1.9992406693643983, "learning_rate": 3.2459499263622978e-06, "loss": 0.5383, "step": 1102 }, { "epoch": 0.03249709058232547, "grad_norm": 1.9217828149706058, "learning_rate": 3.2488954344624454e-06, "loss": 0.5062, "step": 1103 }, { "epoch": 0.03252655303978905, "grad_norm": 2.0190817562109076, "learning_rate": 3.2518409425625925e-06, "loss": 0.6044, "step": 1104 }, { "epoch": 0.03255601549725263, "grad_norm": 2.342441243767894, "learning_rate": 3.2547864506627397e-06, "loss": 0.7099, "step": 1105 }, { "epoch": 0.032585477954716204, "grad_norm": 1.9770334446053788, "learning_rate": 3.257731958762887e-06, "loss": 0.694, "step": 1106 }, { "epoch": 0.03261494041217978, "grad_norm": 2.014181852604505, "learning_rate": 3.2606774668630344e-06, "loss": 0.4616, "step": 1107 }, { "epoch": 0.03264440286964336, "grad_norm": 2.026151906286443, "learning_rate": 3.2636229749631816e-06, "loss": 0.5858, "step": 1108 }, { "epoch": 0.032673865327106935, "grad_norm": 2.2844371042129135, "learning_rate": 3.2665684830633287e-06, "loss": 0.6656, "step": 1109 }, { "epoch": 0.03270332778457051, "grad_norm": 1.727625527426001, "learning_rate": 3.269513991163476e-06, "loss": 0.5055, "step": 1110 }, { "epoch": 0.032732790242034085, "grad_norm": 2.028440841500574, "learning_rate": 3.272459499263623e-06, "loss": 0.6973, "step": 1111 }, { "epoch": 0.03276225269949767, "grad_norm": 1.8233801683561814, "learning_rate": 3.2754050073637707e-06, "loss": 0.5558, "step": 1112 }, { "epoch": 0.03279171515696124, "grad_norm": 2.0850526642704352, "learning_rate": 3.278350515463918e-06, "loss": 0.7354, "step": 1113 }, { "epoch": 0.03282117761442482, "grad_norm": 1.9039517746913683, "learning_rate": 3.281296023564065e-06, "loss": 0.5845, "step": 1114 }, { "epoch": 0.0328506400718884, "grad_norm": 1.9697521972248955, "learning_rate": 3.284241531664212e-06, "loss": 0.6802, "step": 1115 }, { "epoch": 0.03288010252935197, "grad_norm": 1.7826807253948498, "learning_rate": 3.2871870397643597e-06, "loss": 0.68, "step": 1116 }, { "epoch": 0.03290956498681555, "grad_norm": 2.218587812264358, "learning_rate": 3.290132547864507e-06, "loss": 0.6636, "step": 1117 }, { "epoch": 0.03293902744427913, "grad_norm": 1.8913089670456091, "learning_rate": 3.293078055964654e-06, "loss": 0.5391, "step": 1118 }, { "epoch": 0.032968489901742705, "grad_norm": 2.1383493875142436, "learning_rate": 3.296023564064801e-06, "loss": 0.631, "step": 1119 }, { "epoch": 0.03299795235920628, "grad_norm": 1.7086486038892525, "learning_rate": 3.298969072164949e-06, "loss": 0.568, "step": 1120 }, { "epoch": 0.03302741481666986, "grad_norm": 1.9967229176047712, "learning_rate": 3.301914580265096e-06, "loss": 0.6529, "step": 1121 }, { "epoch": 0.03305687727413344, "grad_norm": 2.00389709831211, "learning_rate": 3.304860088365243e-06, "loss": 0.5439, "step": 1122 }, { "epoch": 0.03308633973159701, "grad_norm": 2.0314878618589463, "learning_rate": 3.3078055964653903e-06, "loss": 0.4752, "step": 1123 }, { "epoch": 0.033115802189060586, "grad_norm": 1.9291372116241348, "learning_rate": 3.310751104565538e-06, "loss": 0.7475, "step": 1124 }, { "epoch": 0.03314526464652417, "grad_norm": 1.9804917834134044, "learning_rate": 3.313696612665685e-06, "loss": 0.6979, "step": 1125 }, { "epoch": 0.03317472710398774, "grad_norm": 1.8150826828551432, "learning_rate": 3.316642120765832e-06, "loss": 0.6551, "step": 1126 }, { "epoch": 0.03320418956145132, "grad_norm": 2.005322879851103, "learning_rate": 3.3195876288659793e-06, "loss": 0.6471, "step": 1127 }, { "epoch": 0.0332336520189149, "grad_norm": 2.0980591686302508, "learning_rate": 3.322533136966127e-06, "loss": 0.5625, "step": 1128 }, { "epoch": 0.033263114476378475, "grad_norm": 2.254563801692526, "learning_rate": 3.325478645066274e-06, "loss": 0.566, "step": 1129 }, { "epoch": 0.03329257693384205, "grad_norm": 2.0745574490524104, "learning_rate": 3.3284241531664212e-06, "loss": 0.7443, "step": 1130 }, { "epoch": 0.03332203939130563, "grad_norm": 1.7897545982490062, "learning_rate": 3.3313696612665684e-06, "loss": 0.6516, "step": 1131 }, { "epoch": 0.033351501848769206, "grad_norm": 1.9848379875917248, "learning_rate": 3.334315169366716e-06, "loss": 0.7087, "step": 1132 }, { "epoch": 0.03338096430623278, "grad_norm": 1.7644018639747063, "learning_rate": 3.337260677466863e-06, "loss": 0.5721, "step": 1133 }, { "epoch": 0.03341042676369636, "grad_norm": 2.2749463610149663, "learning_rate": 3.3402061855670103e-06, "loss": 0.6563, "step": 1134 }, { "epoch": 0.03343988922115994, "grad_norm": 1.9744659370883626, "learning_rate": 3.3431516936671575e-06, "loss": 0.6353, "step": 1135 }, { "epoch": 0.03346935167862351, "grad_norm": 1.8436517386410451, "learning_rate": 3.346097201767305e-06, "loss": 0.5153, "step": 1136 }, { "epoch": 0.03349881413608709, "grad_norm": 1.6487550517228746, "learning_rate": 3.3490427098674522e-06, "loss": 0.4913, "step": 1137 }, { "epoch": 0.03352827659355067, "grad_norm": 1.9569661920792396, "learning_rate": 3.3519882179675994e-06, "loss": 0.6527, "step": 1138 }, { "epoch": 0.033557739051014245, "grad_norm": 2.0954234456701255, "learning_rate": 3.3549337260677465e-06, "loss": 0.5485, "step": 1139 }, { "epoch": 0.03358720150847782, "grad_norm": 1.8701094744384714, "learning_rate": 3.357879234167894e-06, "loss": 0.7947, "step": 1140 }, { "epoch": 0.0336166639659414, "grad_norm": 1.729084226722523, "learning_rate": 3.3608247422680417e-06, "loss": 0.6008, "step": 1141 }, { "epoch": 0.033646126423404976, "grad_norm": 1.9705250977465043, "learning_rate": 3.363770250368189e-06, "loss": 0.6154, "step": 1142 }, { "epoch": 0.03367558888086855, "grad_norm": 1.8388877593034205, "learning_rate": 3.3667157584683365e-06, "loss": 0.6453, "step": 1143 }, { "epoch": 0.03370505133833213, "grad_norm": 1.889686186821761, "learning_rate": 3.3696612665684836e-06, "loss": 0.657, "step": 1144 }, { "epoch": 0.03373451379579571, "grad_norm": 1.9901103184950086, "learning_rate": 3.3726067746686308e-06, "loss": 0.5379, "step": 1145 }, { "epoch": 0.03376397625325928, "grad_norm": 1.9506000799104157, "learning_rate": 3.375552282768778e-06, "loss": 0.3995, "step": 1146 }, { "epoch": 0.033793438710722865, "grad_norm": 2.043694552473065, "learning_rate": 3.3784977908689255e-06, "loss": 0.8114, "step": 1147 }, { "epoch": 0.03382290116818644, "grad_norm": 1.9520205446524816, "learning_rate": 3.3814432989690727e-06, "loss": 0.4784, "step": 1148 }, { "epoch": 0.033852363625650014, "grad_norm": 2.0320377139634784, "learning_rate": 3.38438880706922e-06, "loss": 0.6086, "step": 1149 }, { "epoch": 0.03388182608311359, "grad_norm": 1.9575977652745709, "learning_rate": 3.387334315169367e-06, "loss": 0.6129, "step": 1150 }, { "epoch": 0.03391128854057717, "grad_norm": 1.7927826561469837, "learning_rate": 3.3902798232695146e-06, "loss": 0.5723, "step": 1151 }, { "epoch": 0.033940750998040746, "grad_norm": 1.8349236722795277, "learning_rate": 3.3932253313696618e-06, "loss": 0.5971, "step": 1152 }, { "epoch": 0.03397021345550432, "grad_norm": 1.6874938039826344, "learning_rate": 3.396170839469809e-06, "loss": 0.5595, "step": 1153 }, { "epoch": 0.0339996759129679, "grad_norm": 1.8040747891692615, "learning_rate": 3.399116347569956e-06, "loss": 0.5188, "step": 1154 }, { "epoch": 0.03402913837043148, "grad_norm": 2.060081557772589, "learning_rate": 3.4020618556701037e-06, "loss": 0.6611, "step": 1155 }, { "epoch": 0.03405860082789505, "grad_norm": 2.0373265974741375, "learning_rate": 3.405007363770251e-06, "loss": 0.659, "step": 1156 }, { "epoch": 0.034088063285358634, "grad_norm": 1.8913323148513612, "learning_rate": 3.407952871870398e-06, "loss": 0.5136, "step": 1157 }, { "epoch": 0.03411752574282221, "grad_norm": 1.7541281050480888, "learning_rate": 3.410898379970545e-06, "loss": 0.6236, "step": 1158 }, { "epoch": 0.034146988200285784, "grad_norm": 1.7769853735461398, "learning_rate": 3.4138438880706927e-06, "loss": 0.6346, "step": 1159 }, { "epoch": 0.034176450657749366, "grad_norm": 1.9452639689197437, "learning_rate": 3.41678939617084e-06, "loss": 0.6978, "step": 1160 }, { "epoch": 0.03420591311521294, "grad_norm": 2.0877654431050776, "learning_rate": 3.419734904270987e-06, "loss": 0.6414, "step": 1161 }, { "epoch": 0.034235375572676516, "grad_norm": 1.8174778016453803, "learning_rate": 3.4226804123711342e-06, "loss": 0.5799, "step": 1162 }, { "epoch": 0.03426483803014009, "grad_norm": 1.816297428812744, "learning_rate": 3.425625920471282e-06, "loss": 0.679, "step": 1163 }, { "epoch": 0.03429430048760367, "grad_norm": 1.9483067007812331, "learning_rate": 3.428571428571429e-06, "loss": 0.7262, "step": 1164 }, { "epoch": 0.03432376294506725, "grad_norm": 1.8529462019303422, "learning_rate": 3.431516936671576e-06, "loss": 0.6831, "step": 1165 }, { "epoch": 0.03435322540253082, "grad_norm": 2.200741500841885, "learning_rate": 3.4344624447717233e-06, "loss": 0.6885, "step": 1166 }, { "epoch": 0.034382687859994404, "grad_norm": 1.8542429591041854, "learning_rate": 3.437407952871871e-06, "loss": 0.4806, "step": 1167 }, { "epoch": 0.03441215031745798, "grad_norm": 1.9507778791711337, "learning_rate": 3.440353460972018e-06, "loss": 0.772, "step": 1168 }, { "epoch": 0.034441612774921554, "grad_norm": 2.213907974162662, "learning_rate": 3.443298969072165e-06, "loss": 0.6239, "step": 1169 }, { "epoch": 0.034471075232385136, "grad_norm": 2.1962675012046162, "learning_rate": 3.4462444771723124e-06, "loss": 0.6739, "step": 1170 }, { "epoch": 0.03450053768984871, "grad_norm": 1.740106433729648, "learning_rate": 3.44918998527246e-06, "loss": 0.4868, "step": 1171 }, { "epoch": 0.034530000147312286, "grad_norm": 1.8862356863551268, "learning_rate": 3.452135493372607e-06, "loss": 0.5616, "step": 1172 }, { "epoch": 0.03455946260477587, "grad_norm": 1.9170130424163785, "learning_rate": 3.4550810014727543e-06, "loss": 0.521, "step": 1173 }, { "epoch": 0.03458892506223944, "grad_norm": 1.7062696339842172, "learning_rate": 3.4580265095729014e-06, "loss": 0.5849, "step": 1174 }, { "epoch": 0.03461838751970302, "grad_norm": 2.1870301264214262, "learning_rate": 3.460972017673049e-06, "loss": 0.6711, "step": 1175 }, { "epoch": 0.03464784997716659, "grad_norm": 2.0975420956517827, "learning_rate": 3.463917525773196e-06, "loss": 0.6207, "step": 1176 }, { "epoch": 0.034677312434630174, "grad_norm": 1.9993876390863183, "learning_rate": 3.4668630338733433e-06, "loss": 0.6863, "step": 1177 }, { "epoch": 0.03470677489209375, "grad_norm": 1.9140546117092747, "learning_rate": 3.4698085419734905e-06, "loss": 0.6798, "step": 1178 }, { "epoch": 0.034736237349557324, "grad_norm": 2.069418793200349, "learning_rate": 3.472754050073638e-06, "loss": 0.6342, "step": 1179 }, { "epoch": 0.034765699807020906, "grad_norm": 2.067145998506921, "learning_rate": 3.4756995581737852e-06, "loss": 0.6809, "step": 1180 }, { "epoch": 0.03479516226448448, "grad_norm": 1.8815776880626522, "learning_rate": 3.4786450662739324e-06, "loss": 0.6011, "step": 1181 }, { "epoch": 0.034824624721948055, "grad_norm": 1.8261765067084474, "learning_rate": 3.4815905743740796e-06, "loss": 0.5467, "step": 1182 }, { "epoch": 0.03485408717941164, "grad_norm": 2.012029994102414, "learning_rate": 3.4845360824742267e-06, "loss": 0.6529, "step": 1183 }, { "epoch": 0.03488354963687521, "grad_norm": 1.9533684028562945, "learning_rate": 3.4874815905743743e-06, "loss": 0.6882, "step": 1184 }, { "epoch": 0.03491301209433879, "grad_norm": 1.9083680470428614, "learning_rate": 3.4904270986745215e-06, "loss": 0.6352, "step": 1185 }, { "epoch": 0.03494247455180237, "grad_norm": 2.026278013192852, "learning_rate": 3.4933726067746686e-06, "loss": 0.6194, "step": 1186 }, { "epoch": 0.034971937009265944, "grad_norm": 2.205392372542187, "learning_rate": 3.4963181148748158e-06, "loss": 0.6861, "step": 1187 }, { "epoch": 0.03500139946672952, "grad_norm": 1.728028020004783, "learning_rate": 3.4992636229749634e-06, "loss": 0.4677, "step": 1188 }, { "epoch": 0.035030861924193094, "grad_norm": 1.8138628346300782, "learning_rate": 3.5022091310751105e-06, "loss": 0.5684, "step": 1189 }, { "epoch": 0.035060324381656675, "grad_norm": 2.232594042175128, "learning_rate": 3.5051546391752577e-06, "loss": 0.6612, "step": 1190 }, { "epoch": 0.03508978683912025, "grad_norm": 1.7452438508808912, "learning_rate": 3.508100147275405e-06, "loss": 0.5171, "step": 1191 }, { "epoch": 0.035119249296583825, "grad_norm": 1.8620864542863254, "learning_rate": 3.5110456553755524e-06, "loss": 0.6413, "step": 1192 }, { "epoch": 0.03514871175404741, "grad_norm": 2.015039552301777, "learning_rate": 3.5139911634756996e-06, "loss": 0.5139, "step": 1193 }, { "epoch": 0.03517817421151098, "grad_norm": 1.859716383248371, "learning_rate": 3.516936671575847e-06, "loss": 0.4743, "step": 1194 }, { "epoch": 0.03520763666897456, "grad_norm": 2.0325341585131116, "learning_rate": 3.5198821796759948e-06, "loss": 0.5818, "step": 1195 }, { "epoch": 0.03523709912643814, "grad_norm": 1.8818918023660027, "learning_rate": 3.522827687776142e-06, "loss": 0.6201, "step": 1196 }, { "epoch": 0.035266561583901714, "grad_norm": 2.0730710249780793, "learning_rate": 3.525773195876289e-06, "loss": 0.6445, "step": 1197 }, { "epoch": 0.03529602404136529, "grad_norm": 1.870381104905812, "learning_rate": 3.5287187039764363e-06, "loss": 0.6327, "step": 1198 }, { "epoch": 0.03532548649882887, "grad_norm": 1.6533942666925487, "learning_rate": 3.531664212076584e-06, "loss": 0.4706, "step": 1199 }, { "epoch": 0.035354948956292445, "grad_norm": 1.9365022224898494, "learning_rate": 3.534609720176731e-06, "loss": 0.6715, "step": 1200 }, { "epoch": 0.03538441141375602, "grad_norm": 1.8899703500114582, "learning_rate": 3.537555228276878e-06, "loss": 0.69, "step": 1201 }, { "epoch": 0.035413873871219595, "grad_norm": 1.8170619260577097, "learning_rate": 3.5405007363770253e-06, "loss": 0.6071, "step": 1202 }, { "epoch": 0.03544333632868318, "grad_norm": 2.004044726243555, "learning_rate": 3.543446244477173e-06, "loss": 0.5404, "step": 1203 }, { "epoch": 0.03547279878614675, "grad_norm": 1.931779386331756, "learning_rate": 3.54639175257732e-06, "loss": 0.5991, "step": 1204 }, { "epoch": 0.03550226124361033, "grad_norm": 1.8174178209078593, "learning_rate": 3.5493372606774672e-06, "loss": 0.4929, "step": 1205 }, { "epoch": 0.03553172370107391, "grad_norm": 2.006519320191868, "learning_rate": 3.5522827687776144e-06, "loss": 0.524, "step": 1206 }, { "epoch": 0.03556118615853748, "grad_norm": 2.03630310727269, "learning_rate": 3.555228276877762e-06, "loss": 0.6185, "step": 1207 }, { "epoch": 0.03559064861600106, "grad_norm": 1.9104037363568653, "learning_rate": 3.558173784977909e-06, "loss": 0.4682, "step": 1208 }, { "epoch": 0.03562011107346464, "grad_norm": 2.1351366834374947, "learning_rate": 3.5611192930780563e-06, "loss": 0.7641, "step": 1209 }, { "epoch": 0.035649573530928215, "grad_norm": 1.8225077675905765, "learning_rate": 3.5640648011782035e-06, "loss": 0.4942, "step": 1210 }, { "epoch": 0.03567903598839179, "grad_norm": 1.9849685014320435, "learning_rate": 3.567010309278351e-06, "loss": 0.6154, "step": 1211 }, { "epoch": 0.03570849844585537, "grad_norm": 1.7986860928440196, "learning_rate": 3.569955817378498e-06, "loss": 0.5403, "step": 1212 }, { "epoch": 0.03573796090331895, "grad_norm": 2.1140620834020303, "learning_rate": 3.5729013254786454e-06, "loss": 0.7292, "step": 1213 }, { "epoch": 0.03576742336078252, "grad_norm": 2.2792690089801435, "learning_rate": 3.5758468335787925e-06, "loss": 0.7914, "step": 1214 }, { "epoch": 0.0357968858182461, "grad_norm": 2.1049699234517263, "learning_rate": 3.57879234167894e-06, "loss": 0.7781, "step": 1215 }, { "epoch": 0.03582634827570968, "grad_norm": 1.7498106675740646, "learning_rate": 3.5817378497790873e-06, "loss": 0.3741, "step": 1216 }, { "epoch": 0.03585581073317325, "grad_norm": 1.797062528920644, "learning_rate": 3.5846833578792344e-06, "loss": 0.5905, "step": 1217 }, { "epoch": 0.03588527319063683, "grad_norm": 2.228506407528334, "learning_rate": 3.5876288659793816e-06, "loss": 0.7558, "step": 1218 }, { "epoch": 0.03591473564810041, "grad_norm": 1.9296263976087618, "learning_rate": 3.590574374079529e-06, "loss": 0.6245, "step": 1219 }, { "epoch": 0.035944198105563985, "grad_norm": 1.9317730993809548, "learning_rate": 3.5935198821796763e-06, "loss": 0.7337, "step": 1220 }, { "epoch": 0.03597366056302756, "grad_norm": 2.1712722127028568, "learning_rate": 3.5964653902798235e-06, "loss": 0.6313, "step": 1221 }, { "epoch": 0.03600312302049114, "grad_norm": 1.881843863226882, "learning_rate": 3.5994108983799707e-06, "loss": 0.6728, "step": 1222 }, { "epoch": 0.036032585477954716, "grad_norm": 1.8844688430488548, "learning_rate": 3.6023564064801182e-06, "loss": 0.6438, "step": 1223 }, { "epoch": 0.03606204793541829, "grad_norm": 2.0012998740691845, "learning_rate": 3.6053019145802654e-06, "loss": 0.5981, "step": 1224 }, { "epoch": 0.03609151039288187, "grad_norm": 1.6093511879962297, "learning_rate": 3.6082474226804126e-06, "loss": 0.5637, "step": 1225 }, { "epoch": 0.03612097285034545, "grad_norm": 1.7272613736185916, "learning_rate": 3.6111929307805597e-06, "loss": 0.4144, "step": 1226 }, { "epoch": 0.03615043530780902, "grad_norm": 1.8420281121691704, "learning_rate": 3.6141384388807073e-06, "loss": 0.6168, "step": 1227 }, { "epoch": 0.036179897765272605, "grad_norm": 1.952993728017318, "learning_rate": 3.6170839469808545e-06, "loss": 0.7419, "step": 1228 }, { "epoch": 0.03620936022273618, "grad_norm": 1.8840239223007014, "learning_rate": 3.6200294550810016e-06, "loss": 0.6408, "step": 1229 }, { "epoch": 0.036238822680199755, "grad_norm": 1.9902763246814463, "learning_rate": 3.622974963181149e-06, "loss": 0.5607, "step": 1230 }, { "epoch": 0.03626828513766333, "grad_norm": 1.8992041878348154, "learning_rate": 3.6259204712812964e-06, "loss": 0.5384, "step": 1231 }, { "epoch": 0.03629774759512691, "grad_norm": 2.3493643819930976, "learning_rate": 3.6288659793814435e-06, "loss": 0.7161, "step": 1232 }, { "epoch": 0.036327210052590486, "grad_norm": 2.1056843109358603, "learning_rate": 3.6318114874815907e-06, "loss": 0.6918, "step": 1233 }, { "epoch": 0.03635667251005406, "grad_norm": 1.9864238956816354, "learning_rate": 3.634756995581738e-06, "loss": 0.4537, "step": 1234 }, { "epoch": 0.03638613496751764, "grad_norm": 1.8999574579736616, "learning_rate": 3.6377025036818854e-06, "loss": 0.5502, "step": 1235 }, { "epoch": 0.03641559742498122, "grad_norm": 1.9429176624562827, "learning_rate": 3.6406480117820326e-06, "loss": 0.7015, "step": 1236 }, { "epoch": 0.03644505988244479, "grad_norm": 1.7725084023678, "learning_rate": 3.6435935198821798e-06, "loss": 0.6335, "step": 1237 }, { "epoch": 0.036474522339908375, "grad_norm": 2.1470888539865425, "learning_rate": 3.646539027982327e-06, "loss": 0.5623, "step": 1238 }, { "epoch": 0.03650398479737195, "grad_norm": 1.9195637454245826, "learning_rate": 3.6494845360824745e-06, "loss": 0.7222, "step": 1239 }, { "epoch": 0.036533447254835524, "grad_norm": 2.092707940669279, "learning_rate": 3.6524300441826217e-06, "loss": 0.4477, "step": 1240 }, { "epoch": 0.036562909712299106, "grad_norm": 2.11995234416903, "learning_rate": 3.655375552282769e-06, "loss": 0.5537, "step": 1241 }, { "epoch": 0.03659237216976268, "grad_norm": 1.8903693933919485, "learning_rate": 3.658321060382916e-06, "loss": 0.7045, "step": 1242 }, { "epoch": 0.036621834627226256, "grad_norm": 1.8848839027835695, "learning_rate": 3.6612665684830636e-06, "loss": 0.5013, "step": 1243 }, { "epoch": 0.03665129708468983, "grad_norm": 1.9359579233144024, "learning_rate": 3.6642120765832107e-06, "loss": 0.5717, "step": 1244 }, { "epoch": 0.03668075954215341, "grad_norm": 1.818711220359606, "learning_rate": 3.667157584683358e-06, "loss": 0.5801, "step": 1245 }, { "epoch": 0.03671022199961699, "grad_norm": 1.9648289312233365, "learning_rate": 3.670103092783505e-06, "loss": 0.6393, "step": 1246 }, { "epoch": 0.03673968445708056, "grad_norm": 1.7859693595977582, "learning_rate": 3.673048600883653e-06, "loss": 0.5115, "step": 1247 }, { "epoch": 0.036769146914544144, "grad_norm": 2.1200963597746383, "learning_rate": 3.6759941089838002e-06, "loss": 0.7479, "step": 1248 }, { "epoch": 0.03679860937200772, "grad_norm": 1.7755119485534203, "learning_rate": 3.6789396170839474e-06, "loss": 0.5398, "step": 1249 }, { "epoch": 0.036828071829471294, "grad_norm": 1.9633953054429587, "learning_rate": 3.681885125184095e-06, "loss": 0.6003, "step": 1250 }, { "epoch": 0.036857534286934876, "grad_norm": 1.8286064748685567, "learning_rate": 3.684830633284242e-06, "loss": 0.6272, "step": 1251 }, { "epoch": 0.03688699674439845, "grad_norm": 1.9798009096632374, "learning_rate": 3.6877761413843893e-06, "loss": 0.5294, "step": 1252 }, { "epoch": 0.036916459201862026, "grad_norm": 1.773065362955977, "learning_rate": 3.6907216494845365e-06, "loss": 0.537, "step": 1253 }, { "epoch": 0.03694592165932561, "grad_norm": 1.9357763248207245, "learning_rate": 3.693667157584684e-06, "loss": 0.4883, "step": 1254 }, { "epoch": 0.03697538411678918, "grad_norm": 2.2678127639934287, "learning_rate": 3.696612665684831e-06, "loss": 0.6995, "step": 1255 }, { "epoch": 0.03700484657425276, "grad_norm": 1.954205951803009, "learning_rate": 3.6995581737849784e-06, "loss": 0.5092, "step": 1256 }, { "epoch": 0.03703430903171633, "grad_norm": 2.1759559564004367, "learning_rate": 3.7025036818851255e-06, "loss": 0.81, "step": 1257 }, { "epoch": 0.037063771489179914, "grad_norm": 2.020951357714969, "learning_rate": 3.705449189985273e-06, "loss": 0.6076, "step": 1258 }, { "epoch": 0.03709323394664349, "grad_norm": 2.0369206191873386, "learning_rate": 3.7083946980854203e-06, "loss": 0.7082, "step": 1259 }, { "epoch": 0.037122696404107064, "grad_norm": 2.2371782378608676, "learning_rate": 3.7113402061855674e-06, "loss": 0.5637, "step": 1260 }, { "epoch": 0.037152158861570646, "grad_norm": 1.8976523927635296, "learning_rate": 3.7142857142857146e-06, "loss": 0.5515, "step": 1261 }, { "epoch": 0.03718162131903422, "grad_norm": 2.055389885068695, "learning_rate": 3.717231222385862e-06, "loss": 0.7507, "step": 1262 }, { "epoch": 0.037211083776497796, "grad_norm": 1.9408284224944465, "learning_rate": 3.7201767304860093e-06, "loss": 0.6266, "step": 1263 }, { "epoch": 0.03724054623396138, "grad_norm": 1.9200519979988193, "learning_rate": 3.7231222385861565e-06, "loss": 0.6011, "step": 1264 }, { "epoch": 0.03727000869142495, "grad_norm": 2.0937215469676875, "learning_rate": 3.7260677466863037e-06, "loss": 0.5876, "step": 1265 }, { "epoch": 0.03729947114888853, "grad_norm": 1.8125997616214753, "learning_rate": 3.729013254786451e-06, "loss": 0.507, "step": 1266 }, { "epoch": 0.03732893360635211, "grad_norm": 1.8455159564844505, "learning_rate": 3.7319587628865984e-06, "loss": 0.4657, "step": 1267 }, { "epoch": 0.037358396063815684, "grad_norm": 2.2899810698725864, "learning_rate": 3.7349042709867456e-06, "loss": 0.678, "step": 1268 }, { "epoch": 0.03738785852127926, "grad_norm": 1.9674787682013517, "learning_rate": 3.7378497790868927e-06, "loss": 0.4674, "step": 1269 }, { "epoch": 0.037417320978742834, "grad_norm": 2.35421068758803, "learning_rate": 3.74079528718704e-06, "loss": 0.8288, "step": 1270 }, { "epoch": 0.037446783436206416, "grad_norm": 1.7417084486272085, "learning_rate": 3.7437407952871875e-06, "loss": 0.5807, "step": 1271 }, { "epoch": 0.03747624589366999, "grad_norm": 2.0874060393906326, "learning_rate": 3.7466863033873346e-06, "loss": 0.6309, "step": 1272 }, { "epoch": 0.037505708351133565, "grad_norm": 1.732858919561342, "learning_rate": 3.749631811487482e-06, "loss": 0.5155, "step": 1273 }, { "epoch": 0.03753517080859715, "grad_norm": 2.0201843246392572, "learning_rate": 3.752577319587629e-06, "loss": 0.6001, "step": 1274 }, { "epoch": 0.03756463326606072, "grad_norm": 1.6662252972916887, "learning_rate": 3.7555228276877766e-06, "loss": 0.4935, "step": 1275 }, { "epoch": 0.0375940957235243, "grad_norm": 1.8962360059898666, "learning_rate": 3.7584683357879237e-06, "loss": 0.5688, "step": 1276 }, { "epoch": 0.03762355818098788, "grad_norm": 1.9327214196874367, "learning_rate": 3.761413843888071e-06, "loss": 0.5154, "step": 1277 }, { "epoch": 0.037653020638451454, "grad_norm": 1.9521962001630442, "learning_rate": 3.764359351988218e-06, "loss": 0.5687, "step": 1278 }, { "epoch": 0.03768248309591503, "grad_norm": 1.7509785872913344, "learning_rate": 3.7673048600883656e-06, "loss": 0.5145, "step": 1279 }, { "epoch": 0.03771194555337861, "grad_norm": 2.048540283279825, "learning_rate": 3.7702503681885128e-06, "loss": 0.5189, "step": 1280 }, { "epoch": 0.037741408010842185, "grad_norm": 1.914194900853268, "learning_rate": 3.77319587628866e-06, "loss": 0.6497, "step": 1281 }, { "epoch": 0.03777087046830576, "grad_norm": 2.02180411757943, "learning_rate": 3.776141384388807e-06, "loss": 0.5318, "step": 1282 }, { "epoch": 0.037800332925769335, "grad_norm": 1.8848583048565988, "learning_rate": 3.7790868924889547e-06, "loss": 0.7516, "step": 1283 }, { "epoch": 0.03782979538323292, "grad_norm": 2.058895317343124, "learning_rate": 3.782032400589102e-06, "loss": 0.5887, "step": 1284 }, { "epoch": 0.03785925784069649, "grad_norm": 1.9934139055737765, "learning_rate": 3.784977908689249e-06, "loss": 0.5839, "step": 1285 }, { "epoch": 0.03788872029816007, "grad_norm": 1.908031147826969, "learning_rate": 3.787923416789396e-06, "loss": 0.6718, "step": 1286 }, { "epoch": 0.03791818275562365, "grad_norm": 1.8495580622085226, "learning_rate": 3.7908689248895438e-06, "loss": 0.7381, "step": 1287 }, { "epoch": 0.037947645213087224, "grad_norm": 1.7430957514354477, "learning_rate": 3.793814432989691e-06, "loss": 0.5652, "step": 1288 }, { "epoch": 0.0379771076705508, "grad_norm": 2.075495999665508, "learning_rate": 3.796759941089838e-06, "loss": 0.7146, "step": 1289 }, { "epoch": 0.03800657012801438, "grad_norm": 2.209823320607003, "learning_rate": 3.7997054491899852e-06, "loss": 0.5114, "step": 1290 }, { "epoch": 0.038036032585477955, "grad_norm": 1.927852392115973, "learning_rate": 3.802650957290133e-06, "loss": 0.6125, "step": 1291 }, { "epoch": 0.03806549504294153, "grad_norm": 1.8185295264795225, "learning_rate": 3.80559646539028e-06, "loss": 0.6675, "step": 1292 }, { "epoch": 0.03809495750040511, "grad_norm": 1.9555776972370738, "learning_rate": 3.808541973490427e-06, "loss": 0.7108, "step": 1293 }, { "epoch": 0.03812441995786869, "grad_norm": 1.9051363001304848, "learning_rate": 3.8114874815905743e-06, "loss": 0.6367, "step": 1294 }, { "epoch": 0.03815388241533226, "grad_norm": 2.3927088579639606, "learning_rate": 3.814432989690722e-06, "loss": 0.5925, "step": 1295 }, { "epoch": 0.03818334487279584, "grad_norm": 2.0508948273678067, "learning_rate": 3.817378497790869e-06, "loss": 0.5724, "step": 1296 }, { "epoch": 0.03821280733025942, "grad_norm": 1.8133736350693896, "learning_rate": 3.820324005891016e-06, "loss": 0.4794, "step": 1297 }, { "epoch": 0.03824226978772299, "grad_norm": 1.9782067206066007, "learning_rate": 3.823269513991163e-06, "loss": 0.475, "step": 1298 }, { "epoch": 0.03827173224518657, "grad_norm": 1.9648860546428064, "learning_rate": 3.8262150220913105e-06, "loss": 0.7171, "step": 1299 }, { "epoch": 0.03830119470265015, "grad_norm": 1.7985388417968047, "learning_rate": 3.8291605301914585e-06, "loss": 0.6701, "step": 1300 }, { "epoch": 0.038330657160113725, "grad_norm": 1.9668668931139508, "learning_rate": 3.832106038291606e-06, "loss": 0.3511, "step": 1301 }, { "epoch": 0.0383601196175773, "grad_norm": 2.02334832276886, "learning_rate": 3.835051546391753e-06, "loss": 0.5658, "step": 1302 }, { "epoch": 0.03838958207504088, "grad_norm": 1.845420410021972, "learning_rate": 3.8379970544919e-06, "loss": 0.5307, "step": 1303 }, { "epoch": 0.03841904453250446, "grad_norm": 2.0632018638659644, "learning_rate": 3.840942562592048e-06, "loss": 0.6567, "step": 1304 }, { "epoch": 0.03844850698996803, "grad_norm": 1.904688677667026, "learning_rate": 3.843888070692195e-06, "loss": 0.7016, "step": 1305 }, { "epoch": 0.03847796944743161, "grad_norm": 1.9651513978526183, "learning_rate": 3.846833578792342e-06, "loss": 0.5719, "step": 1306 }, { "epoch": 0.03850743190489519, "grad_norm": 1.9034898771414717, "learning_rate": 3.8497790868924895e-06, "loss": 0.7409, "step": 1307 }, { "epoch": 0.03853689436235876, "grad_norm": 1.8781365190480461, "learning_rate": 3.852724594992637e-06, "loss": 0.7547, "step": 1308 }, { "epoch": 0.03856635681982234, "grad_norm": 1.9627527742072588, "learning_rate": 3.855670103092784e-06, "loss": 0.5421, "step": 1309 }, { "epoch": 0.03859581927728592, "grad_norm": 1.9883455862451302, "learning_rate": 3.858615611192931e-06, "loss": 0.7284, "step": 1310 }, { "epoch": 0.038625281734749495, "grad_norm": 1.821990210967069, "learning_rate": 3.861561119293078e-06, "loss": 0.6808, "step": 1311 }, { "epoch": 0.03865474419221307, "grad_norm": 1.8792202244205614, "learning_rate": 3.864506627393226e-06, "loss": 0.5635, "step": 1312 }, { "epoch": 0.03868420664967665, "grad_norm": 2.38690258805271, "learning_rate": 3.867452135493373e-06, "loss": 0.7185, "step": 1313 }, { "epoch": 0.038713669107140226, "grad_norm": 2.1561249314378403, "learning_rate": 3.8703976435935205e-06, "loss": 0.6368, "step": 1314 }, { "epoch": 0.0387431315646038, "grad_norm": 1.823038148562072, "learning_rate": 3.873343151693668e-06, "loss": 0.4728, "step": 1315 }, { "epoch": 0.03877259402206738, "grad_norm": 1.9695872685295046, "learning_rate": 3.876288659793815e-06, "loss": 0.6136, "step": 1316 }, { "epoch": 0.03880205647953096, "grad_norm": 1.9350442722679517, "learning_rate": 3.879234167893962e-06, "loss": 0.486, "step": 1317 }, { "epoch": 0.03883151893699453, "grad_norm": 1.8012154689035254, "learning_rate": 3.882179675994109e-06, "loss": 0.589, "step": 1318 }, { "epoch": 0.038860981394458115, "grad_norm": 2.278020734620222, "learning_rate": 3.885125184094256e-06, "loss": 0.6493, "step": 1319 }, { "epoch": 0.03889044385192169, "grad_norm": 1.8319147552957498, "learning_rate": 3.888070692194404e-06, "loss": 0.5893, "step": 1320 }, { "epoch": 0.038919906309385265, "grad_norm": 2.048979718346911, "learning_rate": 3.8910162002945515e-06, "loss": 0.7002, "step": 1321 }, { "epoch": 0.03894936876684884, "grad_norm": 2.0710571340009465, "learning_rate": 3.893961708394699e-06, "loss": 0.6483, "step": 1322 }, { "epoch": 0.03897883122431242, "grad_norm": 1.9322703050590833, "learning_rate": 3.896907216494846e-06, "loss": 0.5568, "step": 1323 }, { "epoch": 0.039008293681775996, "grad_norm": 1.752930941501399, "learning_rate": 3.899852724594993e-06, "loss": 0.6127, "step": 1324 }, { "epoch": 0.03903775613923957, "grad_norm": 1.8865434052844388, "learning_rate": 3.90279823269514e-06, "loss": 0.6249, "step": 1325 }, { "epoch": 0.03906721859670315, "grad_norm": 1.8402596765958115, "learning_rate": 3.905743740795287e-06, "loss": 0.5468, "step": 1326 }, { "epoch": 0.03909668105416673, "grad_norm": 1.8346065301779713, "learning_rate": 3.9086892488954344e-06, "loss": 0.6322, "step": 1327 }, { "epoch": 0.0391261435116303, "grad_norm": 1.9309589592977148, "learning_rate": 3.9116347569955824e-06, "loss": 0.4203, "step": 1328 }, { "epoch": 0.039155605969093885, "grad_norm": 2.0691351258048423, "learning_rate": 3.91458026509573e-06, "loss": 0.483, "step": 1329 }, { "epoch": 0.03918506842655746, "grad_norm": 1.7182718946545643, "learning_rate": 3.917525773195877e-06, "loss": 0.4803, "step": 1330 }, { "epoch": 0.039214530884021034, "grad_norm": 1.8448208785055515, "learning_rate": 3.920471281296024e-06, "loss": 0.497, "step": 1331 }, { "epoch": 0.039243993341484616, "grad_norm": 1.7375038055721421, "learning_rate": 3.923416789396171e-06, "loss": 0.5186, "step": 1332 }, { "epoch": 0.03927345579894819, "grad_norm": 2.066897052941075, "learning_rate": 3.926362297496318e-06, "loss": 0.7183, "step": 1333 }, { "epoch": 0.039302918256411766, "grad_norm": 2.005053695384042, "learning_rate": 3.929307805596465e-06, "loss": 0.6749, "step": 1334 }, { "epoch": 0.03933238071387534, "grad_norm": 2.040813000655777, "learning_rate": 3.9322533136966126e-06, "loss": 0.5083, "step": 1335 }, { "epoch": 0.03936184317133892, "grad_norm": 1.9823202864355887, "learning_rate": 3.935198821796761e-06, "loss": 0.5816, "step": 1336 }, { "epoch": 0.0393913056288025, "grad_norm": 1.8587977024813653, "learning_rate": 3.938144329896908e-06, "loss": 0.6007, "step": 1337 }, { "epoch": 0.03942076808626607, "grad_norm": 1.957435750898079, "learning_rate": 3.941089837997055e-06, "loss": 0.5532, "step": 1338 }, { "epoch": 0.039450230543729654, "grad_norm": 1.9044218860683244, "learning_rate": 3.944035346097202e-06, "loss": 0.6541, "step": 1339 }, { "epoch": 0.03947969300119323, "grad_norm": 2.1852940109444714, "learning_rate": 3.946980854197349e-06, "loss": 0.6497, "step": 1340 }, { "epoch": 0.039509155458656804, "grad_norm": 1.809336082463143, "learning_rate": 3.949926362297496e-06, "loss": 0.5463, "step": 1341 }, { "epoch": 0.039538617916120386, "grad_norm": 1.8310044444910865, "learning_rate": 3.9528718703976435e-06, "loss": 0.5819, "step": 1342 }, { "epoch": 0.03956808037358396, "grad_norm": 2.1514761921060295, "learning_rate": 3.955817378497791e-06, "loss": 0.6426, "step": 1343 }, { "epoch": 0.039597542831047536, "grad_norm": 2.2034126365353823, "learning_rate": 3.958762886597938e-06, "loss": 0.6228, "step": 1344 }, { "epoch": 0.03962700528851112, "grad_norm": 1.7759143743420838, "learning_rate": 3.961708394698086e-06, "loss": 0.6456, "step": 1345 }, { "epoch": 0.03965646774597469, "grad_norm": 1.8089474886453643, "learning_rate": 3.964653902798233e-06, "loss": 0.629, "step": 1346 }, { "epoch": 0.03968593020343827, "grad_norm": 1.8068262840610165, "learning_rate": 3.96759941089838e-06, "loss": 0.6367, "step": 1347 }, { "epoch": 0.03971539266090185, "grad_norm": 2.138882198644322, "learning_rate": 3.970544918998527e-06, "loss": 0.603, "step": 1348 }, { "epoch": 0.039744855118365424, "grad_norm": 1.9811973689803077, "learning_rate": 3.9734904270986745e-06, "loss": 0.7017, "step": 1349 }, { "epoch": 0.039774317575829, "grad_norm": 1.9562350011307348, "learning_rate": 3.976435935198822e-06, "loss": 0.6485, "step": 1350 }, { "epoch": 0.039803780033292574, "grad_norm": 2.1595436244583883, "learning_rate": 3.979381443298969e-06, "loss": 0.5122, "step": 1351 }, { "epoch": 0.039833242490756156, "grad_norm": 1.7684386739099418, "learning_rate": 3.982326951399116e-06, "loss": 0.6852, "step": 1352 }, { "epoch": 0.03986270494821973, "grad_norm": 1.7929954941755335, "learning_rate": 3.985272459499264e-06, "loss": 0.5639, "step": 1353 }, { "epoch": 0.039892167405683306, "grad_norm": 1.8127543457310957, "learning_rate": 3.988217967599411e-06, "loss": 0.5242, "step": 1354 }, { "epoch": 0.03992162986314689, "grad_norm": 1.9282018206376401, "learning_rate": 3.991163475699558e-06, "loss": 0.5763, "step": 1355 }, { "epoch": 0.03995109232061046, "grad_norm": 1.9395510892490209, "learning_rate": 3.994108983799706e-06, "loss": 0.6336, "step": 1356 }, { "epoch": 0.03998055477807404, "grad_norm": 1.9454068728750198, "learning_rate": 3.9970544918998535e-06, "loss": 0.642, "step": 1357 }, { "epoch": 0.04001001723553762, "grad_norm": 1.6993934172044816, "learning_rate": 4.000000000000001e-06, "loss": 0.6515, "step": 1358 }, { "epoch": 0.040039479693001194, "grad_norm": 1.8799505931502427, "learning_rate": 4.002945508100148e-06, "loss": 0.6293, "step": 1359 }, { "epoch": 0.04006894215046477, "grad_norm": 2.150487010497291, "learning_rate": 4.005891016200295e-06, "loss": 0.5911, "step": 1360 }, { "epoch": 0.04009840460792835, "grad_norm": 1.8247957005379976, "learning_rate": 4.008836524300442e-06, "loss": 0.6303, "step": 1361 }, { "epoch": 0.040127867065391926, "grad_norm": 1.9515529358438108, "learning_rate": 4.011782032400589e-06, "loss": 0.6781, "step": 1362 }, { "epoch": 0.0401573295228555, "grad_norm": 2.4868137594881077, "learning_rate": 4.0147275405007365e-06, "loss": 0.5671, "step": 1363 }, { "epoch": 0.040186791980319075, "grad_norm": 1.8832719048795732, "learning_rate": 4.0176730486008845e-06, "loss": 0.5602, "step": 1364 }, { "epoch": 0.04021625443778266, "grad_norm": 1.9181353317249552, "learning_rate": 4.020618556701032e-06, "loss": 0.5304, "step": 1365 }, { "epoch": 0.04024571689524623, "grad_norm": 1.9878647865753865, "learning_rate": 4.023564064801179e-06, "loss": 0.7312, "step": 1366 }, { "epoch": 0.04027517935270981, "grad_norm": 1.9149403284425612, "learning_rate": 4.026509572901326e-06, "loss": 0.6295, "step": 1367 }, { "epoch": 0.04030464181017339, "grad_norm": 1.9853175693056375, "learning_rate": 4.029455081001473e-06, "loss": 0.5995, "step": 1368 }, { "epoch": 0.040334104267636964, "grad_norm": 1.9087331377940449, "learning_rate": 4.03240058910162e-06, "loss": 0.6826, "step": 1369 }, { "epoch": 0.04036356672510054, "grad_norm": 1.8038712840597273, "learning_rate": 4.0353460972017674e-06, "loss": 0.5875, "step": 1370 }, { "epoch": 0.04039302918256412, "grad_norm": 1.8317067619386598, "learning_rate": 4.038291605301915e-06, "loss": 0.6894, "step": 1371 }, { "epoch": 0.040422491640027695, "grad_norm": 2.479135774588999, "learning_rate": 4.041237113402063e-06, "loss": 0.6729, "step": 1372 }, { "epoch": 0.04045195409749127, "grad_norm": 1.6951016647357937, "learning_rate": 4.04418262150221e-06, "loss": 0.556, "step": 1373 }, { "epoch": 0.04048141655495485, "grad_norm": 1.978258980263628, "learning_rate": 4.047128129602357e-06, "loss": 0.5232, "step": 1374 }, { "epoch": 0.04051087901241843, "grad_norm": 1.8224462477925902, "learning_rate": 4.050073637702504e-06, "loss": 0.5934, "step": 1375 }, { "epoch": 0.040540341469882, "grad_norm": 1.9858972007425106, "learning_rate": 4.053019145802651e-06, "loss": 0.6882, "step": 1376 }, { "epoch": 0.04056980392734558, "grad_norm": 1.7904240537890002, "learning_rate": 4.055964653902798e-06, "loss": 0.4064, "step": 1377 }, { "epoch": 0.04059926638480916, "grad_norm": 1.8615669357065072, "learning_rate": 4.058910162002946e-06, "loss": 0.3772, "step": 1378 }, { "epoch": 0.040628728842272734, "grad_norm": 1.9655033908026507, "learning_rate": 4.061855670103093e-06, "loss": 0.5275, "step": 1379 }, { "epoch": 0.04065819129973631, "grad_norm": 2.186444927322707, "learning_rate": 4.064801178203241e-06, "loss": 0.5795, "step": 1380 }, { "epoch": 0.04068765375719989, "grad_norm": 1.9072679378211246, "learning_rate": 4.067746686303388e-06, "loss": 0.5259, "step": 1381 }, { "epoch": 0.040717116214663465, "grad_norm": 1.8920520650468398, "learning_rate": 4.070692194403535e-06, "loss": 0.6303, "step": 1382 }, { "epoch": 0.04074657867212704, "grad_norm": 1.6809474867975898, "learning_rate": 4.073637702503682e-06, "loss": 0.539, "step": 1383 }, { "epoch": 0.04077604112959062, "grad_norm": 1.9088034788992685, "learning_rate": 4.076583210603829e-06, "loss": 0.5795, "step": 1384 }, { "epoch": 0.0408055035870542, "grad_norm": 2.181419221056357, "learning_rate": 4.0795287187039766e-06, "loss": 0.5564, "step": 1385 }, { "epoch": 0.04083496604451777, "grad_norm": 1.9292133508466214, "learning_rate": 4.082474226804124e-06, "loss": 0.44, "step": 1386 }, { "epoch": 0.040864428501981354, "grad_norm": 1.9739393371597742, "learning_rate": 4.085419734904271e-06, "loss": 0.5904, "step": 1387 }, { "epoch": 0.04089389095944493, "grad_norm": 2.1603476923821874, "learning_rate": 4.088365243004419e-06, "loss": 0.5865, "step": 1388 }, { "epoch": 0.0409233534169085, "grad_norm": 2.229356610484844, "learning_rate": 4.091310751104566e-06, "loss": 0.599, "step": 1389 }, { "epoch": 0.04095281587437208, "grad_norm": 2.0258522675498014, "learning_rate": 4.094256259204713e-06, "loss": 0.5987, "step": 1390 }, { "epoch": 0.04098227833183566, "grad_norm": 1.856801464413149, "learning_rate": 4.09720176730486e-06, "loss": 0.7158, "step": 1391 }, { "epoch": 0.041011740789299235, "grad_norm": 1.752508825166553, "learning_rate": 4.1001472754050075e-06, "loss": 0.4231, "step": 1392 }, { "epoch": 0.04104120324676281, "grad_norm": 1.7748491829406556, "learning_rate": 4.103092783505155e-06, "loss": 0.5552, "step": 1393 }, { "epoch": 0.04107066570422639, "grad_norm": 1.8380756194528367, "learning_rate": 4.106038291605302e-06, "loss": 0.5497, "step": 1394 }, { "epoch": 0.04110012816168997, "grad_norm": 1.7091724521929903, "learning_rate": 4.108983799705449e-06, "loss": 0.5637, "step": 1395 }, { "epoch": 0.04112959061915354, "grad_norm": 2.0792058401574054, "learning_rate": 4.111929307805597e-06, "loss": 0.6432, "step": 1396 }, { "epoch": 0.04115905307661712, "grad_norm": 1.901878487636543, "learning_rate": 4.114874815905744e-06, "loss": 0.5958, "step": 1397 }, { "epoch": 0.0411885155340807, "grad_norm": 2.162025889012069, "learning_rate": 4.117820324005891e-06, "loss": 0.6091, "step": 1398 }, { "epoch": 0.04121797799154427, "grad_norm": 1.767293261900025, "learning_rate": 4.1207658321060385e-06, "loss": 0.5114, "step": 1399 }, { "epoch": 0.041247440449007855, "grad_norm": 1.707469971235916, "learning_rate": 4.123711340206186e-06, "loss": 0.6108, "step": 1400 }, { "epoch": 0.04127690290647143, "grad_norm": 1.749830241580734, "learning_rate": 4.126656848306333e-06, "loss": 0.4755, "step": 1401 }, { "epoch": 0.041306365363935005, "grad_norm": 1.8808271950116386, "learning_rate": 4.12960235640648e-06, "loss": 0.6529, "step": 1402 }, { "epoch": 0.04133582782139858, "grad_norm": 1.9710824016161574, "learning_rate": 4.132547864506627e-06, "loss": 0.707, "step": 1403 }, { "epoch": 0.04136529027886216, "grad_norm": 2.014085175938829, "learning_rate": 4.135493372606775e-06, "loss": 0.5257, "step": 1404 }, { "epoch": 0.041394752736325736, "grad_norm": 2.069429499392017, "learning_rate": 4.138438880706922e-06, "loss": 0.5918, "step": 1405 }, { "epoch": 0.04142421519378931, "grad_norm": 1.8814008145125989, "learning_rate": 4.1413843888070695e-06, "loss": 0.4988, "step": 1406 }, { "epoch": 0.04145367765125289, "grad_norm": 1.832853222373136, "learning_rate": 4.1443298969072175e-06, "loss": 0.5423, "step": 1407 }, { "epoch": 0.04148314010871647, "grad_norm": 2.01403385685267, "learning_rate": 4.147275405007365e-06, "loss": 0.6697, "step": 1408 }, { "epoch": 0.04151260256618004, "grad_norm": 1.8619123776693474, "learning_rate": 4.150220913107512e-06, "loss": 0.6008, "step": 1409 }, { "epoch": 0.041542065023643625, "grad_norm": 2.180087755589521, "learning_rate": 4.153166421207659e-06, "loss": 0.7739, "step": 1410 }, { "epoch": 0.0415715274811072, "grad_norm": 1.8164909745402729, "learning_rate": 4.156111929307806e-06, "loss": 0.6073, "step": 1411 }, { "epoch": 0.041600989938570775, "grad_norm": 1.9353932446540647, "learning_rate": 4.159057437407953e-06, "loss": 0.6919, "step": 1412 }, { "epoch": 0.041630452396034356, "grad_norm": 1.8972224500916968, "learning_rate": 4.1620029455081005e-06, "loss": 0.5527, "step": 1413 }, { "epoch": 0.04165991485349793, "grad_norm": 1.889631838873182, "learning_rate": 4.164948453608248e-06, "loss": 0.6326, "step": 1414 }, { "epoch": 0.041689377310961506, "grad_norm": 1.9476547200130365, "learning_rate": 4.167893961708396e-06, "loss": 0.6002, "step": 1415 }, { "epoch": 0.04171883976842508, "grad_norm": 1.9949135639900486, "learning_rate": 4.170839469808543e-06, "loss": 0.6717, "step": 1416 }, { "epoch": 0.04174830222588866, "grad_norm": 1.8706285217230758, "learning_rate": 4.17378497790869e-06, "loss": 0.5709, "step": 1417 }, { "epoch": 0.04177776468335224, "grad_norm": 1.8086149536976448, "learning_rate": 4.176730486008837e-06, "loss": 0.5364, "step": 1418 }, { "epoch": 0.04180722714081581, "grad_norm": 2.2050574196304016, "learning_rate": 4.179675994108984e-06, "loss": 0.7164, "step": 1419 }, { "epoch": 0.041836689598279395, "grad_norm": 1.8152698796617495, "learning_rate": 4.1826215022091314e-06, "loss": 0.6153, "step": 1420 }, { "epoch": 0.04186615205574297, "grad_norm": 1.9782390769073253, "learning_rate": 4.185567010309279e-06, "loss": 0.5115, "step": 1421 }, { "epoch": 0.041895614513206544, "grad_norm": 2.0910624271731457, "learning_rate": 4.188512518409426e-06, "loss": 0.7089, "step": 1422 }, { "epoch": 0.041925076970670126, "grad_norm": 1.8189151490890516, "learning_rate": 4.191458026509574e-06, "loss": 0.5903, "step": 1423 }, { "epoch": 0.0419545394281337, "grad_norm": 2.0324893966271307, "learning_rate": 4.194403534609721e-06, "loss": 0.5793, "step": 1424 }, { "epoch": 0.041984001885597276, "grad_norm": 1.7549473988008997, "learning_rate": 4.197349042709868e-06, "loss": 0.4361, "step": 1425 }, { "epoch": 0.04201346434306086, "grad_norm": 1.848472708301348, "learning_rate": 4.200294550810015e-06, "loss": 0.5173, "step": 1426 }, { "epoch": 0.04204292680052443, "grad_norm": 1.8344225323999896, "learning_rate": 4.203240058910162e-06, "loss": 0.5668, "step": 1427 }, { "epoch": 0.04207238925798801, "grad_norm": 2.2308526823939387, "learning_rate": 4.2061855670103096e-06, "loss": 0.4372, "step": 1428 }, { "epoch": 0.04210185171545158, "grad_norm": 1.6771868237714787, "learning_rate": 4.209131075110457e-06, "loss": 0.4489, "step": 1429 }, { "epoch": 0.042131314172915164, "grad_norm": 1.90026237169698, "learning_rate": 4.212076583210604e-06, "loss": 0.6105, "step": 1430 }, { "epoch": 0.04216077663037874, "grad_norm": 2.062128735012699, "learning_rate": 4.215022091310751e-06, "loss": 0.7149, "step": 1431 }, { "epoch": 0.042190239087842314, "grad_norm": 2.302658219888479, "learning_rate": 4.217967599410899e-06, "loss": 0.5388, "step": 1432 }, { "epoch": 0.042219701545305896, "grad_norm": 1.7368399620979078, "learning_rate": 4.220913107511046e-06, "loss": 0.535, "step": 1433 }, { "epoch": 0.04224916400276947, "grad_norm": 1.997217951487554, "learning_rate": 4.223858615611193e-06, "loss": 0.6326, "step": 1434 }, { "epoch": 0.042278626460233046, "grad_norm": 2.0247779131071955, "learning_rate": 4.2268041237113405e-06, "loss": 0.6379, "step": 1435 }, { "epoch": 0.04230808891769663, "grad_norm": 2.1001811453221992, "learning_rate": 4.229749631811488e-06, "loss": 0.743, "step": 1436 }, { "epoch": 0.0423375513751602, "grad_norm": 2.1795417611350536, "learning_rate": 4.232695139911635e-06, "loss": 0.4764, "step": 1437 }, { "epoch": 0.04236701383262378, "grad_norm": 2.03017241505483, "learning_rate": 4.235640648011782e-06, "loss": 0.8169, "step": 1438 }, { "epoch": 0.04239647629008736, "grad_norm": 1.7069823712874288, "learning_rate": 4.238586156111929e-06, "loss": 0.5474, "step": 1439 }, { "epoch": 0.042425938747550934, "grad_norm": 2.0874107392894428, "learning_rate": 4.241531664212077e-06, "loss": 0.6265, "step": 1440 }, { "epoch": 0.04245540120501451, "grad_norm": 1.8763248779917374, "learning_rate": 4.244477172312224e-06, "loss": 0.5957, "step": 1441 }, { "epoch": 0.042484863662478084, "grad_norm": 1.8155630826386293, "learning_rate": 4.2474226804123715e-06, "loss": 0.7055, "step": 1442 }, { "epoch": 0.042514326119941666, "grad_norm": 1.9079424948403827, "learning_rate": 4.250368188512519e-06, "loss": 0.6066, "step": 1443 }, { "epoch": 0.04254378857740524, "grad_norm": 1.9331582577263784, "learning_rate": 4.253313696612666e-06, "loss": 0.5827, "step": 1444 }, { "epoch": 0.042573251034868816, "grad_norm": 1.6840470691373362, "learning_rate": 4.256259204712813e-06, "loss": 0.4872, "step": 1445 }, { "epoch": 0.0426027134923324, "grad_norm": 1.7358788072034013, "learning_rate": 4.25920471281296e-06, "loss": 0.4634, "step": 1446 }, { "epoch": 0.04263217594979597, "grad_norm": 1.9051718047763933, "learning_rate": 4.262150220913107e-06, "loss": 0.5261, "step": 1447 }, { "epoch": 0.04266163840725955, "grad_norm": 1.7759961772200088, "learning_rate": 4.265095729013255e-06, "loss": 0.5575, "step": 1448 }, { "epoch": 0.04269110086472313, "grad_norm": 1.7067915121917303, "learning_rate": 4.2680412371134025e-06, "loss": 0.5616, "step": 1449 }, { "epoch": 0.042720563322186704, "grad_norm": 2.1774134463294326, "learning_rate": 4.27098674521355e-06, "loss": 0.4924, "step": 1450 }, { "epoch": 0.04275002577965028, "grad_norm": 1.8985436147282182, "learning_rate": 4.273932253313697e-06, "loss": 0.6474, "step": 1451 }, { "epoch": 0.04277948823711386, "grad_norm": 1.9178992100834151, "learning_rate": 4.276877761413844e-06, "loss": 0.5741, "step": 1452 }, { "epoch": 0.042808950694577436, "grad_norm": 1.7327437502256773, "learning_rate": 4.279823269513991e-06, "loss": 0.5872, "step": 1453 }, { "epoch": 0.04283841315204101, "grad_norm": 1.7489277588940095, "learning_rate": 4.282768777614138e-06, "loss": 0.515, "step": 1454 }, { "epoch": 0.042867875609504585, "grad_norm": 1.821824411073116, "learning_rate": 4.2857142857142855e-06, "loss": 0.4649, "step": 1455 }, { "epoch": 0.04289733806696817, "grad_norm": 1.7700705052296748, "learning_rate": 4.2886597938144335e-06, "loss": 0.5761, "step": 1456 }, { "epoch": 0.04292680052443174, "grad_norm": 2.01283659554413, "learning_rate": 4.291605301914581e-06, "loss": 0.5356, "step": 1457 }, { "epoch": 0.04295626298189532, "grad_norm": 2.0117972684242, "learning_rate": 4.294550810014728e-06, "loss": 0.6727, "step": 1458 }, { "epoch": 0.0429857254393589, "grad_norm": 2.1512358772543285, "learning_rate": 4.297496318114876e-06, "loss": 0.5491, "step": 1459 }, { "epoch": 0.043015187896822474, "grad_norm": 1.8697607494194461, "learning_rate": 4.300441826215023e-06, "loss": 0.6295, "step": 1460 }, { "epoch": 0.04304465035428605, "grad_norm": 2.2283047019233293, "learning_rate": 4.30338733431517e-06, "loss": 0.5485, "step": 1461 }, { "epoch": 0.04307411281174963, "grad_norm": 1.7224150246725636, "learning_rate": 4.306332842415317e-06, "loss": 0.596, "step": 1462 }, { "epoch": 0.043103575269213205, "grad_norm": 1.658073612571444, "learning_rate": 4.3092783505154644e-06, "loss": 0.5077, "step": 1463 }, { "epoch": 0.04313303772667678, "grad_norm": 1.7852766445736463, "learning_rate": 4.312223858615612e-06, "loss": 0.6379, "step": 1464 }, { "epoch": 0.04316250018414036, "grad_norm": 1.711196677817341, "learning_rate": 4.315169366715759e-06, "loss": 0.5605, "step": 1465 }, { "epoch": 0.04319196264160394, "grad_norm": 1.863190787117895, "learning_rate": 4.318114874815906e-06, "loss": 0.5144, "step": 1466 }, { "epoch": 0.04322142509906751, "grad_norm": 1.8681737354797943, "learning_rate": 4.321060382916054e-06, "loss": 0.6526, "step": 1467 }, { "epoch": 0.04325088755653109, "grad_norm": 2.0143019809487286, "learning_rate": 4.324005891016201e-06, "loss": 0.5274, "step": 1468 }, { "epoch": 0.04328035001399467, "grad_norm": 1.7514482049819562, "learning_rate": 4.326951399116348e-06, "loss": 0.4441, "step": 1469 }, { "epoch": 0.043309812471458244, "grad_norm": 2.0587337105050247, "learning_rate": 4.329896907216495e-06, "loss": 0.5702, "step": 1470 }, { "epoch": 0.04333927492892182, "grad_norm": 2.1717020964182954, "learning_rate": 4.332842415316643e-06, "loss": 0.4589, "step": 1471 }, { "epoch": 0.0433687373863854, "grad_norm": 1.8892401039073887, "learning_rate": 4.33578792341679e-06, "loss": 0.5232, "step": 1472 }, { "epoch": 0.043398199843848975, "grad_norm": 1.8492038843456575, "learning_rate": 4.338733431516937e-06, "loss": 0.5878, "step": 1473 }, { "epoch": 0.04342766230131255, "grad_norm": 1.819843145801629, "learning_rate": 4.341678939617084e-06, "loss": 0.4551, "step": 1474 }, { "epoch": 0.04345712475877613, "grad_norm": 1.9053404319568206, "learning_rate": 4.344624447717232e-06, "loss": 0.504, "step": 1475 }, { "epoch": 0.04348658721623971, "grad_norm": 1.82213918371011, "learning_rate": 4.347569955817379e-06, "loss": 0.5345, "step": 1476 }, { "epoch": 0.04351604967370328, "grad_norm": 1.8435950948266264, "learning_rate": 4.350515463917526e-06, "loss": 0.6327, "step": 1477 }, { "epoch": 0.043545512131166864, "grad_norm": 1.7831670641565536, "learning_rate": 4.3534609720176736e-06, "loss": 0.4476, "step": 1478 }, { "epoch": 0.04357497458863044, "grad_norm": 1.7075494310829749, "learning_rate": 4.356406480117821e-06, "loss": 0.6869, "step": 1479 }, { "epoch": 0.04360443704609401, "grad_norm": 1.9602404763234513, "learning_rate": 4.359351988217968e-06, "loss": 0.5019, "step": 1480 }, { "epoch": 0.04363389950355759, "grad_norm": 1.8421971859580863, "learning_rate": 4.362297496318115e-06, "loss": 0.6841, "step": 1481 }, { "epoch": 0.04366336196102117, "grad_norm": 1.8278747909411202, "learning_rate": 4.365243004418262e-06, "loss": 0.5248, "step": 1482 }, { "epoch": 0.043692824418484745, "grad_norm": 1.838994859859892, "learning_rate": 4.36818851251841e-06, "loss": 0.6345, "step": 1483 }, { "epoch": 0.04372228687594832, "grad_norm": 1.7908804552535018, "learning_rate": 4.371134020618557e-06, "loss": 0.5833, "step": 1484 }, { "epoch": 0.0437517493334119, "grad_norm": 2.1936420792292144, "learning_rate": 4.3740795287187045e-06, "loss": 0.7423, "step": 1485 }, { "epoch": 0.04378121179087548, "grad_norm": 1.8849062617089405, "learning_rate": 4.377025036818852e-06, "loss": 0.5662, "step": 1486 }, { "epoch": 0.04381067424833905, "grad_norm": 2.174458436240626, "learning_rate": 4.379970544918999e-06, "loss": 0.664, "step": 1487 }, { "epoch": 0.04384013670580263, "grad_norm": 2.0278366519046194, "learning_rate": 4.382916053019146e-06, "loss": 0.7034, "step": 1488 }, { "epoch": 0.04386959916326621, "grad_norm": 1.8879652730157885, "learning_rate": 4.385861561119293e-06, "loss": 0.5575, "step": 1489 }, { "epoch": 0.04389906162072978, "grad_norm": 1.8855281606758625, "learning_rate": 4.38880706921944e-06, "loss": 0.6253, "step": 1490 }, { "epoch": 0.043928524078193365, "grad_norm": 1.9882782426177672, "learning_rate": 4.391752577319588e-06, "loss": 0.5735, "step": 1491 }, { "epoch": 0.04395798653565694, "grad_norm": 2.2515091636607094, "learning_rate": 4.3946980854197355e-06, "loss": 0.6712, "step": 1492 }, { "epoch": 0.043987448993120515, "grad_norm": 1.9225038041076745, "learning_rate": 4.397643593519883e-06, "loss": 0.5152, "step": 1493 }, { "epoch": 0.0440169114505841, "grad_norm": 1.912078619604644, "learning_rate": 4.40058910162003e-06, "loss": 0.5474, "step": 1494 }, { "epoch": 0.04404637390804767, "grad_norm": 1.976742283820687, "learning_rate": 4.403534609720177e-06, "loss": 0.534, "step": 1495 }, { "epoch": 0.044075836365511246, "grad_norm": 2.0056855504921316, "learning_rate": 4.406480117820324e-06, "loss": 0.7816, "step": 1496 }, { "epoch": 0.04410529882297482, "grad_norm": 1.841212057487871, "learning_rate": 4.409425625920471e-06, "loss": 0.5332, "step": 1497 }, { "epoch": 0.0441347612804384, "grad_norm": 1.982444645367608, "learning_rate": 4.4123711340206185e-06, "loss": 0.7153, "step": 1498 }, { "epoch": 0.04416422373790198, "grad_norm": 1.8948826796525402, "learning_rate": 4.415316642120766e-06, "loss": 0.5928, "step": 1499 }, { "epoch": 0.04419368619536555, "grad_norm": 1.9896104252404336, "learning_rate": 4.418262150220914e-06, "loss": 0.4507, "step": 1500 }, { "epoch": 0.044223148652829135, "grad_norm": 2.0449804040192374, "learning_rate": 4.421207658321061e-06, "loss": 0.6709, "step": 1501 }, { "epoch": 0.04425261111029271, "grad_norm": 1.7756095280431552, "learning_rate": 4.424153166421208e-06, "loss": 0.6106, "step": 1502 }, { "epoch": 0.044282073567756285, "grad_norm": 1.6157949892126466, "learning_rate": 4.427098674521355e-06, "loss": 0.4718, "step": 1503 }, { "epoch": 0.044311536025219866, "grad_norm": 2.0140310471731997, "learning_rate": 4.430044182621502e-06, "loss": 0.5828, "step": 1504 }, { "epoch": 0.04434099848268344, "grad_norm": 1.990260762683164, "learning_rate": 4.4329896907216494e-06, "loss": 0.5256, "step": 1505 }, { "epoch": 0.044370460940147016, "grad_norm": 2.0547973332525773, "learning_rate": 4.435935198821797e-06, "loss": 0.5443, "step": 1506 }, { "epoch": 0.0443999233976106, "grad_norm": 2.049202069685967, "learning_rate": 4.438880706921944e-06, "loss": 0.6288, "step": 1507 }, { "epoch": 0.04442938585507417, "grad_norm": 1.988437979177908, "learning_rate": 4.441826215022092e-06, "loss": 0.646, "step": 1508 }, { "epoch": 0.04445884831253775, "grad_norm": 1.8891074976966675, "learning_rate": 4.444771723122239e-06, "loss": 0.6787, "step": 1509 }, { "epoch": 0.04448831077000132, "grad_norm": 2.0569222724942415, "learning_rate": 4.447717231222386e-06, "loss": 0.6523, "step": 1510 }, { "epoch": 0.044517773227464905, "grad_norm": 1.7748484262501076, "learning_rate": 4.450662739322533e-06, "loss": 0.5538, "step": 1511 }, { "epoch": 0.04454723568492848, "grad_norm": 1.8254925410271428, "learning_rate": 4.453608247422681e-06, "loss": 0.5769, "step": 1512 }, { "epoch": 0.044576698142392054, "grad_norm": 1.8532450178800184, "learning_rate": 4.4565537555228284e-06, "loss": 0.4765, "step": 1513 }, { "epoch": 0.044606160599855636, "grad_norm": 2.024049978689662, "learning_rate": 4.459499263622976e-06, "loss": 0.7486, "step": 1514 }, { "epoch": 0.04463562305731921, "grad_norm": 1.9705545728029241, "learning_rate": 4.462444771723123e-06, "loss": 0.6857, "step": 1515 }, { "epoch": 0.044665085514782786, "grad_norm": 1.8622317057337476, "learning_rate": 4.46539027982327e-06, "loss": 0.6103, "step": 1516 }, { "epoch": 0.04469454797224637, "grad_norm": 1.9230522086531174, "learning_rate": 4.468335787923417e-06, "loss": 0.6895, "step": 1517 }, { "epoch": 0.04472401042970994, "grad_norm": 1.9633392720869698, "learning_rate": 4.471281296023564e-06, "loss": 0.7031, "step": 1518 }, { "epoch": 0.04475347288717352, "grad_norm": 1.9746819287420991, "learning_rate": 4.474226804123712e-06, "loss": 0.694, "step": 1519 }, { "epoch": 0.0447829353446371, "grad_norm": 1.9075082978143878, "learning_rate": 4.477172312223859e-06, "loss": 0.6914, "step": 1520 }, { "epoch": 0.044812397802100674, "grad_norm": 2.3583341430622187, "learning_rate": 4.4801178203240066e-06, "loss": 0.6859, "step": 1521 }, { "epoch": 0.04484186025956425, "grad_norm": 2.150490038009056, "learning_rate": 4.483063328424154e-06, "loss": 0.8464, "step": 1522 }, { "epoch": 0.044871322717027824, "grad_norm": 1.7552696690082696, "learning_rate": 4.486008836524301e-06, "loss": 0.5599, "step": 1523 }, { "epoch": 0.044900785174491406, "grad_norm": 1.920199347215357, "learning_rate": 4.488954344624448e-06, "loss": 0.7019, "step": 1524 }, { "epoch": 0.04493024763195498, "grad_norm": 2.1378136509763563, "learning_rate": 4.491899852724595e-06, "loss": 0.4451, "step": 1525 }, { "epoch": 0.044959710089418556, "grad_norm": 2.0657819821055807, "learning_rate": 4.494845360824742e-06, "loss": 0.5372, "step": 1526 }, { "epoch": 0.04498917254688214, "grad_norm": 1.70161780396398, "learning_rate": 4.49779086892489e-06, "loss": 0.525, "step": 1527 }, { "epoch": 0.04501863500434571, "grad_norm": 1.9197522955954187, "learning_rate": 4.5007363770250375e-06, "loss": 0.5246, "step": 1528 }, { "epoch": 0.04504809746180929, "grad_norm": 1.9310495378511054, "learning_rate": 4.503681885125185e-06, "loss": 0.6219, "step": 1529 }, { "epoch": 0.04507755991927287, "grad_norm": 1.6449341498908603, "learning_rate": 4.506627393225332e-06, "loss": 0.4877, "step": 1530 }, { "epoch": 0.045107022376736444, "grad_norm": 1.7977047450020893, "learning_rate": 4.509572901325479e-06, "loss": 0.5078, "step": 1531 }, { "epoch": 0.04513648483420002, "grad_norm": 2.1840391595472473, "learning_rate": 4.512518409425626e-06, "loss": 0.6932, "step": 1532 }, { "epoch": 0.0451659472916636, "grad_norm": 1.7499967475646527, "learning_rate": 4.515463917525773e-06, "loss": 0.6192, "step": 1533 }, { "epoch": 0.045195409749127176, "grad_norm": 1.8838333606384794, "learning_rate": 4.5184094256259205e-06, "loss": 0.5026, "step": 1534 }, { "epoch": 0.04522487220659075, "grad_norm": 2.033854483992285, "learning_rate": 4.5213549337260685e-06, "loss": 0.5718, "step": 1535 }, { "epoch": 0.045254334664054326, "grad_norm": 1.9489110875391074, "learning_rate": 4.524300441826216e-06, "loss": 0.762, "step": 1536 }, { "epoch": 0.04528379712151791, "grad_norm": 2.0124275110688763, "learning_rate": 4.527245949926363e-06, "loss": 0.6918, "step": 1537 }, { "epoch": 0.04531325957898148, "grad_norm": 1.8900332164942022, "learning_rate": 4.53019145802651e-06, "loss": 0.4867, "step": 1538 }, { "epoch": 0.04534272203644506, "grad_norm": 1.8819817067172318, "learning_rate": 4.533136966126657e-06, "loss": 0.5708, "step": 1539 }, { "epoch": 0.04537218449390864, "grad_norm": 1.979216397028021, "learning_rate": 4.536082474226804e-06, "loss": 0.5397, "step": 1540 }, { "epoch": 0.045401646951372214, "grad_norm": 1.832938618166508, "learning_rate": 4.5390279823269515e-06, "loss": 0.5443, "step": 1541 }, { "epoch": 0.04543110940883579, "grad_norm": 1.8240966376313574, "learning_rate": 4.541973490427099e-06, "loss": 0.6298, "step": 1542 }, { "epoch": 0.04546057186629937, "grad_norm": 1.9446366569104263, "learning_rate": 4.544918998527247e-06, "loss": 0.6038, "step": 1543 }, { "epoch": 0.045490034323762946, "grad_norm": 1.9534963584845717, "learning_rate": 4.547864506627394e-06, "loss": 0.5935, "step": 1544 }, { "epoch": 0.04551949678122652, "grad_norm": 1.871334987438146, "learning_rate": 4.550810014727541e-06, "loss": 0.5791, "step": 1545 }, { "epoch": 0.0455489592386901, "grad_norm": 2.1294973598033167, "learning_rate": 4.553755522827688e-06, "loss": 0.408, "step": 1546 }, { "epoch": 0.04557842169615368, "grad_norm": 1.7311780350585717, "learning_rate": 4.556701030927835e-06, "loss": 0.5347, "step": 1547 }, { "epoch": 0.04560788415361725, "grad_norm": 1.8776050743381778, "learning_rate": 4.5596465390279825e-06, "loss": 0.5417, "step": 1548 }, { "epoch": 0.04563734661108083, "grad_norm": 1.909113990690755, "learning_rate": 4.56259204712813e-06, "loss": 0.623, "step": 1549 }, { "epoch": 0.04566680906854441, "grad_norm": 1.9414250196333656, "learning_rate": 4.565537555228277e-06, "loss": 0.5859, "step": 1550 }, { "epoch": 0.045696271526007984, "grad_norm": 1.8281694505910322, "learning_rate": 4.568483063328425e-06, "loss": 0.5372, "step": 1551 }, { "epoch": 0.04572573398347156, "grad_norm": 1.933577609739807, "learning_rate": 4.571428571428572e-06, "loss": 0.655, "step": 1552 }, { "epoch": 0.04575519644093514, "grad_norm": 1.9161236152505197, "learning_rate": 4.574374079528719e-06, "loss": 0.5225, "step": 1553 }, { "epoch": 0.045784658898398715, "grad_norm": 1.9239532026471011, "learning_rate": 4.577319587628866e-06, "loss": 0.5129, "step": 1554 }, { "epoch": 0.04581412135586229, "grad_norm": 1.9599064998199052, "learning_rate": 4.5802650957290134e-06, "loss": 0.4704, "step": 1555 }, { "epoch": 0.04584358381332587, "grad_norm": 1.845853744891913, "learning_rate": 4.583210603829161e-06, "loss": 0.5088, "step": 1556 }, { "epoch": 0.04587304627078945, "grad_norm": 2.210261252059955, "learning_rate": 4.586156111929308e-06, "loss": 0.7499, "step": 1557 }, { "epoch": 0.04590250872825302, "grad_norm": 2.040055758788618, "learning_rate": 4.589101620029455e-06, "loss": 0.7345, "step": 1558 }, { "epoch": 0.045931971185716604, "grad_norm": 1.7203417068710893, "learning_rate": 4.592047128129603e-06, "loss": 0.5131, "step": 1559 }, { "epoch": 0.04596143364318018, "grad_norm": 2.23882648064965, "learning_rate": 4.59499263622975e-06, "loss": 0.5727, "step": 1560 }, { "epoch": 0.045990896100643754, "grad_norm": 1.766063252474329, "learning_rate": 4.597938144329897e-06, "loss": 0.6289, "step": 1561 }, { "epoch": 0.04602035855810733, "grad_norm": 1.8175286457893651, "learning_rate": 4.600883652430044e-06, "loss": 0.5535, "step": 1562 }, { "epoch": 0.04604982101557091, "grad_norm": 2.112269704209757, "learning_rate": 4.6038291605301916e-06, "loss": 0.7963, "step": 1563 }, { "epoch": 0.046079283473034485, "grad_norm": 2.0969931964879795, "learning_rate": 4.606774668630339e-06, "loss": 0.7553, "step": 1564 }, { "epoch": 0.04610874593049806, "grad_norm": 1.8755750151762574, "learning_rate": 4.609720176730487e-06, "loss": 0.5496, "step": 1565 }, { "epoch": 0.04613820838796164, "grad_norm": 2.1500444904133924, "learning_rate": 4.612665684830634e-06, "loss": 0.6651, "step": 1566 }, { "epoch": 0.04616767084542522, "grad_norm": 1.7388427136195597, "learning_rate": 4.615611192930781e-06, "loss": 0.5488, "step": 1567 }, { "epoch": 0.04619713330288879, "grad_norm": 2.038913798150692, "learning_rate": 4.618556701030928e-06, "loss": 0.6245, "step": 1568 }, { "epoch": 0.046226595760352374, "grad_norm": 1.8883109798113902, "learning_rate": 4.621502209131075e-06, "loss": 0.6187, "step": 1569 }, { "epoch": 0.04625605821781595, "grad_norm": 1.7670565540554033, "learning_rate": 4.624447717231223e-06, "loss": 0.5236, "step": 1570 }, { "epoch": 0.04628552067527952, "grad_norm": 2.11320379042622, "learning_rate": 4.6273932253313706e-06, "loss": 0.6405, "step": 1571 }, { "epoch": 0.046314983132743105, "grad_norm": 1.8811851057825917, "learning_rate": 4.630338733431518e-06, "loss": 0.5979, "step": 1572 }, { "epoch": 0.04634444559020668, "grad_norm": 1.8173906289916526, "learning_rate": 4.633284241531665e-06, "loss": 0.5223, "step": 1573 }, { "epoch": 0.046373908047670255, "grad_norm": 1.853706565780506, "learning_rate": 4.636229749631812e-06, "loss": 0.4922, "step": 1574 }, { "epoch": 0.04640337050513383, "grad_norm": 1.967244797828074, "learning_rate": 4.639175257731959e-06, "loss": 0.6034, "step": 1575 }, { "epoch": 0.04643283296259741, "grad_norm": 1.9483585458131765, "learning_rate": 4.642120765832106e-06, "loss": 0.6555, "step": 1576 }, { "epoch": 0.04646229542006099, "grad_norm": 2.012439634345164, "learning_rate": 4.6450662739322535e-06, "loss": 0.5099, "step": 1577 }, { "epoch": 0.04649175787752456, "grad_norm": 1.9186062769336725, "learning_rate": 4.6480117820324015e-06, "loss": 0.4935, "step": 1578 }, { "epoch": 0.04652122033498814, "grad_norm": 1.858862550396551, "learning_rate": 4.650957290132549e-06, "loss": 0.6412, "step": 1579 }, { "epoch": 0.04655068279245172, "grad_norm": 1.962413627012287, "learning_rate": 4.653902798232696e-06, "loss": 0.3973, "step": 1580 }, { "epoch": 0.04658014524991529, "grad_norm": 1.8000689017672242, "learning_rate": 4.656848306332843e-06, "loss": 0.5905, "step": 1581 }, { "epoch": 0.046609607707378875, "grad_norm": 1.9423920770740561, "learning_rate": 4.65979381443299e-06, "loss": 0.5849, "step": 1582 }, { "epoch": 0.04663907016484245, "grad_norm": 1.9900582581913364, "learning_rate": 4.662739322533137e-06, "loss": 0.4802, "step": 1583 }, { "epoch": 0.046668532622306025, "grad_norm": 1.9138324892767862, "learning_rate": 4.6656848306332845e-06, "loss": 0.5966, "step": 1584 }, { "epoch": 0.04669799507976961, "grad_norm": 2.06223500535481, "learning_rate": 4.668630338733432e-06, "loss": 0.5111, "step": 1585 }, { "epoch": 0.04672745753723318, "grad_norm": 2.3029523455257936, "learning_rate": 4.671575846833579e-06, "loss": 0.629, "step": 1586 }, { "epoch": 0.046756919994696756, "grad_norm": 1.8747936256038287, "learning_rate": 4.674521354933727e-06, "loss": 0.5659, "step": 1587 }, { "epoch": 0.04678638245216033, "grad_norm": 1.8628071394371382, "learning_rate": 4.677466863033874e-06, "loss": 0.5735, "step": 1588 }, { "epoch": 0.04681584490962391, "grad_norm": 2.5221573833018804, "learning_rate": 4.680412371134021e-06, "loss": 0.8314, "step": 1589 }, { "epoch": 0.04684530736708749, "grad_norm": 2.075131367381351, "learning_rate": 4.683357879234168e-06, "loss": 0.6167, "step": 1590 }, { "epoch": 0.04687476982455106, "grad_norm": 1.8778599116097783, "learning_rate": 4.6863033873343155e-06, "loss": 0.5594, "step": 1591 }, { "epoch": 0.046904232282014645, "grad_norm": 1.7839201467764236, "learning_rate": 4.689248895434463e-06, "loss": 0.6762, "step": 1592 }, { "epoch": 0.04693369473947822, "grad_norm": 2.0132182370995597, "learning_rate": 4.69219440353461e-06, "loss": 0.5625, "step": 1593 }, { "epoch": 0.046963157196941795, "grad_norm": 1.7669490651850153, "learning_rate": 4.695139911634757e-06, "loss": 0.4317, "step": 1594 }, { "epoch": 0.046992619654405376, "grad_norm": 2.337813809950023, "learning_rate": 4.698085419734905e-06, "loss": 0.7735, "step": 1595 }, { "epoch": 0.04702208211186895, "grad_norm": 1.8909217631665491, "learning_rate": 4.701030927835052e-06, "loss": 0.5208, "step": 1596 }, { "epoch": 0.047051544569332526, "grad_norm": 1.9984380569101259, "learning_rate": 4.703976435935199e-06, "loss": 0.6709, "step": 1597 }, { "epoch": 0.04708100702679611, "grad_norm": 1.8480153868356426, "learning_rate": 4.7069219440353464e-06, "loss": 0.6976, "step": 1598 }, { "epoch": 0.04711046948425968, "grad_norm": 2.0505053254955676, "learning_rate": 4.709867452135494e-06, "loss": 0.6672, "step": 1599 }, { "epoch": 0.04713993194172326, "grad_norm": 2.129841820201933, "learning_rate": 4.712812960235641e-06, "loss": 0.6619, "step": 1600 }, { "epoch": 0.04716939439918683, "grad_norm": 2.0288512871065545, "learning_rate": 4.715758468335788e-06, "loss": 0.6025, "step": 1601 }, { "epoch": 0.047198856856650415, "grad_norm": 1.9831178731205708, "learning_rate": 4.718703976435935e-06, "loss": 0.5624, "step": 1602 }, { "epoch": 0.04722831931411399, "grad_norm": 1.9301280238755743, "learning_rate": 4.721649484536083e-06, "loss": 0.6128, "step": 1603 }, { "epoch": 0.047257781771577564, "grad_norm": 2.0107316651927296, "learning_rate": 4.72459499263623e-06, "loss": 0.4028, "step": 1604 }, { "epoch": 0.047287244229041146, "grad_norm": 1.9719000522295451, "learning_rate": 4.727540500736377e-06, "loss": 0.6464, "step": 1605 }, { "epoch": 0.04731670668650472, "grad_norm": 2.0086396256857117, "learning_rate": 4.730486008836525e-06, "loss": 0.6973, "step": 1606 }, { "epoch": 0.047346169143968296, "grad_norm": 1.9741169700491668, "learning_rate": 4.733431516936672e-06, "loss": 0.6505, "step": 1607 }, { "epoch": 0.04737563160143188, "grad_norm": 2.1206404570846416, "learning_rate": 4.736377025036819e-06, "loss": 0.7095, "step": 1608 }, { "epoch": 0.04740509405889545, "grad_norm": 1.8312265928691136, "learning_rate": 4.739322533136966e-06, "loss": 0.5062, "step": 1609 }, { "epoch": 0.04743455651635903, "grad_norm": 1.8431403533129591, "learning_rate": 4.742268041237113e-06, "loss": 0.5752, "step": 1610 }, { "epoch": 0.04746401897382261, "grad_norm": 1.8865474898709262, "learning_rate": 4.745213549337261e-06, "loss": 0.4339, "step": 1611 }, { "epoch": 0.047493481431286184, "grad_norm": 1.84192947173871, "learning_rate": 4.748159057437408e-06, "loss": 0.5692, "step": 1612 }, { "epoch": 0.04752294388874976, "grad_norm": 1.9227742457514563, "learning_rate": 4.7511045655375556e-06, "loss": 0.578, "step": 1613 }, { "epoch": 0.047552406346213334, "grad_norm": 1.7987434850728072, "learning_rate": 4.754050073637703e-06, "loss": 0.5569, "step": 1614 }, { "epoch": 0.047581868803676916, "grad_norm": 1.8564709812719296, "learning_rate": 4.75699558173785e-06, "loss": 0.6006, "step": 1615 }, { "epoch": 0.04761133126114049, "grad_norm": 1.7373442368173002, "learning_rate": 4.759941089837997e-06, "loss": 0.5609, "step": 1616 }, { "epoch": 0.047640793718604066, "grad_norm": 1.9599681619810752, "learning_rate": 4.762886597938144e-06, "loss": 0.5451, "step": 1617 }, { "epoch": 0.04767025617606765, "grad_norm": 1.8835251801875639, "learning_rate": 4.765832106038292e-06, "loss": 0.6001, "step": 1618 }, { "epoch": 0.04769971863353122, "grad_norm": 2.0424573702698092, "learning_rate": 4.768777614138439e-06, "loss": 0.5724, "step": 1619 }, { "epoch": 0.0477291810909948, "grad_norm": 1.9329122914396326, "learning_rate": 4.7717231222385865e-06, "loss": 0.5013, "step": 1620 }, { "epoch": 0.04775864354845838, "grad_norm": 1.9857596608165462, "learning_rate": 4.774668630338734e-06, "loss": 0.7036, "step": 1621 }, { "epoch": 0.047788106005921954, "grad_norm": 1.9945899492606403, "learning_rate": 4.777614138438882e-06, "loss": 0.6274, "step": 1622 }, { "epoch": 0.04781756846338553, "grad_norm": 1.8509624052637095, "learning_rate": 4.780559646539029e-06, "loss": 0.6108, "step": 1623 }, { "epoch": 0.04784703092084911, "grad_norm": 1.8432593004089879, "learning_rate": 4.783505154639176e-06, "loss": 0.6627, "step": 1624 }, { "epoch": 0.047876493378312686, "grad_norm": 2.179839111007365, "learning_rate": 4.786450662739323e-06, "loss": 0.6324, "step": 1625 }, { "epoch": 0.04790595583577626, "grad_norm": 2.0959671865792124, "learning_rate": 4.78939617083947e-06, "loss": 0.4801, "step": 1626 }, { "epoch": 0.04793541829323984, "grad_norm": 2.4290111078425127, "learning_rate": 4.7923416789396175e-06, "loss": 0.6148, "step": 1627 }, { "epoch": 0.04796488075070342, "grad_norm": 1.9633957020500863, "learning_rate": 4.795287187039765e-06, "loss": 0.5384, "step": 1628 }, { "epoch": 0.04799434320816699, "grad_norm": 1.7958104823214274, "learning_rate": 4.798232695139912e-06, "loss": 0.6553, "step": 1629 }, { "epoch": 0.04802380566563057, "grad_norm": 2.004359052642179, "learning_rate": 4.80117820324006e-06, "loss": 0.6894, "step": 1630 }, { "epoch": 0.04805326812309415, "grad_norm": 1.9155404238799567, "learning_rate": 4.804123711340207e-06, "loss": 0.6388, "step": 1631 }, { "epoch": 0.048082730580557724, "grad_norm": 1.8600045070778535, "learning_rate": 4.807069219440354e-06, "loss": 0.7012, "step": 1632 }, { "epoch": 0.0481121930380213, "grad_norm": 1.9878305064733555, "learning_rate": 4.810014727540501e-06, "loss": 0.4987, "step": 1633 }, { "epoch": 0.04814165549548488, "grad_norm": 1.7461870749764206, "learning_rate": 4.8129602356406485e-06, "loss": 0.5424, "step": 1634 }, { "epoch": 0.048171117952948456, "grad_norm": 1.9274673893073386, "learning_rate": 4.815905743740796e-06, "loss": 0.5658, "step": 1635 }, { "epoch": 0.04820058041041203, "grad_norm": 2.0320041057740186, "learning_rate": 4.818851251840943e-06, "loss": 0.5752, "step": 1636 }, { "epoch": 0.04823004286787561, "grad_norm": 1.6894856153721134, "learning_rate": 4.82179675994109e-06, "loss": 0.4855, "step": 1637 }, { "epoch": 0.04825950532533919, "grad_norm": 1.720302842470695, "learning_rate": 4.824742268041238e-06, "loss": 0.531, "step": 1638 }, { "epoch": 0.04828896778280276, "grad_norm": 2.0213086241800506, "learning_rate": 4.827687776141385e-06, "loss": 0.5252, "step": 1639 }, { "epoch": 0.048318430240266344, "grad_norm": 2.318432781139047, "learning_rate": 4.830633284241532e-06, "loss": 0.5264, "step": 1640 }, { "epoch": 0.04834789269772992, "grad_norm": 2.0164470949327766, "learning_rate": 4.8335787923416795e-06, "loss": 0.5848, "step": 1641 }, { "epoch": 0.048377355155193494, "grad_norm": 2.007241817055835, "learning_rate": 4.836524300441827e-06, "loss": 0.6423, "step": 1642 }, { "epoch": 0.04840681761265707, "grad_norm": 1.9393963635330689, "learning_rate": 4.839469808541974e-06, "loss": 0.4446, "step": 1643 }, { "epoch": 0.04843628007012065, "grad_norm": 1.8194692271670665, "learning_rate": 4.842415316642121e-06, "loss": 0.5838, "step": 1644 }, { "epoch": 0.048465742527584225, "grad_norm": 1.9110197907558366, "learning_rate": 4.845360824742268e-06, "loss": 0.5327, "step": 1645 }, { "epoch": 0.0484952049850478, "grad_norm": 1.9336050092752268, "learning_rate": 4.848306332842416e-06, "loss": 0.6373, "step": 1646 }, { "epoch": 0.04852466744251138, "grad_norm": 1.8317577893091574, "learning_rate": 4.851251840942563e-06, "loss": 0.586, "step": 1647 }, { "epoch": 0.04855412989997496, "grad_norm": 2.118034511322204, "learning_rate": 4.8541973490427104e-06, "loss": 0.4296, "step": 1648 }, { "epoch": 0.04858359235743853, "grad_norm": 2.084355441960032, "learning_rate": 4.857142857142858e-06, "loss": 0.7264, "step": 1649 }, { "epoch": 0.048613054814902114, "grad_norm": 2.024443441470366, "learning_rate": 4.860088365243005e-06, "loss": 0.5221, "step": 1650 }, { "epoch": 0.04864251727236569, "grad_norm": 2.213954919893841, "learning_rate": 4.863033873343152e-06, "loss": 0.6966, "step": 1651 }, { "epoch": 0.048671979729829264, "grad_norm": 1.8141153780881867, "learning_rate": 4.865979381443299e-06, "loss": 0.5086, "step": 1652 }, { "epoch": 0.048701442187292845, "grad_norm": 1.6767578656298978, "learning_rate": 4.868924889543446e-06, "loss": 0.5227, "step": 1653 }, { "epoch": 0.04873090464475642, "grad_norm": 1.7163755207535178, "learning_rate": 4.871870397643594e-06, "loss": 0.4855, "step": 1654 }, { "epoch": 0.048760367102219995, "grad_norm": 1.9875578127925058, "learning_rate": 4.874815905743741e-06, "loss": 0.5419, "step": 1655 }, { "epoch": 0.04878982955968357, "grad_norm": 2.0596474758198693, "learning_rate": 4.8777614138438886e-06, "loss": 0.6279, "step": 1656 }, { "epoch": 0.04881929201714715, "grad_norm": 2.2992447991921385, "learning_rate": 4.880706921944036e-06, "loss": 0.668, "step": 1657 }, { "epoch": 0.04884875447461073, "grad_norm": 1.7844511993776273, "learning_rate": 4.883652430044183e-06, "loss": 0.6389, "step": 1658 }, { "epoch": 0.0488782169320743, "grad_norm": 2.0373215591382112, "learning_rate": 4.88659793814433e-06, "loss": 0.658, "step": 1659 }, { "epoch": 0.048907679389537884, "grad_norm": 2.014819679424432, "learning_rate": 4.889543446244477e-06, "loss": 0.5669, "step": 1660 }, { "epoch": 0.04893714184700146, "grad_norm": 1.9714691258267285, "learning_rate": 4.892488954344624e-06, "loss": 0.5977, "step": 1661 }, { "epoch": 0.04896660430446503, "grad_norm": 1.9717453107764022, "learning_rate": 4.8954344624447715e-06, "loss": 0.7015, "step": 1662 }, { "epoch": 0.048996066761928615, "grad_norm": 1.847090975202504, "learning_rate": 4.8983799705449195e-06, "loss": 0.5707, "step": 1663 }, { "epoch": 0.04902552921939219, "grad_norm": 2.1006371609705243, "learning_rate": 4.901325478645067e-06, "loss": 0.6091, "step": 1664 }, { "epoch": 0.049054991676855765, "grad_norm": 2.0229207542607464, "learning_rate": 4.904270986745214e-06, "loss": 0.608, "step": 1665 }, { "epoch": 0.04908445413431935, "grad_norm": 1.9035507475172528, "learning_rate": 4.907216494845361e-06, "loss": 0.4803, "step": 1666 }, { "epoch": 0.04911391659178292, "grad_norm": 2.167516443925855, "learning_rate": 4.910162002945508e-06, "loss": 0.6766, "step": 1667 }, { "epoch": 0.0491433790492465, "grad_norm": 1.8426218480339673, "learning_rate": 4.913107511045655e-06, "loss": 0.4513, "step": 1668 }, { "epoch": 0.04917284150671007, "grad_norm": 2.1190333461693407, "learning_rate": 4.9160530191458025e-06, "loss": 0.6166, "step": 1669 }, { "epoch": 0.04920230396417365, "grad_norm": 1.7847773978928518, "learning_rate": 4.91899852724595e-06, "loss": 0.4954, "step": 1670 }, { "epoch": 0.04923176642163723, "grad_norm": 2.030802515128628, "learning_rate": 4.921944035346098e-06, "loss": 0.5355, "step": 1671 }, { "epoch": 0.0492612288791008, "grad_norm": 1.9659230753504833, "learning_rate": 4.924889543446245e-06, "loss": 0.6648, "step": 1672 }, { "epoch": 0.049290691336564385, "grad_norm": 1.7586963403779643, "learning_rate": 4.927835051546392e-06, "loss": 0.4761, "step": 1673 }, { "epoch": 0.04932015379402796, "grad_norm": 1.7899255873467843, "learning_rate": 4.93078055964654e-06, "loss": 0.5176, "step": 1674 }, { "epoch": 0.049349616251491535, "grad_norm": 1.9556836028935678, "learning_rate": 4.933726067746687e-06, "loss": 0.5729, "step": 1675 }, { "epoch": 0.04937907870895512, "grad_norm": 1.8994029794701202, "learning_rate": 4.936671575846834e-06, "loss": 0.5849, "step": 1676 }, { "epoch": 0.04940854116641869, "grad_norm": 2.159837028179765, "learning_rate": 4.9396170839469815e-06, "loss": 0.5289, "step": 1677 }, { "epoch": 0.049438003623882266, "grad_norm": 1.8080674750003345, "learning_rate": 4.942562592047129e-06, "loss": 0.6345, "step": 1678 }, { "epoch": 0.04946746608134585, "grad_norm": 1.9640051086631218, "learning_rate": 4.945508100147276e-06, "loss": 0.4004, "step": 1679 }, { "epoch": 0.04949692853880942, "grad_norm": 1.6718094877524716, "learning_rate": 4.948453608247423e-06, "loss": 0.6106, "step": 1680 }, { "epoch": 0.049526390996273, "grad_norm": 1.9579912322536956, "learning_rate": 4.95139911634757e-06, "loss": 0.5979, "step": 1681 }, { "epoch": 0.04955585345373657, "grad_norm": 1.78840391967557, "learning_rate": 4.954344624447718e-06, "loss": 0.5304, "step": 1682 }, { "epoch": 0.049585315911200155, "grad_norm": 1.9227770467678698, "learning_rate": 4.957290132547865e-06, "loss": 0.4537, "step": 1683 }, { "epoch": 0.04961477836866373, "grad_norm": 1.8257146212568123, "learning_rate": 4.9602356406480125e-06, "loss": 0.6379, "step": 1684 }, { "epoch": 0.049644240826127305, "grad_norm": 1.852583666664278, "learning_rate": 4.96318114874816e-06, "loss": 0.4953, "step": 1685 }, { "epoch": 0.049673703283590886, "grad_norm": 2.0595554750661633, "learning_rate": 4.966126656848307e-06, "loss": 0.5558, "step": 1686 }, { "epoch": 0.04970316574105446, "grad_norm": 1.6757246789201548, "learning_rate": 4.969072164948454e-06, "loss": 0.5343, "step": 1687 }, { "epoch": 0.049732628198518036, "grad_norm": 2.0842896233881247, "learning_rate": 4.972017673048601e-06, "loss": 0.6611, "step": 1688 }, { "epoch": 0.04976209065598162, "grad_norm": 1.9864831460349157, "learning_rate": 4.974963181148748e-06, "loss": 0.6521, "step": 1689 }, { "epoch": 0.04979155311344519, "grad_norm": 2.0807849895435204, "learning_rate": 4.977908689248896e-06, "loss": 0.7224, "step": 1690 }, { "epoch": 0.04982101557090877, "grad_norm": 2.1934247746516924, "learning_rate": 4.9808541973490434e-06, "loss": 0.6091, "step": 1691 }, { "epoch": 0.04985047802837235, "grad_norm": 2.2086413637082773, "learning_rate": 4.983799705449191e-06, "loss": 0.4493, "step": 1692 }, { "epoch": 0.049879940485835925, "grad_norm": 1.9114521785103864, "learning_rate": 4.986745213549338e-06, "loss": 0.6656, "step": 1693 }, { "epoch": 0.0499094029432995, "grad_norm": 1.8993372537664757, "learning_rate": 4.989690721649485e-06, "loss": 0.5521, "step": 1694 }, { "epoch": 0.049938865400763074, "grad_norm": 1.9755956912515222, "learning_rate": 4.992636229749632e-06, "loss": 0.6959, "step": 1695 }, { "epoch": 0.049968327858226656, "grad_norm": 1.7369837890003539, "learning_rate": 4.995581737849779e-06, "loss": 0.5816, "step": 1696 }, { "epoch": 0.04999779031569023, "grad_norm": 1.893026181696575, "learning_rate": 4.998527245949926e-06, "loss": 0.5264, "step": 1697 }, { "epoch": 0.050027252773153806, "grad_norm": 2.0065773920904917, "learning_rate": 5.001472754050074e-06, "loss": 0.3682, "step": 1698 }, { "epoch": 0.05005671523061739, "grad_norm": 1.7213428462300382, "learning_rate": 5.0044182621502216e-06, "loss": 0.5922, "step": 1699 }, { "epoch": 0.05008617768808096, "grad_norm": 1.785168234678004, "learning_rate": 5.007363770250369e-06, "loss": 0.4882, "step": 1700 }, { "epoch": 0.05011564014554454, "grad_norm": 1.9083709942333211, "learning_rate": 5.010309278350516e-06, "loss": 0.6156, "step": 1701 }, { "epoch": 0.05014510260300812, "grad_norm": 1.8817073255353862, "learning_rate": 5.013254786450663e-06, "loss": 0.5524, "step": 1702 }, { "epoch": 0.050174565060471694, "grad_norm": 2.0566097362610445, "learning_rate": 5.01620029455081e-06, "loss": 0.4919, "step": 1703 }, { "epoch": 0.05020402751793527, "grad_norm": 1.8617360117093593, "learning_rate": 5.019145802650957e-06, "loss": 0.5028, "step": 1704 }, { "epoch": 0.05023348997539885, "grad_norm": 1.972369635879622, "learning_rate": 5.0220913107511045e-06, "loss": 0.6319, "step": 1705 }, { "epoch": 0.050262952432862426, "grad_norm": 1.783998147742522, "learning_rate": 5.0250368188512525e-06, "loss": 0.612, "step": 1706 }, { "epoch": 0.050292414890326, "grad_norm": 1.718795052002889, "learning_rate": 5.0279823269514e-06, "loss": 0.4769, "step": 1707 }, { "epoch": 0.050321877347789576, "grad_norm": 1.7583734796402353, "learning_rate": 5.030927835051547e-06, "loss": 0.473, "step": 1708 }, { "epoch": 0.05035133980525316, "grad_norm": 2.0049583597371434, "learning_rate": 5.033873343151694e-06, "loss": 0.7964, "step": 1709 }, { "epoch": 0.05038080226271673, "grad_norm": 1.85119902347369, "learning_rate": 5.036818851251841e-06, "loss": 0.5678, "step": 1710 }, { "epoch": 0.05041026472018031, "grad_norm": 1.9731514946856206, "learning_rate": 5.039764359351988e-06, "loss": 0.6201, "step": 1711 }, { "epoch": 0.05043972717764389, "grad_norm": 2.1227225728384354, "learning_rate": 5.0427098674521355e-06, "loss": 0.7239, "step": 1712 }, { "epoch": 0.050469189635107464, "grad_norm": 1.8984663285029533, "learning_rate": 5.045655375552283e-06, "loss": 0.5548, "step": 1713 }, { "epoch": 0.05049865209257104, "grad_norm": 1.8405935341827957, "learning_rate": 5.048600883652431e-06, "loss": 0.4442, "step": 1714 }, { "epoch": 0.05052811455003462, "grad_norm": 1.7853937306295127, "learning_rate": 5.051546391752578e-06, "loss": 0.4495, "step": 1715 }, { "epoch": 0.050557577007498196, "grad_norm": 1.9825592498596691, "learning_rate": 5.054491899852725e-06, "loss": 0.6496, "step": 1716 }, { "epoch": 0.05058703946496177, "grad_norm": 2.1834419787343116, "learning_rate": 5.057437407952872e-06, "loss": 0.5918, "step": 1717 }, { "epoch": 0.05061650192242535, "grad_norm": 2.1073377185269284, "learning_rate": 5.060382916053019e-06, "loss": 0.7117, "step": 1718 }, { "epoch": 0.05064596437988893, "grad_norm": 1.8733314052896677, "learning_rate": 5.0633284241531665e-06, "loss": 0.7031, "step": 1719 }, { "epoch": 0.0506754268373525, "grad_norm": 1.9845540130903467, "learning_rate": 5.066273932253314e-06, "loss": 0.5964, "step": 1720 }, { "epoch": 0.05070488929481608, "grad_norm": 1.9561315679234372, "learning_rate": 5.069219440353461e-06, "loss": 0.6485, "step": 1721 }, { "epoch": 0.05073435175227966, "grad_norm": 1.8503833712004991, "learning_rate": 5.072164948453609e-06, "loss": 0.6017, "step": 1722 }, { "epoch": 0.050763814209743234, "grad_norm": 1.9995976243743008, "learning_rate": 5.075110456553756e-06, "loss": 0.6716, "step": 1723 }, { "epoch": 0.05079327666720681, "grad_norm": 1.8833818123532564, "learning_rate": 5.078055964653903e-06, "loss": 0.6363, "step": 1724 }, { "epoch": 0.05082273912467039, "grad_norm": 1.8559182162032568, "learning_rate": 5.08100147275405e-06, "loss": 0.5984, "step": 1725 }, { "epoch": 0.050852201582133966, "grad_norm": 1.754164164818873, "learning_rate": 5.0839469808541975e-06, "loss": 0.4403, "step": 1726 }, { "epoch": 0.05088166403959754, "grad_norm": 1.812873476705842, "learning_rate": 5.086892488954345e-06, "loss": 0.6642, "step": 1727 }, { "epoch": 0.05091112649706112, "grad_norm": 1.7830783982139735, "learning_rate": 5.089837997054492e-06, "loss": 0.5714, "step": 1728 }, { "epoch": 0.0509405889545247, "grad_norm": 1.7054287217662254, "learning_rate": 5.092783505154639e-06, "loss": 0.5352, "step": 1729 }, { "epoch": 0.05097005141198827, "grad_norm": 1.845951783082457, "learning_rate": 5.095729013254786e-06, "loss": 0.6902, "step": 1730 }, { "epoch": 0.050999513869451854, "grad_norm": 1.9993191106637707, "learning_rate": 5.098674521354934e-06, "loss": 0.6704, "step": 1731 }, { "epoch": 0.05102897632691543, "grad_norm": 2.068081446884431, "learning_rate": 5.101620029455081e-06, "loss": 0.5859, "step": 1732 }, { "epoch": 0.051058438784379004, "grad_norm": 2.0450836531017687, "learning_rate": 5.1045655375552284e-06, "loss": 0.6035, "step": 1733 }, { "epoch": 0.05108790124184258, "grad_norm": 1.9190677974162937, "learning_rate": 5.107511045655376e-06, "loss": 0.5612, "step": 1734 }, { "epoch": 0.05111736369930616, "grad_norm": 2.0284153109535934, "learning_rate": 5.110456553755523e-06, "loss": 0.5194, "step": 1735 }, { "epoch": 0.051146826156769735, "grad_norm": 1.9211039322899706, "learning_rate": 5.11340206185567e-06, "loss": 0.6071, "step": 1736 }, { "epoch": 0.05117628861423331, "grad_norm": 1.8968078266959167, "learning_rate": 5.116347569955817e-06, "loss": 0.4782, "step": 1737 }, { "epoch": 0.05120575107169689, "grad_norm": 2.1018740132032314, "learning_rate": 5.119293078055964e-06, "loss": 0.5588, "step": 1738 }, { "epoch": 0.05123521352916047, "grad_norm": 2.1143341073872546, "learning_rate": 5.122238586156112e-06, "loss": 0.5675, "step": 1739 }, { "epoch": 0.05126467598662404, "grad_norm": 2.11982611151677, "learning_rate": 5.125184094256259e-06, "loss": 0.6849, "step": 1740 }, { "epoch": 0.051294138444087624, "grad_norm": 2.017861849717437, "learning_rate": 5.128129602356407e-06, "loss": 0.7308, "step": 1741 }, { "epoch": 0.0513236009015512, "grad_norm": 1.973777686822774, "learning_rate": 5.131075110456554e-06, "loss": 0.707, "step": 1742 }, { "epoch": 0.051353063359014774, "grad_norm": 1.9287695061662695, "learning_rate": 5.134020618556701e-06, "loss": 0.559, "step": 1743 }, { "epoch": 0.051382525816478355, "grad_norm": 1.7645957762489966, "learning_rate": 5.136966126656848e-06, "loss": 0.4793, "step": 1744 }, { "epoch": 0.05141198827394193, "grad_norm": 1.7850806478478167, "learning_rate": 5.139911634756995e-06, "loss": 0.5844, "step": 1745 }, { "epoch": 0.051441450731405505, "grad_norm": 1.9810947892210642, "learning_rate": 5.142857142857142e-06, "loss": 0.5895, "step": 1746 }, { "epoch": 0.05147091318886908, "grad_norm": 1.8713434980044301, "learning_rate": 5.14580265095729e-06, "loss": 0.4352, "step": 1747 }, { "epoch": 0.05150037564633266, "grad_norm": 2.0069829340610643, "learning_rate": 5.1487481590574376e-06, "loss": 0.6333, "step": 1748 }, { "epoch": 0.05152983810379624, "grad_norm": 1.6921752613320675, "learning_rate": 5.151693667157585e-06, "loss": 0.4437, "step": 1749 }, { "epoch": 0.05155930056125981, "grad_norm": 2.0188892089422765, "learning_rate": 5.154639175257732e-06, "loss": 0.5363, "step": 1750 }, { "epoch": 0.051588763018723394, "grad_norm": 1.8371041215814465, "learning_rate": 5.15758468335788e-06, "loss": 0.517, "step": 1751 }, { "epoch": 0.05161822547618697, "grad_norm": 1.5519408785773052, "learning_rate": 5.160530191458028e-06, "loss": 0.4227, "step": 1752 }, { "epoch": 0.05164768793365054, "grad_norm": 1.8730787099860675, "learning_rate": 5.163475699558175e-06, "loss": 0.5934, "step": 1753 }, { "epoch": 0.051677150391114125, "grad_norm": 1.844100298041623, "learning_rate": 5.166421207658322e-06, "loss": 0.6674, "step": 1754 }, { "epoch": 0.0517066128485777, "grad_norm": 1.8429900176938105, "learning_rate": 5.169366715758469e-06, "loss": 0.6564, "step": 1755 }, { "epoch": 0.051736075306041275, "grad_norm": 1.7520650365770052, "learning_rate": 5.1723122238586165e-06, "loss": 0.4864, "step": 1756 }, { "epoch": 0.05176553776350486, "grad_norm": 1.9715444160902291, "learning_rate": 5.175257731958764e-06, "loss": 0.6761, "step": 1757 }, { "epoch": 0.05179500022096843, "grad_norm": 1.872166000840263, "learning_rate": 5.178203240058911e-06, "loss": 0.5232, "step": 1758 }, { "epoch": 0.05182446267843201, "grad_norm": 1.9715829493222274, "learning_rate": 5.181148748159058e-06, "loss": 0.6945, "step": 1759 }, { "epoch": 0.05185392513589558, "grad_norm": 2.1833805626747043, "learning_rate": 5.184094256259205e-06, "loss": 0.7034, "step": 1760 }, { "epoch": 0.05188338759335916, "grad_norm": 1.7187469104721156, "learning_rate": 5.187039764359353e-06, "loss": 0.6084, "step": 1761 }, { "epoch": 0.05191285005082274, "grad_norm": 1.789837212764785, "learning_rate": 5.1899852724595e-06, "loss": 0.7062, "step": 1762 }, { "epoch": 0.05194231250828631, "grad_norm": 1.773761749657489, "learning_rate": 5.1929307805596475e-06, "loss": 0.5102, "step": 1763 }, { "epoch": 0.051971774965749895, "grad_norm": 1.870580564960722, "learning_rate": 5.195876288659795e-06, "loss": 0.5951, "step": 1764 }, { "epoch": 0.05200123742321347, "grad_norm": 1.881013214680594, "learning_rate": 5.198821796759942e-06, "loss": 0.5805, "step": 1765 }, { "epoch": 0.052030699880677045, "grad_norm": 2.0415275521509733, "learning_rate": 5.201767304860089e-06, "loss": 0.5629, "step": 1766 }, { "epoch": 0.05206016233814063, "grad_norm": 2.061226773707769, "learning_rate": 5.204712812960236e-06, "loss": 0.7887, "step": 1767 }, { "epoch": 0.0520896247956042, "grad_norm": 2.006625467150842, "learning_rate": 5.207658321060383e-06, "loss": 0.5739, "step": 1768 }, { "epoch": 0.052119087253067777, "grad_norm": 1.9622003177464973, "learning_rate": 5.210603829160531e-06, "loss": 0.527, "step": 1769 }, { "epoch": 0.05214854971053136, "grad_norm": 2.0772200273149224, "learning_rate": 5.2135493372606785e-06, "loss": 0.6626, "step": 1770 }, { "epoch": 0.05217801216799493, "grad_norm": 1.926881040779172, "learning_rate": 5.216494845360826e-06, "loss": 0.4657, "step": 1771 }, { "epoch": 0.05220747462545851, "grad_norm": 1.8401450338317982, "learning_rate": 5.219440353460973e-06, "loss": 0.6308, "step": 1772 }, { "epoch": 0.05223693708292209, "grad_norm": 1.7247054659649748, "learning_rate": 5.22238586156112e-06, "loss": 0.7395, "step": 1773 }, { "epoch": 0.052266399540385665, "grad_norm": 1.7824556173030037, "learning_rate": 5.225331369661267e-06, "loss": 0.4657, "step": 1774 }, { "epoch": 0.05229586199784924, "grad_norm": 2.054565760986694, "learning_rate": 5.228276877761414e-06, "loss": 0.6304, "step": 1775 }, { "epoch": 0.052325324455312815, "grad_norm": 1.9072536631082064, "learning_rate": 5.2312223858615615e-06, "loss": 0.7392, "step": 1776 }, { "epoch": 0.052354786912776397, "grad_norm": 1.8523427572776945, "learning_rate": 5.2341678939617095e-06, "loss": 0.6205, "step": 1777 }, { "epoch": 0.05238424937023997, "grad_norm": 1.6323869528508652, "learning_rate": 5.237113402061857e-06, "loss": 0.3813, "step": 1778 }, { "epoch": 0.052413711827703546, "grad_norm": 1.9908528085194381, "learning_rate": 5.240058910162004e-06, "loss": 0.675, "step": 1779 }, { "epoch": 0.05244317428516713, "grad_norm": 1.8022107220217916, "learning_rate": 5.243004418262151e-06, "loss": 0.6503, "step": 1780 }, { "epoch": 0.0524726367426307, "grad_norm": 1.9152071505816355, "learning_rate": 5.245949926362298e-06, "loss": 0.5648, "step": 1781 }, { "epoch": 0.05250209920009428, "grad_norm": 1.8177422288811054, "learning_rate": 5.248895434462445e-06, "loss": 0.4566, "step": 1782 }, { "epoch": 0.05253156165755786, "grad_norm": 1.7370284894519326, "learning_rate": 5.2518409425625924e-06, "loss": 0.5767, "step": 1783 }, { "epoch": 0.052561024115021435, "grad_norm": 1.8306241700949584, "learning_rate": 5.25478645066274e-06, "loss": 0.4288, "step": 1784 }, { "epoch": 0.05259048657248501, "grad_norm": 2.007063879397914, "learning_rate": 5.257731958762888e-06, "loss": 0.4541, "step": 1785 }, { "epoch": 0.05261994902994859, "grad_norm": 1.859135453471448, "learning_rate": 5.260677466863035e-06, "loss": 0.6005, "step": 1786 }, { "epoch": 0.052649411487412166, "grad_norm": 1.7518055594372963, "learning_rate": 5.263622974963182e-06, "loss": 0.4548, "step": 1787 }, { "epoch": 0.05267887394487574, "grad_norm": 2.0463755692276195, "learning_rate": 5.266568483063329e-06, "loss": 0.5735, "step": 1788 }, { "epoch": 0.052708336402339316, "grad_norm": 1.9910921116623375, "learning_rate": 5.269513991163476e-06, "loss": 0.558, "step": 1789 }, { "epoch": 0.0527377988598029, "grad_norm": 1.8544055858760131, "learning_rate": 5.272459499263623e-06, "loss": 0.4967, "step": 1790 }, { "epoch": 0.05276726131726647, "grad_norm": 1.9750171361450781, "learning_rate": 5.2754050073637706e-06, "loss": 0.6656, "step": 1791 }, { "epoch": 0.05279672377473005, "grad_norm": 1.9163680848860614, "learning_rate": 5.278350515463918e-06, "loss": 0.6924, "step": 1792 }, { "epoch": 0.05282618623219363, "grad_norm": 1.7865329784519066, "learning_rate": 5.281296023564066e-06, "loss": 0.5421, "step": 1793 }, { "epoch": 0.052855648689657204, "grad_norm": 1.7421701856687997, "learning_rate": 5.284241531664213e-06, "loss": 0.6127, "step": 1794 }, { "epoch": 0.05288511114712078, "grad_norm": 2.0750623088973423, "learning_rate": 5.28718703976436e-06, "loss": 0.6177, "step": 1795 }, { "epoch": 0.05291457360458436, "grad_norm": 1.8615111228834968, "learning_rate": 5.290132547864507e-06, "loss": 0.4514, "step": 1796 }, { "epoch": 0.052944036062047936, "grad_norm": 1.9819703458147666, "learning_rate": 5.293078055964654e-06, "loss": 0.5539, "step": 1797 }, { "epoch": 0.05297349851951151, "grad_norm": 2.1012516247133544, "learning_rate": 5.2960235640648015e-06, "loss": 0.5334, "step": 1798 }, { "epoch": 0.05300296097697509, "grad_norm": 1.874132972107067, "learning_rate": 5.298969072164949e-06, "loss": 0.5871, "step": 1799 }, { "epoch": 0.05303242343443867, "grad_norm": 1.984121661189552, "learning_rate": 5.301914580265096e-06, "loss": 0.6388, "step": 1800 }, { "epoch": 0.05306188589190224, "grad_norm": 1.73290010694321, "learning_rate": 5.304860088365244e-06, "loss": 0.5188, "step": 1801 }, { "epoch": 0.05309134834936582, "grad_norm": 1.9303446577461556, "learning_rate": 5.307805596465391e-06, "loss": 0.6043, "step": 1802 }, { "epoch": 0.0531208108068294, "grad_norm": 1.8171939575517477, "learning_rate": 5.310751104565538e-06, "loss": 0.6027, "step": 1803 }, { "epoch": 0.053150273264292974, "grad_norm": 1.7988056012382163, "learning_rate": 5.313696612665685e-06, "loss": 0.5421, "step": 1804 }, { "epoch": 0.05317973572175655, "grad_norm": 1.8631240171074934, "learning_rate": 5.3166421207658325e-06, "loss": 0.4553, "step": 1805 }, { "epoch": 0.05320919817922013, "grad_norm": 1.9003209678155633, "learning_rate": 5.31958762886598e-06, "loss": 0.5433, "step": 1806 }, { "epoch": 0.053238660636683706, "grad_norm": 1.9272478882034134, "learning_rate": 5.322533136966127e-06, "loss": 0.5139, "step": 1807 }, { "epoch": 0.05326812309414728, "grad_norm": 2.1104163939802416, "learning_rate": 5.325478645066274e-06, "loss": 0.5169, "step": 1808 }, { "epoch": 0.05329758555161086, "grad_norm": 1.8565800297016808, "learning_rate": 5.328424153166422e-06, "loss": 0.5863, "step": 1809 }, { "epoch": 0.05332704800907444, "grad_norm": 1.9438343557982705, "learning_rate": 5.331369661266569e-06, "loss": 0.6048, "step": 1810 }, { "epoch": 0.05335651046653801, "grad_norm": 2.0379696727081327, "learning_rate": 5.334315169366716e-06, "loss": 0.4641, "step": 1811 }, { "epoch": 0.053385972924001594, "grad_norm": 2.3197186872894258, "learning_rate": 5.3372606774668635e-06, "loss": 0.6577, "step": 1812 }, { "epoch": 0.05341543538146517, "grad_norm": 1.852422350391209, "learning_rate": 5.340206185567011e-06, "loss": 0.4943, "step": 1813 }, { "epoch": 0.053444897838928744, "grad_norm": 1.8727063483334052, "learning_rate": 5.343151693667158e-06, "loss": 0.6441, "step": 1814 }, { "epoch": 0.05347436029639232, "grad_norm": 1.6432557345736245, "learning_rate": 5.346097201767305e-06, "loss": 0.3682, "step": 1815 }, { "epoch": 0.0535038227538559, "grad_norm": 1.701967200143744, "learning_rate": 5.349042709867452e-06, "loss": 0.5473, "step": 1816 }, { "epoch": 0.053533285211319476, "grad_norm": 1.8653115314519835, "learning_rate": 5.351988217967599e-06, "loss": 0.538, "step": 1817 }, { "epoch": 0.05356274766878305, "grad_norm": 1.8596839406564976, "learning_rate": 5.354933726067747e-06, "loss": 0.5228, "step": 1818 }, { "epoch": 0.05359221012624663, "grad_norm": 1.8871806213606297, "learning_rate": 5.3578792341678945e-06, "loss": 0.5775, "step": 1819 }, { "epoch": 0.05362167258371021, "grad_norm": 1.7267841813702087, "learning_rate": 5.360824742268042e-06, "loss": 0.5182, "step": 1820 }, { "epoch": 0.05365113504117378, "grad_norm": 2.058987553971514, "learning_rate": 5.363770250368189e-06, "loss": 0.5547, "step": 1821 }, { "epoch": 0.053680597498637364, "grad_norm": 1.7116001901201536, "learning_rate": 5.366715758468336e-06, "loss": 0.645, "step": 1822 }, { "epoch": 0.05371005995610094, "grad_norm": 1.7452883289510899, "learning_rate": 5.369661266568483e-06, "loss": 0.547, "step": 1823 }, { "epoch": 0.053739522413564514, "grad_norm": 2.0802312131394824, "learning_rate": 5.37260677466863e-06, "loss": 0.5119, "step": 1824 }, { "epoch": 0.053768984871028096, "grad_norm": 2.0668799607883703, "learning_rate": 5.3755522827687774e-06, "loss": 0.6009, "step": 1825 }, { "epoch": 0.05379844732849167, "grad_norm": 2.028307371312003, "learning_rate": 5.3784977908689254e-06, "loss": 0.6403, "step": 1826 }, { "epoch": 0.053827909785955246, "grad_norm": 1.9535900310493526, "learning_rate": 5.381443298969073e-06, "loss": 0.5871, "step": 1827 }, { "epoch": 0.05385737224341882, "grad_norm": 1.9605605715257801, "learning_rate": 5.38438880706922e-06, "loss": 0.5754, "step": 1828 }, { "epoch": 0.0538868347008824, "grad_norm": 1.931481446828749, "learning_rate": 5.387334315169367e-06, "loss": 0.4973, "step": 1829 }, { "epoch": 0.05391629715834598, "grad_norm": 2.0245972962376912, "learning_rate": 5.390279823269514e-06, "loss": 0.7816, "step": 1830 }, { "epoch": 0.05394575961580955, "grad_norm": 1.8197863842971485, "learning_rate": 5.393225331369661e-06, "loss": 0.5894, "step": 1831 }, { "epoch": 0.053975222073273134, "grad_norm": 2.0626447429124672, "learning_rate": 5.396170839469808e-06, "loss": 0.8149, "step": 1832 }, { "epoch": 0.05400468453073671, "grad_norm": 1.8367571438161747, "learning_rate": 5.3991163475699556e-06, "loss": 0.5803, "step": 1833 }, { "epoch": 0.054034146988200284, "grad_norm": 1.685460442544444, "learning_rate": 5.4020618556701036e-06, "loss": 0.4272, "step": 1834 }, { "epoch": 0.054063609445663866, "grad_norm": 1.8181573494685443, "learning_rate": 5.405007363770251e-06, "loss": 0.58, "step": 1835 }, { "epoch": 0.05409307190312744, "grad_norm": 2.0527659130961182, "learning_rate": 5.407952871870398e-06, "loss": 0.5425, "step": 1836 }, { "epoch": 0.054122534360591015, "grad_norm": 1.9492102080673637, "learning_rate": 5.410898379970545e-06, "loss": 0.6574, "step": 1837 }, { "epoch": 0.0541519968180546, "grad_norm": 1.88984448952009, "learning_rate": 5.413843888070692e-06, "loss": 0.4987, "step": 1838 }, { "epoch": 0.05418145927551817, "grad_norm": 1.7954343416331318, "learning_rate": 5.416789396170839e-06, "loss": 0.6003, "step": 1839 }, { "epoch": 0.05421092173298175, "grad_norm": 2.0254682048536345, "learning_rate": 5.4197349042709865e-06, "loss": 0.6924, "step": 1840 }, { "epoch": 0.05424038419044532, "grad_norm": 1.748566031134593, "learning_rate": 5.422680412371134e-06, "loss": 0.5534, "step": 1841 }, { "epoch": 0.054269846647908904, "grad_norm": 2.1863874589712515, "learning_rate": 5.425625920471282e-06, "loss": 0.6764, "step": 1842 }, { "epoch": 0.05429930910537248, "grad_norm": 1.9452524787682322, "learning_rate": 5.428571428571429e-06, "loss": 0.6735, "step": 1843 }, { "epoch": 0.05432877156283605, "grad_norm": 2.247690987302642, "learning_rate": 5.431516936671576e-06, "loss": 0.4661, "step": 1844 }, { "epoch": 0.054358234020299635, "grad_norm": 2.2384405828677982, "learning_rate": 5.434462444771723e-06, "loss": 0.739, "step": 1845 }, { "epoch": 0.05438769647776321, "grad_norm": 1.9633461019214258, "learning_rate": 5.43740795287187e-06, "loss": 0.5953, "step": 1846 }, { "epoch": 0.054417158935226785, "grad_norm": 2.150610380020207, "learning_rate": 5.4403534609720175e-06, "loss": 0.633, "step": 1847 }, { "epoch": 0.05444662139269037, "grad_norm": 2.0785018533523663, "learning_rate": 5.443298969072165e-06, "loss": 0.6797, "step": 1848 }, { "epoch": 0.05447608385015394, "grad_norm": 1.7232938220359844, "learning_rate": 5.446244477172312e-06, "loss": 0.4165, "step": 1849 }, { "epoch": 0.05450554630761752, "grad_norm": 1.9841225487501333, "learning_rate": 5.44918998527246e-06, "loss": 0.5986, "step": 1850 }, { "epoch": 0.0545350087650811, "grad_norm": 2.1189805657627474, "learning_rate": 5.452135493372607e-06, "loss": 0.5179, "step": 1851 }, { "epoch": 0.05456447122254467, "grad_norm": 1.9896586290892888, "learning_rate": 5.455081001472754e-06, "loss": 0.5281, "step": 1852 }, { "epoch": 0.05459393368000825, "grad_norm": 1.9933510624291757, "learning_rate": 5.458026509572901e-06, "loss": 0.5427, "step": 1853 }, { "epoch": 0.05462339613747182, "grad_norm": 1.6524643963639472, "learning_rate": 5.4609720176730485e-06, "loss": 0.4825, "step": 1854 }, { "epoch": 0.054652858594935405, "grad_norm": 2.062972481831144, "learning_rate": 5.463917525773196e-06, "loss": 0.6088, "step": 1855 }, { "epoch": 0.05468232105239898, "grad_norm": 1.9980877179241423, "learning_rate": 5.466863033873343e-06, "loss": 0.618, "step": 1856 }, { "epoch": 0.054711783509862555, "grad_norm": 1.7057427323045409, "learning_rate": 5.469808541973492e-06, "loss": 0.5453, "step": 1857 }, { "epoch": 0.05474124596732614, "grad_norm": 1.9299647073159352, "learning_rate": 5.472754050073639e-06, "loss": 0.5509, "step": 1858 }, { "epoch": 0.05477070842478971, "grad_norm": 2.135258366853054, "learning_rate": 5.475699558173786e-06, "loss": 0.7946, "step": 1859 }, { "epoch": 0.05480017088225329, "grad_norm": 1.8802004492671112, "learning_rate": 5.478645066273933e-06, "loss": 0.6522, "step": 1860 }, { "epoch": 0.05482963333971687, "grad_norm": 1.8745962910059504, "learning_rate": 5.48159057437408e-06, "loss": 0.6885, "step": 1861 }, { "epoch": 0.05485909579718044, "grad_norm": 1.8430112402219483, "learning_rate": 5.4845360824742275e-06, "loss": 0.5772, "step": 1862 }, { "epoch": 0.05488855825464402, "grad_norm": 1.6285071605400818, "learning_rate": 5.487481590574375e-06, "loss": 0.4725, "step": 1863 }, { "epoch": 0.0549180207121076, "grad_norm": 1.6778467777049297, "learning_rate": 5.490427098674523e-06, "loss": 0.5463, "step": 1864 }, { "epoch": 0.054947483169571175, "grad_norm": 1.9825502829854214, "learning_rate": 5.49337260677467e-06, "loss": 0.4877, "step": 1865 }, { "epoch": 0.05497694562703475, "grad_norm": 1.8921561908589037, "learning_rate": 5.496318114874817e-06, "loss": 0.5194, "step": 1866 }, { "epoch": 0.055006408084498325, "grad_norm": 1.9179231967269943, "learning_rate": 5.499263622974964e-06, "loss": 0.5364, "step": 1867 }, { "epoch": 0.05503587054196191, "grad_norm": 1.9523684343723895, "learning_rate": 5.502209131075111e-06, "loss": 0.6361, "step": 1868 }, { "epoch": 0.05506533299942548, "grad_norm": 1.904373201084417, "learning_rate": 5.5051546391752584e-06, "loss": 0.4388, "step": 1869 }, { "epoch": 0.055094795456889056, "grad_norm": 1.9677427854893301, "learning_rate": 5.508100147275406e-06, "loss": 0.5533, "step": 1870 }, { "epoch": 0.05512425791435264, "grad_norm": 1.9311232319058305, "learning_rate": 5.511045655375553e-06, "loss": 0.6278, "step": 1871 }, { "epoch": 0.05515372037181621, "grad_norm": 1.8368617869143922, "learning_rate": 5.513991163475701e-06, "loss": 0.6477, "step": 1872 }, { "epoch": 0.05518318282927979, "grad_norm": 2.3526031862091443, "learning_rate": 5.516936671575848e-06, "loss": 0.5544, "step": 1873 }, { "epoch": 0.05521264528674337, "grad_norm": 1.8017680345118903, "learning_rate": 5.519882179675995e-06, "loss": 0.4467, "step": 1874 }, { "epoch": 0.055242107744206945, "grad_norm": 1.7352821750348184, "learning_rate": 5.522827687776142e-06, "loss": 0.5894, "step": 1875 }, { "epoch": 0.05527157020167052, "grad_norm": 1.9247089167552724, "learning_rate": 5.525773195876289e-06, "loss": 0.5838, "step": 1876 }, { "epoch": 0.0553010326591341, "grad_norm": 1.8244631599066718, "learning_rate": 5.528718703976437e-06, "loss": 0.4953, "step": 1877 }, { "epoch": 0.055330495116597676, "grad_norm": 1.7773651455782635, "learning_rate": 5.531664212076584e-06, "loss": 0.4594, "step": 1878 }, { "epoch": 0.05535995757406125, "grad_norm": 1.9261503916972074, "learning_rate": 5.534609720176731e-06, "loss": 0.6507, "step": 1879 }, { "epoch": 0.055389420031524826, "grad_norm": 1.7984963181434854, "learning_rate": 5.537555228276879e-06, "loss": 0.3983, "step": 1880 }, { "epoch": 0.05541888248898841, "grad_norm": 1.919733015369868, "learning_rate": 5.540500736377026e-06, "loss": 0.7381, "step": 1881 }, { "epoch": 0.05544834494645198, "grad_norm": 1.8586369241564669, "learning_rate": 5.543446244477173e-06, "loss": 0.5215, "step": 1882 }, { "epoch": 0.05547780740391556, "grad_norm": 1.8599498373526586, "learning_rate": 5.54639175257732e-06, "loss": 0.5403, "step": 1883 }, { "epoch": 0.05550726986137914, "grad_norm": 1.7996864041532337, "learning_rate": 5.5493372606774676e-06, "loss": 0.4938, "step": 1884 }, { "epoch": 0.055536732318842714, "grad_norm": 1.9276426376956504, "learning_rate": 5.552282768777615e-06, "loss": 0.5161, "step": 1885 }, { "epoch": 0.05556619477630629, "grad_norm": 1.941455863245543, "learning_rate": 5.555228276877762e-06, "loss": 0.6704, "step": 1886 }, { "epoch": 0.05559565723376987, "grad_norm": 2.0543069864131143, "learning_rate": 5.558173784977909e-06, "loss": 0.4896, "step": 1887 }, { "epoch": 0.055625119691233446, "grad_norm": 2.00367087607361, "learning_rate": 5.561119293078057e-06, "loss": 0.4683, "step": 1888 }, { "epoch": 0.05565458214869702, "grad_norm": 1.9962485481736991, "learning_rate": 5.564064801178204e-06, "loss": 0.5891, "step": 1889 }, { "epoch": 0.0556840446061606, "grad_norm": 2.0009072555457252, "learning_rate": 5.567010309278351e-06, "loss": 0.5511, "step": 1890 }, { "epoch": 0.05571350706362418, "grad_norm": 1.9181134198739924, "learning_rate": 5.5699558173784985e-06, "loss": 0.7207, "step": 1891 }, { "epoch": 0.05574296952108775, "grad_norm": 2.1220247038721434, "learning_rate": 5.572901325478646e-06, "loss": 0.6337, "step": 1892 }, { "epoch": 0.05577243197855133, "grad_norm": 2.1281479770107734, "learning_rate": 5.575846833578793e-06, "loss": 0.7375, "step": 1893 }, { "epoch": 0.05580189443601491, "grad_norm": 2.1428520463879877, "learning_rate": 5.57879234167894e-06, "loss": 0.6225, "step": 1894 }, { "epoch": 0.055831356893478484, "grad_norm": 1.8874760985394412, "learning_rate": 5.581737849779087e-06, "loss": 0.5423, "step": 1895 }, { "epoch": 0.05586081935094206, "grad_norm": 1.9176372154994497, "learning_rate": 5.584683357879235e-06, "loss": 0.4659, "step": 1896 }, { "epoch": 0.05589028180840564, "grad_norm": 2.0183528283915932, "learning_rate": 5.587628865979382e-06, "loss": 0.7122, "step": 1897 }, { "epoch": 0.055919744265869216, "grad_norm": 1.8156043980021768, "learning_rate": 5.5905743740795295e-06, "loss": 0.5217, "step": 1898 }, { "epoch": 0.05594920672333279, "grad_norm": 2.2057059541346558, "learning_rate": 5.593519882179677e-06, "loss": 0.7347, "step": 1899 }, { "epoch": 0.05597866918079637, "grad_norm": 2.237325610934337, "learning_rate": 5.596465390279824e-06, "loss": 0.7482, "step": 1900 }, { "epoch": 0.05600813163825995, "grad_norm": 1.8720249496270602, "learning_rate": 5.599410898379971e-06, "loss": 0.8173, "step": 1901 }, { "epoch": 0.05603759409572352, "grad_norm": 1.838823396689465, "learning_rate": 5.602356406480118e-06, "loss": 0.5362, "step": 1902 }, { "epoch": 0.056067056553187104, "grad_norm": 1.8640588833625507, "learning_rate": 5.605301914580265e-06, "loss": 0.5191, "step": 1903 }, { "epoch": 0.05609651901065068, "grad_norm": 1.8417890546812314, "learning_rate": 5.6082474226804125e-06, "loss": 0.5856, "step": 1904 }, { "epoch": 0.056125981468114254, "grad_norm": 1.8945368978643957, "learning_rate": 5.6111929307805605e-06, "loss": 0.6054, "step": 1905 }, { "epoch": 0.05615544392557783, "grad_norm": 1.6386696068896849, "learning_rate": 5.614138438880708e-06, "loss": 0.4793, "step": 1906 }, { "epoch": 0.05618490638304141, "grad_norm": 2.0491514704495826, "learning_rate": 5.617083946980855e-06, "loss": 0.3969, "step": 1907 }, { "epoch": 0.056214368840504986, "grad_norm": 2.10045679240871, "learning_rate": 5.620029455081002e-06, "loss": 0.5999, "step": 1908 }, { "epoch": 0.05624383129796856, "grad_norm": 1.8295824539158774, "learning_rate": 5.622974963181149e-06, "loss": 0.5555, "step": 1909 }, { "epoch": 0.05627329375543214, "grad_norm": 1.8633604093646294, "learning_rate": 5.625920471281296e-06, "loss": 0.5502, "step": 1910 }, { "epoch": 0.05630275621289572, "grad_norm": 1.8480126347623789, "learning_rate": 5.6288659793814435e-06, "loss": 0.5355, "step": 1911 }, { "epoch": 0.05633221867035929, "grad_norm": 1.868289784885085, "learning_rate": 5.631811487481591e-06, "loss": 0.6814, "step": 1912 }, { "epoch": 0.056361681127822874, "grad_norm": 1.8684354893197355, "learning_rate": 5.634756995581739e-06, "loss": 0.5677, "step": 1913 }, { "epoch": 0.05639114358528645, "grad_norm": 1.8433067664714975, "learning_rate": 5.637702503681886e-06, "loss": 0.4599, "step": 1914 }, { "epoch": 0.056420606042750024, "grad_norm": 1.9781501728116397, "learning_rate": 5.640648011782033e-06, "loss": 0.7142, "step": 1915 }, { "epoch": 0.056450068500213606, "grad_norm": 1.8421342167684038, "learning_rate": 5.64359351988218e-06, "loss": 0.6238, "step": 1916 }, { "epoch": 0.05647953095767718, "grad_norm": 1.8723239306074586, "learning_rate": 5.646539027982327e-06, "loss": 0.5009, "step": 1917 }, { "epoch": 0.056508993415140756, "grad_norm": 1.726961971477096, "learning_rate": 5.6494845360824744e-06, "loss": 0.4614, "step": 1918 }, { "epoch": 0.05653845587260434, "grad_norm": 1.9007523259889152, "learning_rate": 5.652430044182622e-06, "loss": 0.5589, "step": 1919 }, { "epoch": 0.05656791833006791, "grad_norm": 1.912604485967143, "learning_rate": 5.655375552282769e-06, "loss": 0.6293, "step": 1920 }, { "epoch": 0.05659738078753149, "grad_norm": 2.1496350794518997, "learning_rate": 5.658321060382917e-06, "loss": 0.6382, "step": 1921 }, { "epoch": 0.05662684324499506, "grad_norm": 1.7739501560622115, "learning_rate": 5.661266568483064e-06, "loss": 0.4041, "step": 1922 }, { "epoch": 0.056656305702458644, "grad_norm": 1.88442223541658, "learning_rate": 5.664212076583211e-06, "loss": 0.5415, "step": 1923 }, { "epoch": 0.05668576815992222, "grad_norm": 1.6727510389799802, "learning_rate": 5.667157584683358e-06, "loss": 0.5063, "step": 1924 }, { "epoch": 0.056715230617385794, "grad_norm": 2.063708697339161, "learning_rate": 5.670103092783505e-06, "loss": 0.5718, "step": 1925 }, { "epoch": 0.056744693074849376, "grad_norm": 1.8408632012168695, "learning_rate": 5.6730486008836526e-06, "loss": 0.6212, "step": 1926 }, { "epoch": 0.05677415553231295, "grad_norm": 1.6683611907921743, "learning_rate": 5.6759941089838e-06, "loss": 0.4224, "step": 1927 }, { "epoch": 0.056803617989776525, "grad_norm": 1.9641316185165167, "learning_rate": 5.678939617083947e-06, "loss": 0.6309, "step": 1928 }, { "epoch": 0.05683308044724011, "grad_norm": 1.8590069678145489, "learning_rate": 5.681885125184095e-06, "loss": 0.644, "step": 1929 }, { "epoch": 0.05686254290470368, "grad_norm": 1.8852958645853086, "learning_rate": 5.684830633284242e-06, "loss": 0.6049, "step": 1930 }, { "epoch": 0.05689200536216726, "grad_norm": 1.8932395036030603, "learning_rate": 5.687776141384389e-06, "loss": 0.5805, "step": 1931 }, { "epoch": 0.05692146781963084, "grad_norm": 1.754678972005208, "learning_rate": 5.690721649484536e-06, "loss": 0.5856, "step": 1932 }, { "epoch": 0.056950930277094414, "grad_norm": 2.036119306424485, "learning_rate": 5.6936671575846835e-06, "loss": 0.5358, "step": 1933 }, { "epoch": 0.05698039273455799, "grad_norm": 1.8666176024101742, "learning_rate": 5.696612665684831e-06, "loss": 0.6147, "step": 1934 }, { "epoch": 0.057009855192021563, "grad_norm": 2.046805822447233, "learning_rate": 5.699558173784978e-06, "loss": 0.5773, "step": 1935 }, { "epoch": 0.057039317649485145, "grad_norm": 1.868732162318134, "learning_rate": 5.702503681885125e-06, "loss": 0.5703, "step": 1936 }, { "epoch": 0.05706878010694872, "grad_norm": 1.7594211758927236, "learning_rate": 5.705449189985273e-06, "loss": 0.4939, "step": 1937 }, { "epoch": 0.057098242564412295, "grad_norm": 2.2180622137701933, "learning_rate": 5.70839469808542e-06, "loss": 0.5792, "step": 1938 }, { "epoch": 0.05712770502187588, "grad_norm": 1.8468118046186328, "learning_rate": 5.711340206185567e-06, "loss": 0.6059, "step": 1939 }, { "epoch": 0.05715716747933945, "grad_norm": 1.8969074923266316, "learning_rate": 5.7142857142857145e-06, "loss": 0.5808, "step": 1940 }, { "epoch": 0.05718662993680303, "grad_norm": 1.6715445713766561, "learning_rate": 5.717231222385862e-06, "loss": 0.5067, "step": 1941 }, { "epoch": 0.05721609239426661, "grad_norm": 2.0083017806284564, "learning_rate": 5.720176730486009e-06, "loss": 0.5903, "step": 1942 }, { "epoch": 0.057245554851730183, "grad_norm": 1.9532620852565223, "learning_rate": 5.723122238586156e-06, "loss": 0.5174, "step": 1943 }, { "epoch": 0.05727501730919376, "grad_norm": 1.7496358523635462, "learning_rate": 5.726067746686303e-06, "loss": 0.6896, "step": 1944 }, { "epoch": 0.05730447976665734, "grad_norm": 1.8774636573435415, "learning_rate": 5.729013254786451e-06, "loss": 0.6814, "step": 1945 }, { "epoch": 0.057333942224120915, "grad_norm": 1.8888080367055817, "learning_rate": 5.731958762886598e-06, "loss": 0.553, "step": 1946 }, { "epoch": 0.05736340468158449, "grad_norm": 2.190983287809453, "learning_rate": 5.7349042709867455e-06, "loss": 0.6075, "step": 1947 }, { "epoch": 0.057392867139048065, "grad_norm": 1.7138079520521252, "learning_rate": 5.737849779086893e-06, "loss": 0.3742, "step": 1948 }, { "epoch": 0.05742232959651165, "grad_norm": 1.796075027807234, "learning_rate": 5.74079528718704e-06, "loss": 0.4865, "step": 1949 }, { "epoch": 0.05745179205397522, "grad_norm": 2.081708744883543, "learning_rate": 5.743740795287187e-06, "loss": 0.7387, "step": 1950 }, { "epoch": 0.0574812545114388, "grad_norm": 1.7417009324304222, "learning_rate": 5.746686303387334e-06, "loss": 0.533, "step": 1951 }, { "epoch": 0.05751071696890238, "grad_norm": 1.6550077663324358, "learning_rate": 5.749631811487481e-06, "loss": 0.5549, "step": 1952 }, { "epoch": 0.05754017942636595, "grad_norm": 2.247714004579519, "learning_rate": 5.752577319587629e-06, "loss": 0.6185, "step": 1953 }, { "epoch": 0.05756964188382953, "grad_norm": 1.9746325228646109, "learning_rate": 5.7555228276877765e-06, "loss": 0.6171, "step": 1954 }, { "epoch": 0.05759910434129311, "grad_norm": 1.8056566313848725, "learning_rate": 5.758468335787924e-06, "loss": 0.6463, "step": 1955 }, { "epoch": 0.057628566798756685, "grad_norm": 2.0996300750089993, "learning_rate": 5.761413843888071e-06, "loss": 0.6562, "step": 1956 }, { "epoch": 0.05765802925622026, "grad_norm": 1.8663469646317816, "learning_rate": 5.764359351988218e-06, "loss": 0.6173, "step": 1957 }, { "epoch": 0.05768749171368384, "grad_norm": 2.2354813209619167, "learning_rate": 5.767304860088365e-06, "loss": 0.5051, "step": 1958 }, { "epoch": 0.05771695417114742, "grad_norm": 2.0270517299204918, "learning_rate": 5.770250368188512e-06, "loss": 0.5379, "step": 1959 }, { "epoch": 0.05774641662861099, "grad_norm": 2.248342361454714, "learning_rate": 5.7731958762886594e-06, "loss": 0.7058, "step": 1960 }, { "epoch": 0.057775879086074566, "grad_norm": 1.7252241771745767, "learning_rate": 5.776141384388807e-06, "loss": 0.5073, "step": 1961 }, { "epoch": 0.05780534154353815, "grad_norm": 1.780993969486548, "learning_rate": 5.779086892488955e-06, "loss": 0.531, "step": 1962 }, { "epoch": 0.05783480400100172, "grad_norm": 2.193030447908043, "learning_rate": 5.782032400589103e-06, "loss": 0.696, "step": 1963 }, { "epoch": 0.0578642664584653, "grad_norm": 1.6460779010483373, "learning_rate": 5.78497790868925e-06, "loss": 0.4177, "step": 1964 }, { "epoch": 0.05789372891592888, "grad_norm": 1.9531690238854247, "learning_rate": 5.787923416789397e-06, "loss": 0.4751, "step": 1965 }, { "epoch": 0.057923191373392455, "grad_norm": 1.770215892825399, "learning_rate": 5.790868924889544e-06, "loss": 0.527, "step": 1966 }, { "epoch": 0.05795265383085603, "grad_norm": 1.859394628265911, "learning_rate": 5.793814432989692e-06, "loss": 0.5415, "step": 1967 }, { "epoch": 0.05798211628831961, "grad_norm": 1.770262106284854, "learning_rate": 5.796759941089839e-06, "loss": 0.5844, "step": 1968 }, { "epoch": 0.058011578745783186, "grad_norm": 1.8971303342722383, "learning_rate": 5.799705449189986e-06, "loss": 0.6221, "step": 1969 }, { "epoch": 0.05804104120324676, "grad_norm": 2.047755353355592, "learning_rate": 5.802650957290134e-06, "loss": 0.664, "step": 1970 }, { "epoch": 0.05807050366071034, "grad_norm": 2.2391631323509387, "learning_rate": 5.805596465390281e-06, "loss": 0.5754, "step": 1971 }, { "epoch": 0.05809996611817392, "grad_norm": 1.893583754394986, "learning_rate": 5.808541973490428e-06, "loss": 0.6318, "step": 1972 }, { "epoch": 0.05812942857563749, "grad_norm": 1.7589522380529015, "learning_rate": 5.811487481590575e-06, "loss": 0.603, "step": 1973 }, { "epoch": 0.05815889103310107, "grad_norm": 1.934152260734167, "learning_rate": 5.814432989690722e-06, "loss": 0.7237, "step": 1974 }, { "epoch": 0.05818835349056465, "grad_norm": 1.9394051902853462, "learning_rate": 5.81737849779087e-06, "loss": 0.6851, "step": 1975 }, { "epoch": 0.058217815948028225, "grad_norm": 2.156415058444471, "learning_rate": 5.820324005891017e-06, "loss": 0.7532, "step": 1976 }, { "epoch": 0.0582472784054918, "grad_norm": 1.9036834238907927, "learning_rate": 5.8232695139911646e-06, "loss": 0.6317, "step": 1977 }, { "epoch": 0.05827674086295538, "grad_norm": 2.0856979414030685, "learning_rate": 5.826215022091312e-06, "loss": 0.6276, "step": 1978 }, { "epoch": 0.058306203320418956, "grad_norm": 1.9185049028934733, "learning_rate": 5.829160530191459e-06, "loss": 0.6629, "step": 1979 }, { "epoch": 0.05833566577788253, "grad_norm": 1.9856198444296322, "learning_rate": 5.832106038291606e-06, "loss": 0.6466, "step": 1980 }, { "epoch": 0.05836512823534611, "grad_norm": 1.8076348530653625, "learning_rate": 5.835051546391753e-06, "loss": 0.5487, "step": 1981 }, { "epoch": 0.05839459069280969, "grad_norm": 1.7493915734266716, "learning_rate": 5.8379970544919e-06, "loss": 0.563, "step": 1982 }, { "epoch": 0.05842405315027326, "grad_norm": 2.014189377504321, "learning_rate": 5.840942562592048e-06, "loss": 0.5566, "step": 1983 }, { "epoch": 0.058453515607736845, "grad_norm": 1.9439163436038325, "learning_rate": 5.8438880706921955e-06, "loss": 0.7085, "step": 1984 }, { "epoch": 0.05848297806520042, "grad_norm": 2.120289617511591, "learning_rate": 5.846833578792343e-06, "loss": 0.7246, "step": 1985 }, { "epoch": 0.058512440522663994, "grad_norm": 1.9020074023568658, "learning_rate": 5.84977908689249e-06, "loss": 0.7152, "step": 1986 }, { "epoch": 0.05854190298012757, "grad_norm": 1.8223925844455162, "learning_rate": 5.852724594992637e-06, "loss": 0.6632, "step": 1987 }, { "epoch": 0.05857136543759115, "grad_norm": 1.8460160618277524, "learning_rate": 5.855670103092784e-06, "loss": 0.7851, "step": 1988 }, { "epoch": 0.058600827895054726, "grad_norm": 1.7930622895857007, "learning_rate": 5.858615611192931e-06, "loss": 0.6342, "step": 1989 }, { "epoch": 0.0586302903525183, "grad_norm": 1.9794225697459398, "learning_rate": 5.8615611192930785e-06, "loss": 0.6205, "step": 1990 }, { "epoch": 0.05865975280998188, "grad_norm": 2.0446114123050205, "learning_rate": 5.864506627393226e-06, "loss": 0.7222, "step": 1991 }, { "epoch": 0.05868921526744546, "grad_norm": 1.837122087366503, "learning_rate": 5.867452135493374e-06, "loss": 0.603, "step": 1992 }, { "epoch": 0.05871867772490903, "grad_norm": 1.8787102375428235, "learning_rate": 5.870397643593521e-06, "loss": 0.5357, "step": 1993 }, { "epoch": 0.058748140182372614, "grad_norm": 1.9864856011147227, "learning_rate": 5.873343151693668e-06, "loss": 0.6715, "step": 1994 }, { "epoch": 0.05877760263983619, "grad_norm": 1.7774046467826705, "learning_rate": 5.876288659793815e-06, "loss": 0.5653, "step": 1995 }, { "epoch": 0.058807065097299764, "grad_norm": 2.2039275733852963, "learning_rate": 5.879234167893962e-06, "loss": 0.7784, "step": 1996 }, { "epoch": 0.058836527554763346, "grad_norm": 1.9413671492783715, "learning_rate": 5.8821796759941095e-06, "loss": 0.5872, "step": 1997 }, { "epoch": 0.05886599001222692, "grad_norm": 1.9153057249244507, "learning_rate": 5.885125184094257e-06, "loss": 0.5965, "step": 1998 }, { "epoch": 0.058895452469690496, "grad_norm": 1.8967273953867037, "learning_rate": 5.888070692194404e-06, "loss": 0.4317, "step": 1999 }, { "epoch": 0.05892491492715407, "grad_norm": 1.8132201983066727, "learning_rate": 5.891016200294552e-06, "loss": 0.5952, "step": 2000 }, { "epoch": 0.05895437738461765, "grad_norm": 1.8301711243335088, "learning_rate": 5.893961708394699e-06, "loss": 0.5958, "step": 2001 }, { "epoch": 0.05898383984208123, "grad_norm": 1.7838398274398921, "learning_rate": 5.896907216494846e-06, "loss": 0.4553, "step": 2002 }, { "epoch": 0.0590133022995448, "grad_norm": 1.9879788442309745, "learning_rate": 5.899852724594993e-06, "loss": 0.5278, "step": 2003 }, { "epoch": 0.059042764757008384, "grad_norm": 1.9422017309161947, "learning_rate": 5.9027982326951404e-06, "loss": 0.5344, "step": 2004 }, { "epoch": 0.05907222721447196, "grad_norm": 1.985550630946066, "learning_rate": 5.905743740795288e-06, "loss": 0.5134, "step": 2005 }, { "epoch": 0.059101689671935534, "grad_norm": 1.8124780106765759, "learning_rate": 5.908689248895435e-06, "loss": 0.4922, "step": 2006 }, { "epoch": 0.059131152129399116, "grad_norm": 2.133443685054474, "learning_rate": 5.911634756995582e-06, "loss": 0.5073, "step": 2007 }, { "epoch": 0.05916061458686269, "grad_norm": 1.8599619465790544, "learning_rate": 5.91458026509573e-06, "loss": 0.616, "step": 2008 }, { "epoch": 0.059190077044326266, "grad_norm": 1.9041002772111846, "learning_rate": 5.917525773195877e-06, "loss": 0.5079, "step": 2009 }, { "epoch": 0.05921953950178985, "grad_norm": 1.7892037630393132, "learning_rate": 5.920471281296024e-06, "loss": 0.5013, "step": 2010 }, { "epoch": 0.05924900195925342, "grad_norm": 1.7651589243803734, "learning_rate": 5.923416789396171e-06, "loss": 0.5759, "step": 2011 }, { "epoch": 0.059278464416717, "grad_norm": 1.904084813778536, "learning_rate": 5.926362297496319e-06, "loss": 0.6378, "step": 2012 }, { "epoch": 0.05930792687418057, "grad_norm": 2.0217474053448954, "learning_rate": 5.929307805596466e-06, "loss": 0.6389, "step": 2013 }, { "epoch": 0.059337389331644154, "grad_norm": 2.1294668041647378, "learning_rate": 5.932253313696613e-06, "loss": 0.5753, "step": 2014 }, { "epoch": 0.05936685178910773, "grad_norm": 1.9389761350201087, "learning_rate": 5.93519882179676e-06, "loss": 0.6347, "step": 2015 }, { "epoch": 0.059396314246571304, "grad_norm": 2.0037344326535407, "learning_rate": 5.938144329896908e-06, "loss": 0.6923, "step": 2016 }, { "epoch": 0.059425776704034886, "grad_norm": 2.086231298105764, "learning_rate": 5.941089837997055e-06, "loss": 0.5962, "step": 2017 }, { "epoch": 0.05945523916149846, "grad_norm": 2.04413549734548, "learning_rate": 5.944035346097202e-06, "loss": 0.7206, "step": 2018 }, { "epoch": 0.059484701618962035, "grad_norm": 1.9338271243198548, "learning_rate": 5.9469808541973496e-06, "loss": 0.6755, "step": 2019 }, { "epoch": 0.05951416407642562, "grad_norm": 1.7899586984324731, "learning_rate": 5.949926362297497e-06, "loss": 0.5711, "step": 2020 }, { "epoch": 0.05954362653388919, "grad_norm": 2.0133377356625806, "learning_rate": 5.952871870397644e-06, "loss": 0.7272, "step": 2021 }, { "epoch": 0.05957308899135277, "grad_norm": 1.8448030077700253, "learning_rate": 5.955817378497791e-06, "loss": 0.5163, "step": 2022 }, { "epoch": 0.05960255144881635, "grad_norm": 2.007596952132068, "learning_rate": 5.958762886597938e-06, "loss": 0.6279, "step": 2023 }, { "epoch": 0.059632013906279924, "grad_norm": 1.8181396992036511, "learning_rate": 5.961708394698086e-06, "loss": 0.6226, "step": 2024 }, { "epoch": 0.0596614763637435, "grad_norm": 1.577680454124183, "learning_rate": 5.964653902798233e-06, "loss": 0.4912, "step": 2025 }, { "epoch": 0.059690938821207074, "grad_norm": 1.721947864034179, "learning_rate": 5.9675994108983805e-06, "loss": 0.5562, "step": 2026 }, { "epoch": 0.059720401278670655, "grad_norm": 1.8053547450577803, "learning_rate": 5.970544918998528e-06, "loss": 0.5689, "step": 2027 }, { "epoch": 0.05974986373613423, "grad_norm": 1.8505943759739516, "learning_rate": 5.973490427098675e-06, "loss": 0.5632, "step": 2028 }, { "epoch": 0.059779326193597805, "grad_norm": 1.8351543619638682, "learning_rate": 5.976435935198822e-06, "loss": 0.6975, "step": 2029 }, { "epoch": 0.05980878865106139, "grad_norm": 2.0380797589515747, "learning_rate": 5.979381443298969e-06, "loss": 0.6599, "step": 2030 }, { "epoch": 0.05983825110852496, "grad_norm": 1.794883743625357, "learning_rate": 5.982326951399116e-06, "loss": 0.4329, "step": 2031 }, { "epoch": 0.05986771356598854, "grad_norm": 1.8696713710720019, "learning_rate": 5.985272459499264e-06, "loss": 0.4301, "step": 2032 }, { "epoch": 0.05989717602345212, "grad_norm": 1.8725867155337015, "learning_rate": 5.9882179675994115e-06, "loss": 0.4714, "step": 2033 }, { "epoch": 0.059926638480915694, "grad_norm": 1.7708593229198561, "learning_rate": 5.991163475699559e-06, "loss": 0.5275, "step": 2034 }, { "epoch": 0.05995610093837927, "grad_norm": 1.964945060111942, "learning_rate": 5.994108983799706e-06, "loss": 0.5459, "step": 2035 }, { "epoch": 0.05998556339584285, "grad_norm": 1.7573853643903374, "learning_rate": 5.997054491899853e-06, "loss": 0.5733, "step": 2036 }, { "epoch": 0.060015025853306425, "grad_norm": 1.7293963442626707, "learning_rate": 6e-06, "loss": 0.6041, "step": 2037 }, { "epoch": 0.06004448831077, "grad_norm": 1.881094090883829, "learning_rate": 6.002945508100147e-06, "loss": 0.596, "step": 2038 }, { "epoch": 0.060073950768233575, "grad_norm": 1.8328271947191417, "learning_rate": 6.0058910162002945e-06, "loss": 0.5741, "step": 2039 }, { "epoch": 0.06010341322569716, "grad_norm": 1.7174936600595994, "learning_rate": 6.0088365243004425e-06, "loss": 0.534, "step": 2040 }, { "epoch": 0.06013287568316073, "grad_norm": 1.713827822501352, "learning_rate": 6.01178203240059e-06, "loss": 0.5187, "step": 2041 }, { "epoch": 0.06016233814062431, "grad_norm": 1.8461132373200464, "learning_rate": 6.014727540500737e-06, "loss": 0.6297, "step": 2042 }, { "epoch": 0.06019180059808789, "grad_norm": 1.6826706463065935, "learning_rate": 6.017673048600884e-06, "loss": 0.4885, "step": 2043 }, { "epoch": 0.06022126305555146, "grad_norm": 1.806903941052195, "learning_rate": 6.020618556701031e-06, "loss": 0.617, "step": 2044 }, { "epoch": 0.06025072551301504, "grad_norm": 2.1207928762710138, "learning_rate": 6.023564064801178e-06, "loss": 0.4748, "step": 2045 }, { "epoch": 0.06028018797047862, "grad_norm": 1.8601972476975708, "learning_rate": 6.0265095729013254e-06, "loss": 0.4877, "step": 2046 }, { "epoch": 0.060309650427942195, "grad_norm": 1.8786918329797144, "learning_rate": 6.029455081001473e-06, "loss": 0.5991, "step": 2047 }, { "epoch": 0.06033911288540577, "grad_norm": 2.086151195047866, "learning_rate": 6.03240058910162e-06, "loss": 0.6414, "step": 2048 }, { "epoch": 0.06036857534286935, "grad_norm": 1.6725527181022493, "learning_rate": 6.035346097201768e-06, "loss": 0.4977, "step": 2049 }, { "epoch": 0.06039803780033293, "grad_norm": 1.9347732549446357, "learning_rate": 6.038291605301915e-06, "loss": 0.4965, "step": 2050 }, { "epoch": 0.0604275002577965, "grad_norm": 2.074748127549838, "learning_rate": 6.041237113402062e-06, "loss": 0.5753, "step": 2051 }, { "epoch": 0.06045696271526008, "grad_norm": 1.8711846003938037, "learning_rate": 6.044182621502209e-06, "loss": 0.5797, "step": 2052 }, { "epoch": 0.06048642517272366, "grad_norm": 1.8815310007777406, "learning_rate": 6.0471281296023564e-06, "loss": 0.5137, "step": 2053 }, { "epoch": 0.06051588763018723, "grad_norm": 1.774075793252007, "learning_rate": 6.050073637702504e-06, "loss": 0.6021, "step": 2054 }, { "epoch": 0.06054535008765081, "grad_norm": 2.0531868438048098, "learning_rate": 6.053019145802651e-06, "loss": 0.6084, "step": 2055 }, { "epoch": 0.06057481254511439, "grad_norm": 1.8450873136234112, "learning_rate": 6.055964653902798e-06, "loss": 0.5412, "step": 2056 }, { "epoch": 0.060604275002577965, "grad_norm": 2.1376022634337866, "learning_rate": 6.058910162002946e-06, "loss": 0.682, "step": 2057 }, { "epoch": 0.06063373746004154, "grad_norm": 1.8725296739966821, "learning_rate": 6.061855670103093e-06, "loss": 0.6206, "step": 2058 }, { "epoch": 0.06066319991750512, "grad_norm": 2.086467816827535, "learning_rate": 6.06480117820324e-06, "loss": 0.5777, "step": 2059 }, { "epoch": 0.060692662374968696, "grad_norm": 2.1959467225419114, "learning_rate": 6.067746686303387e-06, "loss": 0.8524, "step": 2060 }, { "epoch": 0.06072212483243227, "grad_norm": 1.7916613255398972, "learning_rate": 6.0706921944035346e-06, "loss": 0.4474, "step": 2061 }, { "epoch": 0.06075158728989585, "grad_norm": 1.7199892328147686, "learning_rate": 6.073637702503682e-06, "loss": 0.5457, "step": 2062 }, { "epoch": 0.06078104974735943, "grad_norm": 1.7119814899051942, "learning_rate": 6.076583210603829e-06, "loss": 0.4974, "step": 2063 }, { "epoch": 0.060810512204823, "grad_norm": 1.874179644181801, "learning_rate": 6.079528718703976e-06, "loss": 0.6766, "step": 2064 }, { "epoch": 0.060839974662286585, "grad_norm": 1.8327277353891753, "learning_rate": 6.082474226804124e-06, "loss": 0.4939, "step": 2065 }, { "epoch": 0.06086943711975016, "grad_norm": 1.8484952187543349, "learning_rate": 6.085419734904271e-06, "loss": 0.6078, "step": 2066 }, { "epoch": 0.060898899577213735, "grad_norm": 1.7987923630752116, "learning_rate": 6.088365243004418e-06, "loss": 0.5804, "step": 2067 }, { "epoch": 0.06092836203467731, "grad_norm": 1.9387029118352157, "learning_rate": 6.0913107511045655e-06, "loss": 0.5601, "step": 2068 }, { "epoch": 0.06095782449214089, "grad_norm": 1.8594669165927358, "learning_rate": 6.0942562592047135e-06, "loss": 0.6468, "step": 2069 }, { "epoch": 0.060987286949604466, "grad_norm": 1.9123955158115198, "learning_rate": 6.0972017673048616e-06, "loss": 0.6509, "step": 2070 }, { "epoch": 0.06101674940706804, "grad_norm": 2.1078064093143203, "learning_rate": 6.100147275405009e-06, "loss": 0.6556, "step": 2071 }, { "epoch": 0.06104621186453162, "grad_norm": 1.9393478007320428, "learning_rate": 6.103092783505156e-06, "loss": 0.6591, "step": 2072 }, { "epoch": 0.0610756743219952, "grad_norm": 1.6795627046867383, "learning_rate": 6.106038291605303e-06, "loss": 0.5284, "step": 2073 }, { "epoch": 0.06110513677945877, "grad_norm": 2.263760921189945, "learning_rate": 6.10898379970545e-06, "loss": 0.6193, "step": 2074 }, { "epoch": 0.061134599236922355, "grad_norm": 1.8094750624117781, "learning_rate": 6.111929307805597e-06, "loss": 0.5907, "step": 2075 }, { "epoch": 0.06116406169438593, "grad_norm": 1.9843470468777842, "learning_rate": 6.1148748159057445e-06, "loss": 0.6537, "step": 2076 }, { "epoch": 0.061193524151849504, "grad_norm": 1.6944987992131668, "learning_rate": 6.117820324005892e-06, "loss": 0.4186, "step": 2077 }, { "epoch": 0.061222986609313086, "grad_norm": 2.1716599566141612, "learning_rate": 6.120765832106039e-06, "loss": 0.6735, "step": 2078 }, { "epoch": 0.06125244906677666, "grad_norm": 2.093252855534154, "learning_rate": 6.123711340206187e-06, "loss": 0.6293, "step": 2079 }, { "epoch": 0.061281911524240236, "grad_norm": 1.901117379548539, "learning_rate": 6.126656848306334e-06, "loss": 0.6113, "step": 2080 }, { "epoch": 0.06131137398170381, "grad_norm": 2.361851381157599, "learning_rate": 6.129602356406481e-06, "loss": 0.7069, "step": 2081 }, { "epoch": 0.06134083643916739, "grad_norm": 1.848539738607469, "learning_rate": 6.132547864506628e-06, "loss": 0.458, "step": 2082 }, { "epoch": 0.06137029889663097, "grad_norm": 1.9518618762013673, "learning_rate": 6.1354933726067755e-06, "loss": 0.642, "step": 2083 }, { "epoch": 0.06139976135409454, "grad_norm": 1.9450574087900285, "learning_rate": 6.138438880706923e-06, "loss": 0.5865, "step": 2084 }, { "epoch": 0.061429223811558124, "grad_norm": 1.8186756091433658, "learning_rate": 6.14138438880707e-06, "loss": 0.5231, "step": 2085 }, { "epoch": 0.0614586862690217, "grad_norm": 1.8442221828467829, "learning_rate": 6.144329896907217e-06, "loss": 0.4643, "step": 2086 }, { "epoch": 0.061488148726485274, "grad_norm": 1.8259093600116163, "learning_rate": 6.147275405007365e-06, "loss": 0.474, "step": 2087 }, { "epoch": 0.061517611183948856, "grad_norm": 1.796497184805409, "learning_rate": 6.150220913107512e-06, "loss": 0.5067, "step": 2088 }, { "epoch": 0.06154707364141243, "grad_norm": 1.9583785770562514, "learning_rate": 6.153166421207659e-06, "loss": 0.8252, "step": 2089 }, { "epoch": 0.061576536098876006, "grad_norm": 1.8091195497098638, "learning_rate": 6.1561119293078065e-06, "loss": 0.6144, "step": 2090 }, { "epoch": 0.06160599855633959, "grad_norm": 1.8700555547709574, "learning_rate": 6.159057437407954e-06, "loss": 0.5789, "step": 2091 }, { "epoch": 0.06163546101380316, "grad_norm": 2.2948994738155575, "learning_rate": 6.162002945508101e-06, "loss": 0.5933, "step": 2092 }, { "epoch": 0.06166492347126674, "grad_norm": 1.7731724969105933, "learning_rate": 6.164948453608248e-06, "loss": 0.5332, "step": 2093 }, { "epoch": 0.06169438592873031, "grad_norm": 1.7723583691673415, "learning_rate": 6.167893961708395e-06, "loss": 0.4355, "step": 2094 }, { "epoch": 0.061723848386193894, "grad_norm": 2.244608569947548, "learning_rate": 6.170839469808543e-06, "loss": 0.8091, "step": 2095 }, { "epoch": 0.06175331084365747, "grad_norm": 1.9212335270880723, "learning_rate": 6.17378497790869e-06, "loss": 0.5628, "step": 2096 }, { "epoch": 0.061782773301121044, "grad_norm": 2.129937102301879, "learning_rate": 6.1767304860088374e-06, "loss": 0.3944, "step": 2097 }, { "epoch": 0.061812235758584626, "grad_norm": 1.6004773023622934, "learning_rate": 6.179675994108985e-06, "loss": 0.4964, "step": 2098 }, { "epoch": 0.0618416982160482, "grad_norm": 1.9469213619871355, "learning_rate": 6.182621502209132e-06, "loss": 0.5727, "step": 2099 }, { "epoch": 0.061871160673511776, "grad_norm": 1.9718359421970926, "learning_rate": 6.185567010309279e-06, "loss": 0.7447, "step": 2100 }, { "epoch": 0.06190062313097536, "grad_norm": 1.8898795110257531, "learning_rate": 6.188512518409426e-06, "loss": 0.5876, "step": 2101 }, { "epoch": 0.06193008558843893, "grad_norm": 1.7430044603716295, "learning_rate": 6.191458026509573e-06, "loss": 0.4722, "step": 2102 }, { "epoch": 0.06195954804590251, "grad_norm": 1.8277249803663997, "learning_rate": 6.194403534609721e-06, "loss": 0.6796, "step": 2103 }, { "epoch": 0.06198901050336609, "grad_norm": 1.9671505242493985, "learning_rate": 6.197349042709868e-06, "loss": 0.5886, "step": 2104 }, { "epoch": 0.062018472960829664, "grad_norm": 1.9257979305645598, "learning_rate": 6.200294550810016e-06, "loss": 0.5749, "step": 2105 }, { "epoch": 0.06204793541829324, "grad_norm": 2.410759900636683, "learning_rate": 6.203240058910163e-06, "loss": 0.6012, "step": 2106 }, { "epoch": 0.062077397875756814, "grad_norm": 1.853956255927783, "learning_rate": 6.20618556701031e-06, "loss": 0.6246, "step": 2107 }, { "epoch": 0.062106860333220396, "grad_norm": 1.9930159620206251, "learning_rate": 6.209131075110457e-06, "loss": 0.6377, "step": 2108 }, { "epoch": 0.06213632279068397, "grad_norm": 1.971948792253636, "learning_rate": 6.212076583210604e-06, "loss": 0.4982, "step": 2109 }, { "epoch": 0.062165785248147545, "grad_norm": 2.2830172880942734, "learning_rate": 6.215022091310751e-06, "loss": 0.6325, "step": 2110 }, { "epoch": 0.06219524770561113, "grad_norm": 2.1016996029644304, "learning_rate": 6.217967599410899e-06, "loss": 0.5626, "step": 2111 }, { "epoch": 0.0622247101630747, "grad_norm": 1.9298000594837976, "learning_rate": 6.2209131075110466e-06, "loss": 0.5917, "step": 2112 }, { "epoch": 0.06225417262053828, "grad_norm": 2.1316488134444675, "learning_rate": 6.223858615611194e-06, "loss": 0.5132, "step": 2113 }, { "epoch": 0.06228363507800186, "grad_norm": 2.0788632328599115, "learning_rate": 6.226804123711341e-06, "loss": 0.7001, "step": 2114 }, { "epoch": 0.062313097535465434, "grad_norm": 1.7776996778002792, "learning_rate": 6.229749631811488e-06, "loss": 0.5937, "step": 2115 }, { "epoch": 0.06234255999292901, "grad_norm": 1.7987533932401887, "learning_rate": 6.232695139911635e-06, "loss": 0.424, "step": 2116 }, { "epoch": 0.06237202245039259, "grad_norm": 1.8527858634390464, "learning_rate": 6.235640648011782e-06, "loss": 0.5022, "step": 2117 }, { "epoch": 0.062401484907856165, "grad_norm": 1.790143827702178, "learning_rate": 6.2385861561119295e-06, "loss": 0.5302, "step": 2118 }, { "epoch": 0.06243094736531974, "grad_norm": 2.299616334657876, "learning_rate": 6.2415316642120775e-06, "loss": 0.5451, "step": 2119 }, { "epoch": 0.062460409822783315, "grad_norm": 1.8538078483328115, "learning_rate": 6.244477172312225e-06, "loss": 0.6741, "step": 2120 }, { "epoch": 0.0624898722802469, "grad_norm": 2.1111093217701287, "learning_rate": 6.247422680412372e-06, "loss": 0.6545, "step": 2121 }, { "epoch": 0.06251933473771047, "grad_norm": 1.8375360165347532, "learning_rate": 6.250368188512519e-06, "loss": 0.4249, "step": 2122 }, { "epoch": 0.06254879719517405, "grad_norm": 2.164389557367307, "learning_rate": 6.253313696612666e-06, "loss": 0.4601, "step": 2123 }, { "epoch": 0.06257825965263762, "grad_norm": 1.8247556309548179, "learning_rate": 6.256259204712813e-06, "loss": 0.576, "step": 2124 }, { "epoch": 0.06260772211010121, "grad_norm": 1.6842150814678165, "learning_rate": 6.2592047128129605e-06, "loss": 0.5203, "step": 2125 }, { "epoch": 0.06263718456756479, "grad_norm": 1.7858794816827754, "learning_rate": 6.262150220913108e-06, "loss": 0.4398, "step": 2126 }, { "epoch": 0.06266664702502836, "grad_norm": 1.8588046461739163, "learning_rate": 6.265095729013256e-06, "loss": 0.6448, "step": 2127 }, { "epoch": 0.06269610948249194, "grad_norm": 1.74117515943002, "learning_rate": 6.268041237113403e-06, "loss": 0.5731, "step": 2128 }, { "epoch": 0.06272557193995551, "grad_norm": 2.3008295721610983, "learning_rate": 6.27098674521355e-06, "loss": 0.7017, "step": 2129 }, { "epoch": 0.06275503439741908, "grad_norm": 1.9698020541636572, "learning_rate": 6.273932253313697e-06, "loss": 0.6703, "step": 2130 }, { "epoch": 0.06278449685488266, "grad_norm": 1.8475215047752271, "learning_rate": 6.276877761413844e-06, "loss": 0.561, "step": 2131 }, { "epoch": 0.06281395931234625, "grad_norm": 1.8264505659240606, "learning_rate": 6.2798232695139915e-06, "loss": 0.4416, "step": 2132 }, { "epoch": 0.06284342176980982, "grad_norm": 2.161294994211468, "learning_rate": 6.282768777614139e-06, "loss": 0.6666, "step": 2133 }, { "epoch": 0.0628728842272734, "grad_norm": 1.8981581017164275, "learning_rate": 6.285714285714286e-06, "loss": 0.5903, "step": 2134 }, { "epoch": 0.06290234668473697, "grad_norm": 2.008415462768064, "learning_rate": 6.288659793814433e-06, "loss": 0.5809, "step": 2135 }, { "epoch": 0.06293180914220055, "grad_norm": 1.6609006600275107, "learning_rate": 6.291605301914581e-06, "loss": 0.5259, "step": 2136 }, { "epoch": 0.06296127159966412, "grad_norm": 1.6686333476599422, "learning_rate": 6.294550810014728e-06, "loss": 0.4538, "step": 2137 }, { "epoch": 0.06299073405712771, "grad_norm": 2.197784418069016, "learning_rate": 6.297496318114875e-06, "loss": 0.6124, "step": 2138 }, { "epoch": 0.06302019651459129, "grad_norm": 1.90154316706026, "learning_rate": 6.3004418262150224e-06, "loss": 0.4556, "step": 2139 }, { "epoch": 0.06304965897205486, "grad_norm": 1.8696394700220453, "learning_rate": 6.30338733431517e-06, "loss": 0.5726, "step": 2140 }, { "epoch": 0.06307912142951844, "grad_norm": 1.9068134916620951, "learning_rate": 6.306332842415317e-06, "loss": 0.538, "step": 2141 }, { "epoch": 0.06310858388698201, "grad_norm": 2.0670107744128057, "learning_rate": 6.309278350515464e-06, "loss": 0.6553, "step": 2142 }, { "epoch": 0.06313804634444559, "grad_norm": 1.7344491607668735, "learning_rate": 6.312223858615611e-06, "loss": 0.603, "step": 2143 }, { "epoch": 0.06316750880190916, "grad_norm": 1.9107779595573875, "learning_rate": 6.315169366715759e-06, "loss": 0.6277, "step": 2144 }, { "epoch": 0.06319697125937275, "grad_norm": 1.9898364350121864, "learning_rate": 6.318114874815906e-06, "loss": 0.7431, "step": 2145 }, { "epoch": 0.06322643371683632, "grad_norm": 2.025474225401361, "learning_rate": 6.321060382916053e-06, "loss": 0.6361, "step": 2146 }, { "epoch": 0.0632558961742999, "grad_norm": 1.8937326085299866, "learning_rate": 6.324005891016201e-06, "loss": 0.4468, "step": 2147 }, { "epoch": 0.06328535863176347, "grad_norm": 1.6431827297910195, "learning_rate": 6.326951399116348e-06, "loss": 0.4881, "step": 2148 }, { "epoch": 0.06331482108922705, "grad_norm": 1.82056782402502, "learning_rate": 6.329896907216495e-06, "loss": 0.5125, "step": 2149 }, { "epoch": 0.06334428354669062, "grad_norm": 1.9562856965496749, "learning_rate": 6.332842415316642e-06, "loss": 0.6424, "step": 2150 }, { "epoch": 0.06337374600415421, "grad_norm": 2.3703140352443772, "learning_rate": 6.335787923416789e-06, "loss": 0.416, "step": 2151 }, { "epoch": 0.06340320846161779, "grad_norm": 2.032218750289063, "learning_rate": 6.338733431516937e-06, "loss": 0.5405, "step": 2152 }, { "epoch": 0.06343267091908136, "grad_norm": 1.891181148007094, "learning_rate": 6.341678939617084e-06, "loss": 0.5798, "step": 2153 }, { "epoch": 0.06346213337654494, "grad_norm": 1.8924470651969982, "learning_rate": 6.3446244477172316e-06, "loss": 0.6007, "step": 2154 }, { "epoch": 0.06349159583400851, "grad_norm": 2.0225150566837793, "learning_rate": 6.347569955817379e-06, "loss": 0.6388, "step": 2155 }, { "epoch": 0.06352105829147209, "grad_norm": 2.1209761218001826, "learning_rate": 6.350515463917526e-06, "loss": 0.5494, "step": 2156 }, { "epoch": 0.06355052074893566, "grad_norm": 1.9038379686656683, "learning_rate": 6.353460972017673e-06, "loss": 0.4947, "step": 2157 }, { "epoch": 0.06357998320639925, "grad_norm": 1.7204183756266997, "learning_rate": 6.35640648011782e-06, "loss": 0.4975, "step": 2158 }, { "epoch": 0.06360944566386283, "grad_norm": 2.0697171947394053, "learning_rate": 6.359351988217967e-06, "loss": 0.7199, "step": 2159 }, { "epoch": 0.0636389081213264, "grad_norm": 1.8683152379181243, "learning_rate": 6.362297496318115e-06, "loss": 0.4973, "step": 2160 }, { "epoch": 0.06366837057878998, "grad_norm": 1.969503426630628, "learning_rate": 6.3652430044182625e-06, "loss": 0.5487, "step": 2161 }, { "epoch": 0.06369783303625355, "grad_norm": 1.942720832325882, "learning_rate": 6.36818851251841e-06, "loss": 0.5285, "step": 2162 }, { "epoch": 0.06372729549371713, "grad_norm": 1.8948276059019509, "learning_rate": 6.371134020618557e-06, "loss": 0.5328, "step": 2163 }, { "epoch": 0.06375675795118071, "grad_norm": 1.7164358173407934, "learning_rate": 6.374079528718704e-06, "loss": 0.5965, "step": 2164 }, { "epoch": 0.06378622040864429, "grad_norm": 1.8350107720550457, "learning_rate": 6.377025036818851e-06, "loss": 0.5138, "step": 2165 }, { "epoch": 0.06381568286610786, "grad_norm": 1.9832527368320152, "learning_rate": 6.379970544918998e-06, "loss": 0.6588, "step": 2166 }, { "epoch": 0.06384514532357144, "grad_norm": 1.926158287440875, "learning_rate": 6.3829160530191455e-06, "loss": 0.6006, "step": 2167 }, { "epoch": 0.06387460778103501, "grad_norm": 1.9229408980596336, "learning_rate": 6.3858615611192935e-06, "loss": 0.5756, "step": 2168 }, { "epoch": 0.06390407023849859, "grad_norm": 2.002000532074723, "learning_rate": 6.388807069219441e-06, "loss": 0.5864, "step": 2169 }, { "epoch": 0.06393353269596216, "grad_norm": 1.9490925223660949, "learning_rate": 6.391752577319588e-06, "loss": 0.5064, "step": 2170 }, { "epoch": 0.06396299515342575, "grad_norm": 1.7424609727911895, "learning_rate": 6.394698085419735e-06, "loss": 0.5511, "step": 2171 }, { "epoch": 0.06399245761088933, "grad_norm": 1.9336396891504448, "learning_rate": 6.397643593519882e-06, "loss": 0.5104, "step": 2172 }, { "epoch": 0.0640219200683529, "grad_norm": 2.0232891605924213, "learning_rate": 6.400589101620029e-06, "loss": 0.5624, "step": 2173 }, { "epoch": 0.06405138252581648, "grad_norm": 1.9487150894954337, "learning_rate": 6.4035346097201765e-06, "loss": 0.5262, "step": 2174 }, { "epoch": 0.06408084498328005, "grad_norm": 1.8766217792076254, "learning_rate": 6.406480117820325e-06, "loss": 0.5778, "step": 2175 }, { "epoch": 0.06411030744074363, "grad_norm": 1.6986648418720538, "learning_rate": 6.4094256259204725e-06, "loss": 0.4969, "step": 2176 }, { "epoch": 0.06413976989820722, "grad_norm": 2.0265453607351644, "learning_rate": 6.41237113402062e-06, "loss": 0.5647, "step": 2177 }, { "epoch": 0.06416923235567079, "grad_norm": 1.93118198704534, "learning_rate": 6.415316642120767e-06, "loss": 0.5322, "step": 2178 }, { "epoch": 0.06419869481313437, "grad_norm": 1.8247310401298953, "learning_rate": 6.418262150220914e-06, "loss": 0.4729, "step": 2179 }, { "epoch": 0.06422815727059794, "grad_norm": 1.9664992991691483, "learning_rate": 6.421207658321061e-06, "loss": 0.4667, "step": 2180 }, { "epoch": 0.06425761972806152, "grad_norm": 1.7094709070898282, "learning_rate": 6.424153166421208e-06, "loss": 0.5261, "step": 2181 }, { "epoch": 0.06428708218552509, "grad_norm": 1.799984005605101, "learning_rate": 6.427098674521356e-06, "loss": 0.5479, "step": 2182 }, { "epoch": 0.06431654464298867, "grad_norm": 2.1086543108847455, "learning_rate": 6.4300441826215035e-06, "loss": 0.5354, "step": 2183 }, { "epoch": 0.06434600710045225, "grad_norm": 1.7807580177684714, "learning_rate": 6.432989690721651e-06, "loss": 0.6019, "step": 2184 }, { "epoch": 0.06437546955791583, "grad_norm": 1.81311876255424, "learning_rate": 6.435935198821798e-06, "loss": 0.3438, "step": 2185 }, { "epoch": 0.0644049320153794, "grad_norm": 1.8466297072624778, "learning_rate": 6.438880706921945e-06, "loss": 0.5986, "step": 2186 }, { "epoch": 0.06443439447284298, "grad_norm": 1.8552578389300196, "learning_rate": 6.441826215022092e-06, "loss": 0.4598, "step": 2187 }, { "epoch": 0.06446385693030655, "grad_norm": 1.8413579387992507, "learning_rate": 6.444771723122239e-06, "loss": 0.4329, "step": 2188 }, { "epoch": 0.06449331938777013, "grad_norm": 1.7012186400903957, "learning_rate": 6.4477172312223864e-06, "loss": 0.5492, "step": 2189 }, { "epoch": 0.06452278184523372, "grad_norm": 1.9846510496697136, "learning_rate": 6.4506627393225344e-06, "loss": 0.7376, "step": 2190 }, { "epoch": 0.06455224430269729, "grad_norm": 2.1824036012394354, "learning_rate": 6.453608247422682e-06, "loss": 0.5467, "step": 2191 }, { "epoch": 0.06458170676016087, "grad_norm": 1.9035739277830217, "learning_rate": 6.456553755522829e-06, "loss": 0.6141, "step": 2192 }, { "epoch": 0.06461116921762444, "grad_norm": 1.8650849628532227, "learning_rate": 6.459499263622976e-06, "loss": 0.4252, "step": 2193 }, { "epoch": 0.06464063167508802, "grad_norm": 1.9226470780388152, "learning_rate": 6.462444771723123e-06, "loss": 0.4071, "step": 2194 }, { "epoch": 0.06467009413255159, "grad_norm": 1.6270123548907254, "learning_rate": 6.46539027982327e-06, "loss": 0.4856, "step": 2195 }, { "epoch": 0.06469955659001517, "grad_norm": 1.9087839855503106, "learning_rate": 6.468335787923417e-06, "loss": 0.7544, "step": 2196 }, { "epoch": 0.06472901904747876, "grad_norm": 1.6758398448831482, "learning_rate": 6.4712812960235646e-06, "loss": 0.4953, "step": 2197 }, { "epoch": 0.06475848150494233, "grad_norm": 1.7250273566188794, "learning_rate": 6.4742268041237126e-06, "loss": 0.5103, "step": 2198 }, { "epoch": 0.0647879439624059, "grad_norm": 1.7792239034428265, "learning_rate": 6.47717231222386e-06, "loss": 0.4538, "step": 2199 }, { "epoch": 0.06481740641986948, "grad_norm": 1.7267382477265938, "learning_rate": 6.480117820324007e-06, "loss": 0.5095, "step": 2200 }, { "epoch": 0.06484686887733306, "grad_norm": 1.7736572474690768, "learning_rate": 6.483063328424154e-06, "loss": 0.5105, "step": 2201 }, { "epoch": 0.06487633133479663, "grad_norm": 1.770255090431443, "learning_rate": 6.486008836524301e-06, "loss": 0.4524, "step": 2202 }, { "epoch": 0.06490579379226022, "grad_norm": 1.8344721482896453, "learning_rate": 6.488954344624448e-06, "loss": 0.5633, "step": 2203 }, { "epoch": 0.0649352562497238, "grad_norm": 1.9278579494879977, "learning_rate": 6.4918998527245955e-06, "loss": 0.6219, "step": 2204 }, { "epoch": 0.06496471870718737, "grad_norm": 1.7989584699030585, "learning_rate": 6.494845360824743e-06, "loss": 0.6368, "step": 2205 }, { "epoch": 0.06499418116465094, "grad_norm": 2.3378312655141182, "learning_rate": 6.497790868924891e-06, "loss": 0.7092, "step": 2206 }, { "epoch": 0.06502364362211452, "grad_norm": 1.8952597233473185, "learning_rate": 6.500736377025038e-06, "loss": 0.4447, "step": 2207 }, { "epoch": 0.0650531060795781, "grad_norm": 1.9693790792514614, "learning_rate": 6.503681885125185e-06, "loss": 0.5, "step": 2208 }, { "epoch": 0.06508256853704167, "grad_norm": 1.859074712126047, "learning_rate": 6.506627393225332e-06, "loss": 0.5525, "step": 2209 }, { "epoch": 0.06511203099450526, "grad_norm": 1.8812424765849514, "learning_rate": 6.509572901325479e-06, "loss": 0.715, "step": 2210 }, { "epoch": 0.06514149345196883, "grad_norm": 1.9537038083267229, "learning_rate": 6.5125184094256265e-06, "loss": 0.564, "step": 2211 }, { "epoch": 0.06517095590943241, "grad_norm": 1.8439297618999981, "learning_rate": 6.515463917525774e-06, "loss": 0.5139, "step": 2212 }, { "epoch": 0.06520041836689598, "grad_norm": 1.8172401112514323, "learning_rate": 6.518409425625921e-06, "loss": 0.6306, "step": 2213 }, { "epoch": 0.06522988082435956, "grad_norm": 2.0374821342048834, "learning_rate": 6.521354933726069e-06, "loss": 0.5164, "step": 2214 }, { "epoch": 0.06525934328182313, "grad_norm": 2.250932091157236, "learning_rate": 6.524300441826216e-06, "loss": 0.7235, "step": 2215 }, { "epoch": 0.06528880573928672, "grad_norm": 1.9479103009919239, "learning_rate": 6.527245949926363e-06, "loss": 0.5332, "step": 2216 }, { "epoch": 0.0653182681967503, "grad_norm": 2.1511228525653383, "learning_rate": 6.53019145802651e-06, "loss": 0.7032, "step": 2217 }, { "epoch": 0.06534773065421387, "grad_norm": 1.8680995226459993, "learning_rate": 6.5331369661266575e-06, "loss": 0.5281, "step": 2218 }, { "epoch": 0.06537719311167745, "grad_norm": 1.9154653271810034, "learning_rate": 6.536082474226805e-06, "loss": 0.5665, "step": 2219 }, { "epoch": 0.06540665556914102, "grad_norm": 2.1084870784085616, "learning_rate": 6.539027982326952e-06, "loss": 0.7513, "step": 2220 }, { "epoch": 0.0654361180266046, "grad_norm": 1.8132932814878577, "learning_rate": 6.541973490427099e-06, "loss": 0.4932, "step": 2221 }, { "epoch": 0.06546558048406817, "grad_norm": 1.83405033587441, "learning_rate": 6.544918998527246e-06, "loss": 0.5456, "step": 2222 }, { "epoch": 0.06549504294153176, "grad_norm": 2.0584548010266577, "learning_rate": 6.547864506627394e-06, "loss": 0.5772, "step": 2223 }, { "epoch": 0.06552450539899533, "grad_norm": 1.8740177065670645, "learning_rate": 6.550810014727541e-06, "loss": 0.6178, "step": 2224 }, { "epoch": 0.06555396785645891, "grad_norm": 1.6992688050093652, "learning_rate": 6.5537555228276885e-06, "loss": 0.4966, "step": 2225 }, { "epoch": 0.06558343031392248, "grad_norm": 2.002851483166752, "learning_rate": 6.556701030927836e-06, "loss": 0.5971, "step": 2226 }, { "epoch": 0.06561289277138606, "grad_norm": 2.182237853892157, "learning_rate": 6.559646539027983e-06, "loss": 0.7884, "step": 2227 }, { "epoch": 0.06564235522884963, "grad_norm": 1.886227581617955, "learning_rate": 6.56259204712813e-06, "loss": 0.5437, "step": 2228 }, { "epoch": 0.06567181768631322, "grad_norm": 1.7632138801028159, "learning_rate": 6.565537555228277e-06, "loss": 0.4364, "step": 2229 }, { "epoch": 0.0657012801437768, "grad_norm": 1.6331993447760313, "learning_rate": 6.568483063328424e-06, "loss": 0.4744, "step": 2230 }, { "epoch": 0.06573074260124037, "grad_norm": 1.8312228382627453, "learning_rate": 6.571428571428572e-06, "loss": 0.6378, "step": 2231 }, { "epoch": 0.06576020505870395, "grad_norm": 1.748788916385169, "learning_rate": 6.5743740795287194e-06, "loss": 0.4955, "step": 2232 }, { "epoch": 0.06578966751616752, "grad_norm": 1.7759999116139777, "learning_rate": 6.577319587628867e-06, "loss": 0.5029, "step": 2233 }, { "epoch": 0.0658191299736311, "grad_norm": 1.8868852554776114, "learning_rate": 6.580265095729014e-06, "loss": 0.7194, "step": 2234 }, { "epoch": 0.06584859243109467, "grad_norm": 1.8697236543454543, "learning_rate": 6.583210603829161e-06, "loss": 0.4815, "step": 2235 }, { "epoch": 0.06587805488855826, "grad_norm": 2.0802447445153045, "learning_rate": 6.586156111929308e-06, "loss": 0.7487, "step": 2236 }, { "epoch": 0.06590751734602183, "grad_norm": 1.6840949646042191, "learning_rate": 6.589101620029455e-06, "loss": 0.5428, "step": 2237 }, { "epoch": 0.06593697980348541, "grad_norm": 2.189963166690878, "learning_rate": 6.592047128129602e-06, "loss": 0.4871, "step": 2238 }, { "epoch": 0.06596644226094898, "grad_norm": 1.9556910784625723, "learning_rate": 6.59499263622975e-06, "loss": 0.5708, "step": 2239 }, { "epoch": 0.06599590471841256, "grad_norm": 1.7816749946045527, "learning_rate": 6.597938144329898e-06, "loss": 0.5685, "step": 2240 }, { "epoch": 0.06602536717587613, "grad_norm": 2.001363234124259, "learning_rate": 6.600883652430045e-06, "loss": 0.6443, "step": 2241 }, { "epoch": 0.06605482963333972, "grad_norm": 1.9578612673647913, "learning_rate": 6.603829160530192e-06, "loss": 0.6921, "step": 2242 }, { "epoch": 0.0660842920908033, "grad_norm": 2.0758921372762864, "learning_rate": 6.606774668630339e-06, "loss": 0.5627, "step": 2243 }, { "epoch": 0.06611375454826687, "grad_norm": 1.9000467173343334, "learning_rate": 6.609720176730486e-06, "loss": 0.6782, "step": 2244 }, { "epoch": 0.06614321700573045, "grad_norm": 1.8828693150730684, "learning_rate": 6.612665684830633e-06, "loss": 0.5634, "step": 2245 }, { "epoch": 0.06617267946319402, "grad_norm": 2.1377383111457804, "learning_rate": 6.6156111929307805e-06, "loss": 0.6798, "step": 2246 }, { "epoch": 0.0662021419206576, "grad_norm": 1.8596607484626515, "learning_rate": 6.6185567010309286e-06, "loss": 0.6969, "step": 2247 }, { "epoch": 0.06623160437812117, "grad_norm": 1.8827428698829505, "learning_rate": 6.621502209131076e-06, "loss": 0.6567, "step": 2248 }, { "epoch": 0.06626106683558476, "grad_norm": 1.7629757745985244, "learning_rate": 6.624447717231223e-06, "loss": 0.58, "step": 2249 }, { "epoch": 0.06629052929304834, "grad_norm": 2.0339830308658344, "learning_rate": 6.62739322533137e-06, "loss": 0.5837, "step": 2250 }, { "epoch": 0.06631999175051191, "grad_norm": 1.8721094292702285, "learning_rate": 6.630338733431517e-06, "loss": 0.6075, "step": 2251 }, { "epoch": 0.06634945420797549, "grad_norm": 1.880857012686477, "learning_rate": 6.633284241531664e-06, "loss": 0.5684, "step": 2252 }, { "epoch": 0.06637891666543906, "grad_norm": 1.8139928840894368, "learning_rate": 6.6362297496318115e-06, "loss": 0.5064, "step": 2253 }, { "epoch": 0.06640837912290264, "grad_norm": 1.8586200796510324, "learning_rate": 6.639175257731959e-06, "loss": 0.5763, "step": 2254 }, { "epoch": 0.06643784158036622, "grad_norm": 1.8326876797391385, "learning_rate": 6.642120765832107e-06, "loss": 0.566, "step": 2255 }, { "epoch": 0.0664673040378298, "grad_norm": 1.8045674802462455, "learning_rate": 6.645066273932254e-06, "loss": 0.5195, "step": 2256 }, { "epoch": 0.06649676649529337, "grad_norm": 1.9314206140158812, "learning_rate": 6.648011782032401e-06, "loss": 0.6089, "step": 2257 }, { "epoch": 0.06652622895275695, "grad_norm": 1.8785419836977078, "learning_rate": 6.650957290132548e-06, "loss": 0.6819, "step": 2258 }, { "epoch": 0.06655569141022052, "grad_norm": 1.881443562428105, "learning_rate": 6.653902798232695e-06, "loss": 0.5605, "step": 2259 }, { "epoch": 0.0665851538676841, "grad_norm": 2.116159305852569, "learning_rate": 6.6568483063328425e-06, "loss": 0.8454, "step": 2260 }, { "epoch": 0.06661461632514767, "grad_norm": 1.7954705504379314, "learning_rate": 6.65979381443299e-06, "loss": 0.4908, "step": 2261 }, { "epoch": 0.06664407878261126, "grad_norm": 1.7889384809768187, "learning_rate": 6.662739322533137e-06, "loss": 0.7155, "step": 2262 }, { "epoch": 0.06667354124007484, "grad_norm": 1.6405115359129778, "learning_rate": 6.665684830633285e-06, "loss": 0.4625, "step": 2263 }, { "epoch": 0.06670300369753841, "grad_norm": 1.9208443205766568, "learning_rate": 6.668630338733432e-06, "loss": 0.5367, "step": 2264 }, { "epoch": 0.06673246615500199, "grad_norm": 1.7361814611176296, "learning_rate": 6.671575846833579e-06, "loss": 0.6204, "step": 2265 }, { "epoch": 0.06676192861246556, "grad_norm": 1.926274589262977, "learning_rate": 6.674521354933726e-06, "loss": 0.4143, "step": 2266 }, { "epoch": 0.06679139106992914, "grad_norm": 2.03586561414494, "learning_rate": 6.6774668630338735e-06, "loss": 0.6124, "step": 2267 }, { "epoch": 0.06682085352739273, "grad_norm": 1.9235972966457646, "learning_rate": 6.680412371134021e-06, "loss": 0.6246, "step": 2268 }, { "epoch": 0.0668503159848563, "grad_norm": 1.8406317661331726, "learning_rate": 6.683357879234168e-06, "loss": 0.6397, "step": 2269 }, { "epoch": 0.06687977844231988, "grad_norm": 1.9774581283729051, "learning_rate": 6.686303387334315e-06, "loss": 0.5581, "step": 2270 }, { "epoch": 0.06690924089978345, "grad_norm": 1.940048456424758, "learning_rate": 6.689248895434463e-06, "loss": 0.6208, "step": 2271 }, { "epoch": 0.06693870335724703, "grad_norm": 2.0449607555250555, "learning_rate": 6.69219440353461e-06, "loss": 0.4883, "step": 2272 }, { "epoch": 0.0669681658147106, "grad_norm": 1.6315166012264593, "learning_rate": 6.695139911634757e-06, "loss": 0.3509, "step": 2273 }, { "epoch": 0.06699762827217418, "grad_norm": 1.7771219023216298, "learning_rate": 6.6980854197349044e-06, "loss": 0.6223, "step": 2274 }, { "epoch": 0.06702709072963776, "grad_norm": 1.7925667628744097, "learning_rate": 6.701030927835052e-06, "loss": 0.4624, "step": 2275 }, { "epoch": 0.06705655318710134, "grad_norm": 1.8268464158856799, "learning_rate": 6.703976435935199e-06, "loss": 0.4862, "step": 2276 }, { "epoch": 0.06708601564456491, "grad_norm": 1.919112036164492, "learning_rate": 6.706921944035346e-06, "loss": 0.5325, "step": 2277 }, { "epoch": 0.06711547810202849, "grad_norm": 1.7300963394345958, "learning_rate": 6.709867452135493e-06, "loss": 0.4471, "step": 2278 }, { "epoch": 0.06714494055949206, "grad_norm": 1.6813354257121427, "learning_rate": 6.71281296023564e-06, "loss": 0.4777, "step": 2279 }, { "epoch": 0.06717440301695564, "grad_norm": 1.8844157862749296, "learning_rate": 6.715758468335788e-06, "loss": 0.5117, "step": 2280 }, { "epoch": 0.06720386547441923, "grad_norm": 1.786567235907356, "learning_rate": 6.718703976435935e-06, "loss": 0.5488, "step": 2281 }, { "epoch": 0.0672333279318828, "grad_norm": 2.0534624448182015, "learning_rate": 6.7216494845360834e-06, "loss": 0.5523, "step": 2282 }, { "epoch": 0.06726279038934638, "grad_norm": 1.8687928662278477, "learning_rate": 6.724594992636231e-06, "loss": 0.5327, "step": 2283 }, { "epoch": 0.06729225284680995, "grad_norm": 2.0386584488397714, "learning_rate": 6.727540500736378e-06, "loss": 0.6028, "step": 2284 }, { "epoch": 0.06732171530427353, "grad_norm": 1.9185424416383772, "learning_rate": 6.730486008836526e-06, "loss": 0.6326, "step": 2285 }, { "epoch": 0.0673511777617371, "grad_norm": 1.869984982796073, "learning_rate": 6.733431516936673e-06, "loss": 0.5789, "step": 2286 }, { "epoch": 0.06738064021920068, "grad_norm": 1.7947262608647399, "learning_rate": 6.73637702503682e-06, "loss": 0.4695, "step": 2287 }, { "epoch": 0.06741010267666427, "grad_norm": 1.853135527584648, "learning_rate": 6.739322533136967e-06, "loss": 0.6436, "step": 2288 }, { "epoch": 0.06743956513412784, "grad_norm": 1.700271918288249, "learning_rate": 6.742268041237114e-06, "loss": 0.5413, "step": 2289 }, { "epoch": 0.06746902759159142, "grad_norm": 1.8979905751663668, "learning_rate": 6.7452135493372616e-06, "loss": 0.5121, "step": 2290 }, { "epoch": 0.06749849004905499, "grad_norm": 1.9487226250623895, "learning_rate": 6.748159057437409e-06, "loss": 0.6241, "step": 2291 }, { "epoch": 0.06752795250651857, "grad_norm": 1.9438670610940931, "learning_rate": 6.751104565537556e-06, "loss": 0.6864, "step": 2292 }, { "epoch": 0.06755741496398214, "grad_norm": 1.8588705286771328, "learning_rate": 6.754050073637704e-06, "loss": 0.4491, "step": 2293 }, { "epoch": 0.06758687742144573, "grad_norm": 1.7395033676213547, "learning_rate": 6.756995581737851e-06, "loss": 0.4413, "step": 2294 }, { "epoch": 0.0676163398789093, "grad_norm": 1.9315838303350428, "learning_rate": 6.759941089837998e-06, "loss": 0.6356, "step": 2295 }, { "epoch": 0.06764580233637288, "grad_norm": 1.7360617375437053, "learning_rate": 6.762886597938145e-06, "loss": 0.645, "step": 2296 }, { "epoch": 0.06767526479383645, "grad_norm": 1.9307680203700475, "learning_rate": 6.7658321060382925e-06, "loss": 0.6835, "step": 2297 }, { "epoch": 0.06770472725130003, "grad_norm": 2.0241744260151204, "learning_rate": 6.76877761413844e-06, "loss": 0.5918, "step": 2298 }, { "epoch": 0.0677341897087636, "grad_norm": 1.8375213229404759, "learning_rate": 6.771723122238587e-06, "loss": 0.4221, "step": 2299 }, { "epoch": 0.06776365216622718, "grad_norm": 1.9323569475089837, "learning_rate": 6.774668630338734e-06, "loss": 0.7333, "step": 2300 }, { "epoch": 0.06779311462369077, "grad_norm": 1.821329655923569, "learning_rate": 6.777614138438882e-06, "loss": 0.5514, "step": 2301 }, { "epoch": 0.06782257708115434, "grad_norm": 2.4200912136633783, "learning_rate": 6.780559646539029e-06, "loss": 0.3684, "step": 2302 }, { "epoch": 0.06785203953861792, "grad_norm": 2.0417766338976877, "learning_rate": 6.783505154639176e-06, "loss": 0.4117, "step": 2303 }, { "epoch": 0.06788150199608149, "grad_norm": 1.704548373284366, "learning_rate": 6.7864506627393235e-06, "loss": 0.4238, "step": 2304 }, { "epoch": 0.06791096445354507, "grad_norm": 1.9750374088157454, "learning_rate": 6.789396170839471e-06, "loss": 0.5303, "step": 2305 }, { "epoch": 0.06794042691100864, "grad_norm": 1.9374942550160565, "learning_rate": 6.792341678939618e-06, "loss": 0.5377, "step": 2306 }, { "epoch": 0.06796988936847223, "grad_norm": 1.8024954177426493, "learning_rate": 6.795287187039765e-06, "loss": 0.5139, "step": 2307 }, { "epoch": 0.0679993518259358, "grad_norm": 2.0446996401612347, "learning_rate": 6.798232695139912e-06, "loss": 0.5801, "step": 2308 }, { "epoch": 0.06802881428339938, "grad_norm": 1.7163115233896347, "learning_rate": 6.801178203240059e-06, "loss": 0.6266, "step": 2309 }, { "epoch": 0.06805827674086296, "grad_norm": 1.572486780756826, "learning_rate": 6.804123711340207e-06, "loss": 0.3984, "step": 2310 }, { "epoch": 0.06808773919832653, "grad_norm": 2.0675326995609242, "learning_rate": 6.8070692194403545e-06, "loss": 0.6711, "step": 2311 }, { "epoch": 0.0681172016557901, "grad_norm": 1.882235490222097, "learning_rate": 6.810014727540502e-06, "loss": 0.6892, "step": 2312 }, { "epoch": 0.06814666411325368, "grad_norm": 1.6847301940844281, "learning_rate": 6.812960235640649e-06, "loss": 0.5583, "step": 2313 }, { "epoch": 0.06817612657071727, "grad_norm": 1.8085675651092499, "learning_rate": 6.815905743740796e-06, "loss": 0.7332, "step": 2314 }, { "epoch": 0.06820558902818084, "grad_norm": 1.9218080226704992, "learning_rate": 6.818851251840943e-06, "loss": 0.4936, "step": 2315 }, { "epoch": 0.06823505148564442, "grad_norm": 1.9041324784396054, "learning_rate": 6.82179675994109e-06, "loss": 0.4636, "step": 2316 }, { "epoch": 0.068264513943108, "grad_norm": 1.7540932757387369, "learning_rate": 6.8247422680412375e-06, "loss": 0.6724, "step": 2317 }, { "epoch": 0.06829397640057157, "grad_norm": 1.9598227271120183, "learning_rate": 6.8276877761413855e-06, "loss": 0.5993, "step": 2318 }, { "epoch": 0.06832343885803514, "grad_norm": 1.74239113117908, "learning_rate": 6.830633284241533e-06, "loss": 0.619, "step": 2319 }, { "epoch": 0.06835290131549873, "grad_norm": 1.7362273310549579, "learning_rate": 6.83357879234168e-06, "loss": 0.4981, "step": 2320 }, { "epoch": 0.0683823637729623, "grad_norm": 2.0746645759392317, "learning_rate": 6.836524300441827e-06, "loss": 0.4828, "step": 2321 }, { "epoch": 0.06841182623042588, "grad_norm": 1.770308005595416, "learning_rate": 6.839469808541974e-06, "loss": 0.6364, "step": 2322 }, { "epoch": 0.06844128868788946, "grad_norm": 2.1434196802661396, "learning_rate": 6.842415316642121e-06, "loss": 0.6564, "step": 2323 }, { "epoch": 0.06847075114535303, "grad_norm": 1.881421151830695, "learning_rate": 6.8453608247422684e-06, "loss": 0.5517, "step": 2324 }, { "epoch": 0.0685002136028166, "grad_norm": 1.7893718867678272, "learning_rate": 6.848306332842416e-06, "loss": 0.4661, "step": 2325 }, { "epoch": 0.06852967606028018, "grad_norm": 1.8434077845103427, "learning_rate": 6.851251840942564e-06, "loss": 0.5122, "step": 2326 }, { "epoch": 0.06855913851774377, "grad_norm": 1.8259396282993092, "learning_rate": 6.854197349042711e-06, "loss": 0.6251, "step": 2327 }, { "epoch": 0.06858860097520735, "grad_norm": 2.0959701100307915, "learning_rate": 6.857142857142858e-06, "loss": 0.4752, "step": 2328 }, { "epoch": 0.06861806343267092, "grad_norm": 1.955826825327499, "learning_rate": 6.860088365243005e-06, "loss": 0.5254, "step": 2329 }, { "epoch": 0.0686475258901345, "grad_norm": 1.885014550631103, "learning_rate": 6.863033873343152e-06, "loss": 0.593, "step": 2330 }, { "epoch": 0.06867698834759807, "grad_norm": 1.8748548323471237, "learning_rate": 6.865979381443299e-06, "loss": 0.6308, "step": 2331 }, { "epoch": 0.06870645080506164, "grad_norm": 2.1301174645249628, "learning_rate": 6.8689248895434466e-06, "loss": 0.7, "step": 2332 }, { "epoch": 0.06873591326252523, "grad_norm": 1.9897271586110639, "learning_rate": 6.871870397643594e-06, "loss": 0.652, "step": 2333 }, { "epoch": 0.06876537571998881, "grad_norm": 1.8070542632255209, "learning_rate": 6.874815905743742e-06, "loss": 0.4346, "step": 2334 }, { "epoch": 0.06879483817745238, "grad_norm": 1.7396445158789449, "learning_rate": 6.877761413843889e-06, "loss": 0.6878, "step": 2335 }, { "epoch": 0.06882430063491596, "grad_norm": 2.0491837781946622, "learning_rate": 6.880706921944036e-06, "loss": 0.633, "step": 2336 }, { "epoch": 0.06885376309237953, "grad_norm": 1.7583432037773545, "learning_rate": 6.883652430044183e-06, "loss": 0.4156, "step": 2337 }, { "epoch": 0.06888322554984311, "grad_norm": 1.9289451994326405, "learning_rate": 6.88659793814433e-06, "loss": 0.5782, "step": 2338 }, { "epoch": 0.06891268800730668, "grad_norm": 1.9949516648358365, "learning_rate": 6.8895434462444775e-06, "loss": 0.702, "step": 2339 }, { "epoch": 0.06894215046477027, "grad_norm": 1.821814134451824, "learning_rate": 6.892488954344625e-06, "loss": 0.4447, "step": 2340 }, { "epoch": 0.06897161292223385, "grad_norm": 1.961513712592418, "learning_rate": 6.895434462444772e-06, "loss": 0.7, "step": 2341 }, { "epoch": 0.06900107537969742, "grad_norm": 1.956656784083917, "learning_rate": 6.89837997054492e-06, "loss": 0.6216, "step": 2342 }, { "epoch": 0.069030537837161, "grad_norm": 1.7701605927241553, "learning_rate": 6.901325478645067e-06, "loss": 0.5957, "step": 2343 }, { "epoch": 0.06906000029462457, "grad_norm": 1.858323524727115, "learning_rate": 6.904270986745214e-06, "loss": 0.573, "step": 2344 }, { "epoch": 0.06908946275208815, "grad_norm": 1.9773795913674541, "learning_rate": 6.907216494845361e-06, "loss": 0.6272, "step": 2345 }, { "epoch": 0.06911892520955173, "grad_norm": 1.9485146088543412, "learning_rate": 6.9101620029455085e-06, "loss": 0.653, "step": 2346 }, { "epoch": 0.06914838766701531, "grad_norm": 2.224840413686575, "learning_rate": 6.913107511045656e-06, "loss": 0.7428, "step": 2347 }, { "epoch": 0.06917785012447888, "grad_norm": 1.8053050228377814, "learning_rate": 6.916053019145803e-06, "loss": 0.4947, "step": 2348 }, { "epoch": 0.06920731258194246, "grad_norm": 1.7693538464492817, "learning_rate": 6.91899852724595e-06, "loss": 0.567, "step": 2349 }, { "epoch": 0.06923677503940603, "grad_norm": 1.9098232499899142, "learning_rate": 6.921944035346098e-06, "loss": 0.5688, "step": 2350 }, { "epoch": 0.06926623749686961, "grad_norm": 1.8808211609224543, "learning_rate": 6.924889543446245e-06, "loss": 0.4127, "step": 2351 }, { "epoch": 0.06929569995433318, "grad_norm": 2.1841452343357863, "learning_rate": 6.927835051546392e-06, "loss": 0.5524, "step": 2352 }, { "epoch": 0.06932516241179677, "grad_norm": 1.6699455162922492, "learning_rate": 6.9307805596465395e-06, "loss": 0.5295, "step": 2353 }, { "epoch": 0.06935462486926035, "grad_norm": 1.9241637496058255, "learning_rate": 6.933726067746687e-06, "loss": 0.5418, "step": 2354 }, { "epoch": 0.06938408732672392, "grad_norm": 1.8077119329083131, "learning_rate": 6.936671575846834e-06, "loss": 0.5812, "step": 2355 }, { "epoch": 0.0694135497841875, "grad_norm": 1.8498688098946001, "learning_rate": 6.939617083946981e-06, "loss": 0.5427, "step": 2356 }, { "epoch": 0.06944301224165107, "grad_norm": 1.8875977787986615, "learning_rate": 6.942562592047128e-06, "loss": 0.6809, "step": 2357 }, { "epoch": 0.06947247469911465, "grad_norm": 2.0312622573081636, "learning_rate": 6.945508100147276e-06, "loss": 0.6558, "step": 2358 }, { "epoch": 0.06950193715657824, "grad_norm": 1.8838888092809598, "learning_rate": 6.948453608247423e-06, "loss": 0.6042, "step": 2359 }, { "epoch": 0.06953139961404181, "grad_norm": 1.7508225411659168, "learning_rate": 6.9513991163475705e-06, "loss": 0.525, "step": 2360 }, { "epoch": 0.06956086207150539, "grad_norm": 2.0230493490618975, "learning_rate": 6.954344624447718e-06, "loss": 0.654, "step": 2361 }, { "epoch": 0.06959032452896896, "grad_norm": 1.9064141396339795, "learning_rate": 6.957290132547865e-06, "loss": 0.6872, "step": 2362 }, { "epoch": 0.06961978698643254, "grad_norm": 1.9262041744456384, "learning_rate": 6.960235640648012e-06, "loss": 0.6636, "step": 2363 }, { "epoch": 0.06964924944389611, "grad_norm": 1.8857684900977514, "learning_rate": 6.963181148748159e-06, "loss": 0.5093, "step": 2364 }, { "epoch": 0.06967871190135969, "grad_norm": 1.9476598535585354, "learning_rate": 6.966126656848306e-06, "loss": 0.5208, "step": 2365 }, { "epoch": 0.06970817435882327, "grad_norm": 1.8627834378648418, "learning_rate": 6.9690721649484534e-06, "loss": 0.6634, "step": 2366 }, { "epoch": 0.06973763681628685, "grad_norm": 1.6600911870660588, "learning_rate": 6.9720176730486014e-06, "loss": 0.3984, "step": 2367 }, { "epoch": 0.06976709927375042, "grad_norm": 1.8617588114425059, "learning_rate": 6.974963181148749e-06, "loss": 0.5608, "step": 2368 }, { "epoch": 0.069796561731214, "grad_norm": 2.1141658485776658, "learning_rate": 6.977908689248896e-06, "loss": 0.5645, "step": 2369 }, { "epoch": 0.06982602418867757, "grad_norm": 1.87910336918841, "learning_rate": 6.980854197349043e-06, "loss": 0.5749, "step": 2370 }, { "epoch": 0.06985548664614115, "grad_norm": 1.8381973590224712, "learning_rate": 6.98379970544919e-06, "loss": 0.5209, "step": 2371 }, { "epoch": 0.06988494910360474, "grad_norm": 1.8855813213498465, "learning_rate": 6.986745213549337e-06, "loss": 0.6428, "step": 2372 }, { "epoch": 0.06991441156106831, "grad_norm": 1.9398943312480657, "learning_rate": 6.989690721649484e-06, "loss": 0.6145, "step": 2373 }, { "epoch": 0.06994387401853189, "grad_norm": 1.6753932843400996, "learning_rate": 6.9926362297496316e-06, "loss": 0.4406, "step": 2374 }, { "epoch": 0.06997333647599546, "grad_norm": 2.0116803726924606, "learning_rate": 6.99558173784978e-06, "loss": 0.6016, "step": 2375 }, { "epoch": 0.07000279893345904, "grad_norm": 1.8920145572356688, "learning_rate": 6.998527245949927e-06, "loss": 0.5572, "step": 2376 }, { "epoch": 0.07003226139092261, "grad_norm": 1.8428584765105174, "learning_rate": 7.001472754050074e-06, "loss": 0.4222, "step": 2377 }, { "epoch": 0.07006172384838619, "grad_norm": 2.2304599582329896, "learning_rate": 7.004418262150221e-06, "loss": 0.6134, "step": 2378 }, { "epoch": 0.07009118630584978, "grad_norm": 1.8022350157473737, "learning_rate": 7.007363770250368e-06, "loss": 0.5664, "step": 2379 }, { "epoch": 0.07012064876331335, "grad_norm": 2.0022737334581078, "learning_rate": 7.010309278350515e-06, "loss": 0.5963, "step": 2380 }, { "epoch": 0.07015011122077693, "grad_norm": 1.9057759506495011, "learning_rate": 7.0132547864506625e-06, "loss": 0.6448, "step": 2381 }, { "epoch": 0.0701795736782405, "grad_norm": 1.8447332141483646, "learning_rate": 7.01620029455081e-06, "loss": 0.6794, "step": 2382 }, { "epoch": 0.07020903613570408, "grad_norm": 1.854254584269738, "learning_rate": 7.019145802650958e-06, "loss": 0.4417, "step": 2383 }, { "epoch": 0.07023849859316765, "grad_norm": 1.8387214924791735, "learning_rate": 7.022091310751105e-06, "loss": 0.555, "step": 2384 }, { "epoch": 0.07026796105063124, "grad_norm": 2.0699395658278203, "learning_rate": 7.025036818851252e-06, "loss": 0.7053, "step": 2385 }, { "epoch": 0.07029742350809481, "grad_norm": 1.699067281726591, "learning_rate": 7.027982326951399e-06, "loss": 0.5855, "step": 2386 }, { "epoch": 0.07032688596555839, "grad_norm": 1.9584093195400447, "learning_rate": 7.030927835051546e-06, "loss": 0.5884, "step": 2387 }, { "epoch": 0.07035634842302196, "grad_norm": 1.7196467085602238, "learning_rate": 7.033873343151694e-06, "loss": 0.4719, "step": 2388 }, { "epoch": 0.07038581088048554, "grad_norm": 1.8274559392623315, "learning_rate": 7.036818851251842e-06, "loss": 0.5684, "step": 2389 }, { "epoch": 0.07041527333794911, "grad_norm": 1.7543839880233458, "learning_rate": 7.0397643593519895e-06, "loss": 0.3441, "step": 2390 }, { "epoch": 0.07044473579541269, "grad_norm": 1.8411868108523908, "learning_rate": 7.042709867452137e-06, "loss": 0.5793, "step": 2391 }, { "epoch": 0.07047419825287628, "grad_norm": 1.9441445265484674, "learning_rate": 7.045655375552284e-06, "loss": 0.6678, "step": 2392 }, { "epoch": 0.07050366071033985, "grad_norm": 1.8402060939178537, "learning_rate": 7.048600883652431e-06, "loss": 0.5604, "step": 2393 }, { "epoch": 0.07053312316780343, "grad_norm": 1.724318403108308, "learning_rate": 7.051546391752578e-06, "loss": 0.4454, "step": 2394 }, { "epoch": 0.070562585625267, "grad_norm": 1.9923432554565372, "learning_rate": 7.054491899852725e-06, "loss": 0.4893, "step": 2395 }, { "epoch": 0.07059204808273058, "grad_norm": 1.9268933760044415, "learning_rate": 7.0574374079528725e-06, "loss": 0.4087, "step": 2396 }, { "epoch": 0.07062151054019415, "grad_norm": 1.7973206947190015, "learning_rate": 7.0603829160530205e-06, "loss": 0.544, "step": 2397 }, { "epoch": 0.07065097299765774, "grad_norm": 2.3467474313752708, "learning_rate": 7.063328424153168e-06, "loss": 0.8513, "step": 2398 }, { "epoch": 0.07068043545512132, "grad_norm": 1.917514511467544, "learning_rate": 7.066273932253315e-06, "loss": 0.3931, "step": 2399 }, { "epoch": 0.07070989791258489, "grad_norm": 1.7584047555508964, "learning_rate": 7.069219440353462e-06, "loss": 0.5461, "step": 2400 }, { "epoch": 0.07073936037004847, "grad_norm": 1.7312467479170772, "learning_rate": 7.072164948453609e-06, "loss": 0.4378, "step": 2401 }, { "epoch": 0.07076882282751204, "grad_norm": 2.1390640592341286, "learning_rate": 7.075110456553756e-06, "loss": 0.6354, "step": 2402 }, { "epoch": 0.07079828528497562, "grad_norm": 1.8852793368322345, "learning_rate": 7.0780559646539035e-06, "loss": 0.5413, "step": 2403 }, { "epoch": 0.07082774774243919, "grad_norm": 2.2001156056239624, "learning_rate": 7.081001472754051e-06, "loss": 0.6985, "step": 2404 }, { "epoch": 0.07085721019990278, "grad_norm": 1.7787210446938317, "learning_rate": 7.083946980854199e-06, "loss": 0.4676, "step": 2405 }, { "epoch": 0.07088667265736635, "grad_norm": 2.272993909692736, "learning_rate": 7.086892488954346e-06, "loss": 0.6003, "step": 2406 }, { "epoch": 0.07091613511482993, "grad_norm": 1.8328227703788975, "learning_rate": 7.089837997054493e-06, "loss": 0.5882, "step": 2407 }, { "epoch": 0.0709455975722935, "grad_norm": 1.806182276244945, "learning_rate": 7.09278350515464e-06, "loss": 0.5918, "step": 2408 }, { "epoch": 0.07097506002975708, "grad_norm": 1.8452285906445143, "learning_rate": 7.095729013254787e-06, "loss": 0.5469, "step": 2409 }, { "epoch": 0.07100452248722065, "grad_norm": 1.8386367740106564, "learning_rate": 7.0986745213549345e-06, "loss": 0.4484, "step": 2410 }, { "epoch": 0.07103398494468424, "grad_norm": 2.1019197242131, "learning_rate": 7.101620029455082e-06, "loss": 0.5627, "step": 2411 }, { "epoch": 0.07106344740214782, "grad_norm": 1.999663833710946, "learning_rate": 7.104565537555229e-06, "loss": 0.6605, "step": 2412 }, { "epoch": 0.07109290985961139, "grad_norm": 1.8835567661825336, "learning_rate": 7.107511045655377e-06, "loss": 0.6679, "step": 2413 }, { "epoch": 0.07112237231707497, "grad_norm": 1.7434934293323998, "learning_rate": 7.110456553755524e-06, "loss": 0.5329, "step": 2414 }, { "epoch": 0.07115183477453854, "grad_norm": 1.9267340175679148, "learning_rate": 7.113402061855671e-06, "loss": 0.5418, "step": 2415 }, { "epoch": 0.07118129723200212, "grad_norm": 2.0961137993072767, "learning_rate": 7.116347569955818e-06, "loss": 0.5598, "step": 2416 }, { "epoch": 0.0712107596894657, "grad_norm": 1.9026639662360683, "learning_rate": 7.1192930780559654e-06, "loss": 0.5919, "step": 2417 }, { "epoch": 0.07124022214692928, "grad_norm": 1.7028639165661845, "learning_rate": 7.122238586156113e-06, "loss": 0.478, "step": 2418 }, { "epoch": 0.07126968460439286, "grad_norm": 1.6801765322792825, "learning_rate": 7.12518409425626e-06, "loss": 0.4829, "step": 2419 }, { "epoch": 0.07129914706185643, "grad_norm": 1.7303920665509207, "learning_rate": 7.128129602356407e-06, "loss": 0.5264, "step": 2420 }, { "epoch": 0.07132860951932, "grad_norm": 1.9293827541865431, "learning_rate": 7.131075110456555e-06, "loss": 0.4981, "step": 2421 }, { "epoch": 0.07135807197678358, "grad_norm": 1.7826111769411948, "learning_rate": 7.134020618556702e-06, "loss": 0.5279, "step": 2422 }, { "epoch": 0.07138753443424715, "grad_norm": 1.864334171834953, "learning_rate": 7.136966126656849e-06, "loss": 0.544, "step": 2423 }, { "epoch": 0.07141699689171074, "grad_norm": 1.7524873359917323, "learning_rate": 7.139911634756996e-06, "loss": 0.5517, "step": 2424 }, { "epoch": 0.07144645934917432, "grad_norm": 2.095759731167092, "learning_rate": 7.1428571428571436e-06, "loss": 0.6766, "step": 2425 }, { "epoch": 0.0714759218066379, "grad_norm": 1.7697950323757121, "learning_rate": 7.145802650957291e-06, "loss": 0.4955, "step": 2426 }, { "epoch": 0.07150538426410147, "grad_norm": 2.0903673709784583, "learning_rate": 7.148748159057438e-06, "loss": 0.5371, "step": 2427 }, { "epoch": 0.07153484672156504, "grad_norm": 2.115080670961838, "learning_rate": 7.151693667157585e-06, "loss": 0.7306, "step": 2428 }, { "epoch": 0.07156430917902862, "grad_norm": 2.1214702573597357, "learning_rate": 7.154639175257733e-06, "loss": 0.6722, "step": 2429 }, { "epoch": 0.0715937716364922, "grad_norm": 1.883794926917431, "learning_rate": 7.15758468335788e-06, "loss": 0.5731, "step": 2430 }, { "epoch": 0.07162323409395578, "grad_norm": 1.6797586194449743, "learning_rate": 7.160530191458027e-06, "loss": 0.5312, "step": 2431 }, { "epoch": 0.07165269655141936, "grad_norm": 1.9379833702199964, "learning_rate": 7.1634756995581745e-06, "loss": 0.6527, "step": 2432 }, { "epoch": 0.07168215900888293, "grad_norm": 1.7919646128535822, "learning_rate": 7.166421207658322e-06, "loss": 0.5589, "step": 2433 }, { "epoch": 0.0717116214663465, "grad_norm": 1.661830015330422, "learning_rate": 7.169366715758469e-06, "loss": 0.5149, "step": 2434 }, { "epoch": 0.07174108392381008, "grad_norm": 1.6982194586124697, "learning_rate": 7.172312223858616e-06, "loss": 0.5492, "step": 2435 }, { "epoch": 0.07177054638127366, "grad_norm": 2.081173362931254, "learning_rate": 7.175257731958763e-06, "loss": 0.7212, "step": 2436 }, { "epoch": 0.07180000883873724, "grad_norm": 1.8303158999424387, "learning_rate": 7.178203240058911e-06, "loss": 0.6264, "step": 2437 }, { "epoch": 0.07182947129620082, "grad_norm": 1.85835701168213, "learning_rate": 7.181148748159058e-06, "loss": 0.6076, "step": 2438 }, { "epoch": 0.0718589337536644, "grad_norm": 1.9139949313973095, "learning_rate": 7.1840942562592055e-06, "loss": 0.6005, "step": 2439 }, { "epoch": 0.07188839621112797, "grad_norm": 1.780826760167302, "learning_rate": 7.187039764359353e-06, "loss": 0.5176, "step": 2440 }, { "epoch": 0.07191785866859154, "grad_norm": 1.885346386749301, "learning_rate": 7.1899852724595e-06, "loss": 0.6026, "step": 2441 }, { "epoch": 0.07194732112605512, "grad_norm": 1.8611522951149877, "learning_rate": 7.192930780559647e-06, "loss": 0.5332, "step": 2442 }, { "epoch": 0.07197678358351871, "grad_norm": 1.759439193822452, "learning_rate": 7.195876288659794e-06, "loss": 0.5452, "step": 2443 }, { "epoch": 0.07200624604098228, "grad_norm": 1.781893384862183, "learning_rate": 7.198821796759941e-06, "loss": 0.5976, "step": 2444 }, { "epoch": 0.07203570849844586, "grad_norm": 1.8799043025397861, "learning_rate": 7.201767304860089e-06, "loss": 0.6554, "step": 2445 }, { "epoch": 0.07206517095590943, "grad_norm": 1.7599146983020442, "learning_rate": 7.2047128129602365e-06, "loss": 0.5223, "step": 2446 }, { "epoch": 0.07209463341337301, "grad_norm": 1.8342176467467595, "learning_rate": 7.207658321060384e-06, "loss": 0.4818, "step": 2447 }, { "epoch": 0.07212409587083658, "grad_norm": 1.69919289551457, "learning_rate": 7.210603829160531e-06, "loss": 0.5123, "step": 2448 }, { "epoch": 0.07215355832830016, "grad_norm": 1.8426781952394318, "learning_rate": 7.213549337260678e-06, "loss": 0.5457, "step": 2449 }, { "epoch": 0.07218302078576375, "grad_norm": 2.1912104345913384, "learning_rate": 7.216494845360825e-06, "loss": 0.6074, "step": 2450 }, { "epoch": 0.07221248324322732, "grad_norm": 1.9755448134758118, "learning_rate": 7.219440353460972e-06, "loss": 0.7125, "step": 2451 }, { "epoch": 0.0722419457006909, "grad_norm": 2.1094445168671077, "learning_rate": 7.2223858615611195e-06, "loss": 0.4113, "step": 2452 }, { "epoch": 0.07227140815815447, "grad_norm": 1.868466391206045, "learning_rate": 7.225331369661267e-06, "loss": 0.5154, "step": 2453 }, { "epoch": 0.07230087061561805, "grad_norm": 1.8219005986623116, "learning_rate": 7.228276877761415e-06, "loss": 0.5263, "step": 2454 }, { "epoch": 0.07233033307308162, "grad_norm": 2.0834978155917248, "learning_rate": 7.231222385861562e-06, "loss": 0.7928, "step": 2455 }, { "epoch": 0.07235979553054521, "grad_norm": 2.1296671495600052, "learning_rate": 7.234167893961709e-06, "loss": 0.5775, "step": 2456 }, { "epoch": 0.07238925798800878, "grad_norm": 1.7484685861666307, "learning_rate": 7.237113402061856e-06, "loss": 0.5062, "step": 2457 }, { "epoch": 0.07241872044547236, "grad_norm": 1.8813952330946406, "learning_rate": 7.240058910162003e-06, "loss": 0.6068, "step": 2458 }, { "epoch": 0.07244818290293593, "grad_norm": 1.9258450829139253, "learning_rate": 7.2430044182621504e-06, "loss": 0.6716, "step": 2459 }, { "epoch": 0.07247764536039951, "grad_norm": 1.8188820165954263, "learning_rate": 7.245949926362298e-06, "loss": 0.6097, "step": 2460 }, { "epoch": 0.07250710781786308, "grad_norm": 1.676069337661763, "learning_rate": 7.248895434462445e-06, "loss": 0.3821, "step": 2461 }, { "epoch": 0.07253657027532666, "grad_norm": 1.776269837158961, "learning_rate": 7.251840942562593e-06, "loss": 0.5773, "step": 2462 }, { "epoch": 0.07256603273279025, "grad_norm": 1.8690827539925277, "learning_rate": 7.25478645066274e-06, "loss": 0.6518, "step": 2463 }, { "epoch": 0.07259549519025382, "grad_norm": 1.9957802466267687, "learning_rate": 7.257731958762887e-06, "loss": 0.519, "step": 2464 }, { "epoch": 0.0726249576477174, "grad_norm": 1.9261081127363797, "learning_rate": 7.260677466863034e-06, "loss": 0.6855, "step": 2465 }, { "epoch": 0.07265442010518097, "grad_norm": 2.1932591332164777, "learning_rate": 7.263622974963181e-06, "loss": 0.4722, "step": 2466 }, { "epoch": 0.07268388256264455, "grad_norm": 1.7640040201497185, "learning_rate": 7.2665684830633286e-06, "loss": 0.6431, "step": 2467 }, { "epoch": 0.07271334502010812, "grad_norm": 1.8107754198164343, "learning_rate": 7.269513991163476e-06, "loss": 0.5685, "step": 2468 }, { "epoch": 0.07274280747757171, "grad_norm": 1.9559891670184713, "learning_rate": 7.272459499263623e-06, "loss": 0.7302, "step": 2469 }, { "epoch": 0.07277226993503529, "grad_norm": 1.9363146722204325, "learning_rate": 7.275405007363771e-06, "loss": 0.6459, "step": 2470 }, { "epoch": 0.07280173239249886, "grad_norm": 1.7429772395326786, "learning_rate": 7.278350515463918e-06, "loss": 0.7485, "step": 2471 }, { "epoch": 0.07283119484996244, "grad_norm": 1.87656321441682, "learning_rate": 7.281296023564065e-06, "loss": 0.5888, "step": 2472 }, { "epoch": 0.07286065730742601, "grad_norm": 1.8243738459816317, "learning_rate": 7.284241531664212e-06, "loss": 0.5434, "step": 2473 }, { "epoch": 0.07289011976488959, "grad_norm": 1.8765987075441941, "learning_rate": 7.2871870397643595e-06, "loss": 0.5992, "step": 2474 }, { "epoch": 0.07291958222235316, "grad_norm": 1.7384757387439302, "learning_rate": 7.290132547864507e-06, "loss": 0.5945, "step": 2475 }, { "epoch": 0.07294904467981675, "grad_norm": 1.7772013730268537, "learning_rate": 7.293078055964654e-06, "loss": 0.5734, "step": 2476 }, { "epoch": 0.07297850713728032, "grad_norm": 1.9043250554073279, "learning_rate": 7.296023564064801e-06, "loss": 0.535, "step": 2477 }, { "epoch": 0.0730079695947439, "grad_norm": 2.0218376979723693, "learning_rate": 7.298969072164949e-06, "loss": 0.6912, "step": 2478 }, { "epoch": 0.07303743205220747, "grad_norm": 1.8633904588348769, "learning_rate": 7.301914580265096e-06, "loss": 0.5478, "step": 2479 }, { "epoch": 0.07306689450967105, "grad_norm": 1.635132833331722, "learning_rate": 7.304860088365243e-06, "loss": 0.4642, "step": 2480 }, { "epoch": 0.07309635696713462, "grad_norm": 1.8493664707659905, "learning_rate": 7.3078055964653905e-06, "loss": 0.5528, "step": 2481 }, { "epoch": 0.07312581942459821, "grad_norm": 1.893341376868616, "learning_rate": 7.310751104565538e-06, "loss": 0.4434, "step": 2482 }, { "epoch": 0.07315528188206179, "grad_norm": 1.9959696331476107, "learning_rate": 7.313696612665685e-06, "loss": 0.6452, "step": 2483 }, { "epoch": 0.07318474433952536, "grad_norm": 1.635070570528802, "learning_rate": 7.316642120765832e-06, "loss": 0.5207, "step": 2484 }, { "epoch": 0.07321420679698894, "grad_norm": 1.7711658791373068, "learning_rate": 7.319587628865979e-06, "loss": 0.4725, "step": 2485 }, { "epoch": 0.07324366925445251, "grad_norm": 2.0052301628281004, "learning_rate": 7.322533136966127e-06, "loss": 0.7756, "step": 2486 }, { "epoch": 0.07327313171191609, "grad_norm": 1.9810216494676505, "learning_rate": 7.325478645066274e-06, "loss": 0.6437, "step": 2487 }, { "epoch": 0.07330259416937966, "grad_norm": 1.7457122670572829, "learning_rate": 7.3284241531664215e-06, "loss": 0.5576, "step": 2488 }, { "epoch": 0.07333205662684325, "grad_norm": 1.9231080978589536, "learning_rate": 7.331369661266569e-06, "loss": 0.5727, "step": 2489 }, { "epoch": 0.07336151908430683, "grad_norm": 2.072691073340658, "learning_rate": 7.334315169366716e-06, "loss": 0.4699, "step": 2490 }, { "epoch": 0.0733909815417704, "grad_norm": 1.8195426723627852, "learning_rate": 7.337260677466863e-06, "loss": 0.4793, "step": 2491 }, { "epoch": 0.07342044399923398, "grad_norm": 1.9154067839208029, "learning_rate": 7.34020618556701e-06, "loss": 0.5926, "step": 2492 }, { "epoch": 0.07344990645669755, "grad_norm": 1.6774513353474052, "learning_rate": 7.343151693667157e-06, "loss": 0.4144, "step": 2493 }, { "epoch": 0.07347936891416113, "grad_norm": 1.9266071473388717, "learning_rate": 7.346097201767306e-06, "loss": 0.4894, "step": 2494 }, { "epoch": 0.07350883137162471, "grad_norm": 2.0135038744243174, "learning_rate": 7.349042709867453e-06, "loss": 0.5754, "step": 2495 }, { "epoch": 0.07353829382908829, "grad_norm": 1.8206862012048073, "learning_rate": 7.3519882179676005e-06, "loss": 0.4867, "step": 2496 }, { "epoch": 0.07356775628655186, "grad_norm": 2.092738814607405, "learning_rate": 7.354933726067748e-06, "loss": 0.6376, "step": 2497 }, { "epoch": 0.07359721874401544, "grad_norm": 2.015811061016545, "learning_rate": 7.357879234167895e-06, "loss": 0.5442, "step": 2498 }, { "epoch": 0.07362668120147901, "grad_norm": 2.133623936516152, "learning_rate": 7.360824742268042e-06, "loss": 0.6351, "step": 2499 }, { "epoch": 0.07365614365894259, "grad_norm": 1.8716263890121883, "learning_rate": 7.36377025036819e-06, "loss": 0.6988, "step": 2500 }, { "epoch": 0.07368560611640616, "grad_norm": 1.8975785642759322, "learning_rate": 7.366715758468337e-06, "loss": 0.5645, "step": 2501 }, { "epoch": 0.07371506857386975, "grad_norm": 1.9496443415639833, "learning_rate": 7.369661266568484e-06, "loss": 0.6118, "step": 2502 }, { "epoch": 0.07374453103133333, "grad_norm": 1.7888572665083098, "learning_rate": 7.3726067746686314e-06, "loss": 0.5953, "step": 2503 }, { "epoch": 0.0737739934887969, "grad_norm": 2.145937843231173, "learning_rate": 7.375552282768779e-06, "loss": 0.5125, "step": 2504 }, { "epoch": 0.07380345594626048, "grad_norm": 1.8821086443409858, "learning_rate": 7.378497790868926e-06, "loss": 0.7418, "step": 2505 }, { "epoch": 0.07383291840372405, "grad_norm": 1.886427210396459, "learning_rate": 7.381443298969073e-06, "loss": 0.4371, "step": 2506 }, { "epoch": 0.07386238086118763, "grad_norm": 1.7787594022305564, "learning_rate": 7.38438880706922e-06, "loss": 0.4846, "step": 2507 }, { "epoch": 0.07389184331865122, "grad_norm": 1.6749929271808874, "learning_rate": 7.387334315169368e-06, "loss": 0.5675, "step": 2508 }, { "epoch": 0.07392130577611479, "grad_norm": 1.8542363374717505, "learning_rate": 7.390279823269515e-06, "loss": 0.5541, "step": 2509 }, { "epoch": 0.07395076823357837, "grad_norm": 1.7739457452937293, "learning_rate": 7.393225331369662e-06, "loss": 0.5512, "step": 2510 }, { "epoch": 0.07398023069104194, "grad_norm": 2.125218044282708, "learning_rate": 7.39617083946981e-06, "loss": 0.5673, "step": 2511 }, { "epoch": 0.07400969314850551, "grad_norm": 1.792038546760182, "learning_rate": 7.399116347569957e-06, "loss": 0.4659, "step": 2512 }, { "epoch": 0.07403915560596909, "grad_norm": 2.1469126549438484, "learning_rate": 7.402061855670104e-06, "loss": 0.607, "step": 2513 }, { "epoch": 0.07406861806343266, "grad_norm": 2.1416138349034064, "learning_rate": 7.405007363770251e-06, "loss": 0.5857, "step": 2514 }, { "epoch": 0.07409808052089625, "grad_norm": 1.6987842526947112, "learning_rate": 7.407952871870398e-06, "loss": 0.6099, "step": 2515 }, { "epoch": 0.07412754297835983, "grad_norm": 1.8174953276378878, "learning_rate": 7.410898379970546e-06, "loss": 0.5185, "step": 2516 }, { "epoch": 0.0741570054358234, "grad_norm": 1.7811295033232464, "learning_rate": 7.413843888070693e-06, "loss": 0.4648, "step": 2517 }, { "epoch": 0.07418646789328698, "grad_norm": 2.125208443896715, "learning_rate": 7.4167893961708406e-06, "loss": 0.7175, "step": 2518 }, { "epoch": 0.07421593035075055, "grad_norm": 1.9155276739413762, "learning_rate": 7.419734904270988e-06, "loss": 0.656, "step": 2519 }, { "epoch": 0.07424539280821413, "grad_norm": 2.0638157593365922, "learning_rate": 7.422680412371135e-06, "loss": 0.4917, "step": 2520 }, { "epoch": 0.07427485526567772, "grad_norm": 1.7970222161322227, "learning_rate": 7.425625920471282e-06, "loss": 0.5002, "step": 2521 }, { "epoch": 0.07430431772314129, "grad_norm": 1.775980271420812, "learning_rate": 7.428571428571429e-06, "loss": 0.4817, "step": 2522 }, { "epoch": 0.07433378018060487, "grad_norm": 1.8307113692148402, "learning_rate": 7.431516936671576e-06, "loss": 0.6876, "step": 2523 }, { "epoch": 0.07436324263806844, "grad_norm": 1.7677568455324697, "learning_rate": 7.434462444771724e-06, "loss": 0.5698, "step": 2524 }, { "epoch": 0.07439270509553202, "grad_norm": 2.0078305539906474, "learning_rate": 7.4374079528718715e-06, "loss": 0.4972, "step": 2525 }, { "epoch": 0.07442216755299559, "grad_norm": 2.109502746430433, "learning_rate": 7.440353460972019e-06, "loss": 0.5583, "step": 2526 }, { "epoch": 0.07445163001045917, "grad_norm": 1.9881498723869742, "learning_rate": 7.443298969072166e-06, "loss": 0.5803, "step": 2527 }, { "epoch": 0.07448109246792275, "grad_norm": 1.869631955823816, "learning_rate": 7.446244477172313e-06, "loss": 0.5773, "step": 2528 }, { "epoch": 0.07451055492538633, "grad_norm": 1.9372152115820496, "learning_rate": 7.44918998527246e-06, "loss": 0.5876, "step": 2529 }, { "epoch": 0.0745400173828499, "grad_norm": 1.5723105019946015, "learning_rate": 7.452135493372607e-06, "loss": 0.5578, "step": 2530 }, { "epoch": 0.07456947984031348, "grad_norm": 1.6915475060067153, "learning_rate": 7.4550810014727545e-06, "loss": 0.3706, "step": 2531 }, { "epoch": 0.07459894229777705, "grad_norm": 1.6430920747961348, "learning_rate": 7.458026509572902e-06, "loss": 0.4688, "step": 2532 }, { "epoch": 0.07462840475524063, "grad_norm": 1.9912475843067146, "learning_rate": 7.46097201767305e-06, "loss": 0.4523, "step": 2533 }, { "epoch": 0.07465786721270422, "grad_norm": 1.8863735545109408, "learning_rate": 7.463917525773197e-06, "loss": 0.4004, "step": 2534 }, { "epoch": 0.07468732967016779, "grad_norm": 2.093626289001254, "learning_rate": 7.466863033873344e-06, "loss": 0.4999, "step": 2535 }, { "epoch": 0.07471679212763137, "grad_norm": 1.9292952128122356, "learning_rate": 7.469808541973491e-06, "loss": 0.6248, "step": 2536 }, { "epoch": 0.07474625458509494, "grad_norm": 1.8054244199937934, "learning_rate": 7.472754050073638e-06, "loss": 0.55, "step": 2537 }, { "epoch": 0.07477571704255852, "grad_norm": 1.6563787546842381, "learning_rate": 7.4756995581737855e-06, "loss": 0.4749, "step": 2538 }, { "epoch": 0.07480517950002209, "grad_norm": 1.74342879811234, "learning_rate": 7.478645066273933e-06, "loss": 0.513, "step": 2539 }, { "epoch": 0.07483464195748567, "grad_norm": 1.709921644406873, "learning_rate": 7.48159057437408e-06, "loss": 0.5132, "step": 2540 }, { "epoch": 0.07486410441494926, "grad_norm": 1.827509943928977, "learning_rate": 7.484536082474228e-06, "loss": 0.6253, "step": 2541 }, { "epoch": 0.07489356687241283, "grad_norm": 1.7511226525922117, "learning_rate": 7.487481590574375e-06, "loss": 0.4926, "step": 2542 }, { "epoch": 0.0749230293298764, "grad_norm": 2.080269069291148, "learning_rate": 7.490427098674522e-06, "loss": 0.7103, "step": 2543 }, { "epoch": 0.07495249178733998, "grad_norm": 1.69004758009266, "learning_rate": 7.493372606774669e-06, "loss": 0.5162, "step": 2544 }, { "epoch": 0.07498195424480356, "grad_norm": 2.2048381170718376, "learning_rate": 7.4963181148748165e-06, "loss": 0.7393, "step": 2545 }, { "epoch": 0.07501141670226713, "grad_norm": 1.8901934978681394, "learning_rate": 7.499263622974964e-06, "loss": 0.5626, "step": 2546 }, { "epoch": 0.07504087915973072, "grad_norm": 1.7747459189672956, "learning_rate": 7.502209131075111e-06, "loss": 0.5974, "step": 2547 }, { "epoch": 0.0750703416171943, "grad_norm": 1.9983295106151029, "learning_rate": 7.505154639175258e-06, "loss": 0.4064, "step": 2548 }, { "epoch": 0.07509980407465787, "grad_norm": 1.9083555689569414, "learning_rate": 7.508100147275406e-06, "loss": 0.4462, "step": 2549 }, { "epoch": 0.07512926653212144, "grad_norm": 1.79502382202933, "learning_rate": 7.511045655375553e-06, "loss": 0.5572, "step": 2550 }, { "epoch": 0.07515872898958502, "grad_norm": 2.2483895855669394, "learning_rate": 7.5139911634757e-06, "loss": 0.4787, "step": 2551 }, { "epoch": 0.0751881914470486, "grad_norm": 1.8131154895609611, "learning_rate": 7.5169366715758474e-06, "loss": 0.5692, "step": 2552 }, { "epoch": 0.07521765390451217, "grad_norm": 1.7393809421325717, "learning_rate": 7.519882179675995e-06, "loss": 0.5226, "step": 2553 }, { "epoch": 0.07524711636197576, "grad_norm": 2.2410469190710316, "learning_rate": 7.522827687776142e-06, "loss": 0.6578, "step": 2554 }, { "epoch": 0.07527657881943933, "grad_norm": 2.0661565410655336, "learning_rate": 7.525773195876289e-06, "loss": 0.7036, "step": 2555 }, { "epoch": 0.07530604127690291, "grad_norm": 1.6159772966601285, "learning_rate": 7.528718703976436e-06, "loss": 0.3827, "step": 2556 }, { "epoch": 0.07533550373436648, "grad_norm": 1.7372225368029368, "learning_rate": 7.531664212076584e-06, "loss": 0.4305, "step": 2557 }, { "epoch": 0.07536496619183006, "grad_norm": 1.74491408208411, "learning_rate": 7.534609720176731e-06, "loss": 0.5159, "step": 2558 }, { "epoch": 0.07539442864929363, "grad_norm": 1.827804417690819, "learning_rate": 7.537555228276878e-06, "loss": 0.624, "step": 2559 }, { "epoch": 0.07542389110675722, "grad_norm": 1.7591724658034293, "learning_rate": 7.5405007363770256e-06, "loss": 0.5756, "step": 2560 }, { "epoch": 0.0754533535642208, "grad_norm": 1.8792282919754824, "learning_rate": 7.543446244477173e-06, "loss": 0.5251, "step": 2561 }, { "epoch": 0.07548281602168437, "grad_norm": 2.084332247752275, "learning_rate": 7.54639175257732e-06, "loss": 0.7217, "step": 2562 }, { "epoch": 0.07551227847914795, "grad_norm": 1.980800430417609, "learning_rate": 7.549337260677467e-06, "loss": 0.5788, "step": 2563 }, { "epoch": 0.07554174093661152, "grad_norm": 1.8641461389884344, "learning_rate": 7.552282768777614e-06, "loss": 0.6378, "step": 2564 }, { "epoch": 0.0755712033940751, "grad_norm": 1.9656017107857835, "learning_rate": 7.555228276877762e-06, "loss": 0.5775, "step": 2565 }, { "epoch": 0.07560066585153867, "grad_norm": 1.6919113103705143, "learning_rate": 7.558173784977909e-06, "loss": 0.4695, "step": 2566 }, { "epoch": 0.07563012830900226, "grad_norm": 1.8416444428544043, "learning_rate": 7.5611192930780565e-06, "loss": 0.5377, "step": 2567 }, { "epoch": 0.07565959076646583, "grad_norm": 1.4491561840361102, "learning_rate": 7.564064801178204e-06, "loss": 0.3371, "step": 2568 }, { "epoch": 0.07568905322392941, "grad_norm": 1.9726154833517788, "learning_rate": 7.567010309278351e-06, "loss": 0.5905, "step": 2569 }, { "epoch": 0.07571851568139298, "grad_norm": 1.744962784475546, "learning_rate": 7.569955817378498e-06, "loss": 0.6128, "step": 2570 }, { "epoch": 0.07574797813885656, "grad_norm": 1.7175404872879498, "learning_rate": 7.572901325478645e-06, "loss": 0.4451, "step": 2571 }, { "epoch": 0.07577744059632013, "grad_norm": 1.9020346684900566, "learning_rate": 7.575846833578792e-06, "loss": 0.4656, "step": 2572 }, { "epoch": 0.07580690305378372, "grad_norm": 1.6689240733665374, "learning_rate": 7.57879234167894e-06, "loss": 0.411, "step": 2573 }, { "epoch": 0.0758363655112473, "grad_norm": 1.8807435960186707, "learning_rate": 7.5817378497790875e-06, "loss": 0.5529, "step": 2574 }, { "epoch": 0.07586582796871087, "grad_norm": 1.8491257700340045, "learning_rate": 7.584683357879235e-06, "loss": 0.6158, "step": 2575 }, { "epoch": 0.07589529042617445, "grad_norm": 1.909019397334247, "learning_rate": 7.587628865979382e-06, "loss": 0.5963, "step": 2576 }, { "epoch": 0.07592475288363802, "grad_norm": 1.5794994310498593, "learning_rate": 7.590574374079529e-06, "loss": 0.4504, "step": 2577 }, { "epoch": 0.0759542153411016, "grad_norm": 1.8076445252987265, "learning_rate": 7.593519882179676e-06, "loss": 0.4816, "step": 2578 }, { "epoch": 0.07598367779856517, "grad_norm": 1.945933673832075, "learning_rate": 7.596465390279823e-06, "loss": 0.5665, "step": 2579 }, { "epoch": 0.07601314025602876, "grad_norm": 1.7006563258419136, "learning_rate": 7.5994108983799705e-06, "loss": 0.5176, "step": 2580 }, { "epoch": 0.07604260271349234, "grad_norm": 1.8420737628591175, "learning_rate": 7.6023564064801185e-06, "loss": 0.4808, "step": 2581 }, { "epoch": 0.07607206517095591, "grad_norm": 2.024620864429711, "learning_rate": 7.605301914580266e-06, "loss": 0.6349, "step": 2582 }, { "epoch": 0.07610152762841949, "grad_norm": 1.856588273258346, "learning_rate": 7.608247422680413e-06, "loss": 0.579, "step": 2583 }, { "epoch": 0.07613099008588306, "grad_norm": 1.8869010350332398, "learning_rate": 7.61119293078056e-06, "loss": 0.5414, "step": 2584 }, { "epoch": 0.07616045254334664, "grad_norm": 1.6422420909943107, "learning_rate": 7.614138438880707e-06, "loss": 0.4744, "step": 2585 }, { "epoch": 0.07618991500081022, "grad_norm": 2.018128941885034, "learning_rate": 7.617083946980854e-06, "loss": 0.7202, "step": 2586 }, { "epoch": 0.0762193774582738, "grad_norm": 2.1159057556478618, "learning_rate": 7.6200294550810015e-06, "loss": 0.5596, "step": 2587 }, { "epoch": 0.07624883991573737, "grad_norm": 1.8514949287004794, "learning_rate": 7.622974963181149e-06, "loss": 0.5405, "step": 2588 }, { "epoch": 0.07627830237320095, "grad_norm": 1.7953169308543853, "learning_rate": 7.625920471281297e-06, "loss": 0.6009, "step": 2589 }, { "epoch": 0.07630776483066452, "grad_norm": 1.754084863866651, "learning_rate": 7.628865979381444e-06, "loss": 0.6921, "step": 2590 }, { "epoch": 0.0763372272881281, "grad_norm": 2.1038659877028483, "learning_rate": 7.63181148748159e-06, "loss": 0.6088, "step": 2591 }, { "epoch": 0.07636668974559167, "grad_norm": 1.9286777930548924, "learning_rate": 7.634756995581738e-06, "loss": 0.5629, "step": 2592 }, { "epoch": 0.07639615220305526, "grad_norm": 1.9660860210213396, "learning_rate": 7.637702503681884e-06, "loss": 0.5944, "step": 2593 }, { "epoch": 0.07642561466051884, "grad_norm": 1.7605445349443822, "learning_rate": 7.640648011782032e-06, "loss": 0.5997, "step": 2594 }, { "epoch": 0.07645507711798241, "grad_norm": 1.6527839388437278, "learning_rate": 7.64359351988218e-06, "loss": 0.5237, "step": 2595 }, { "epoch": 0.07648453957544599, "grad_norm": 1.8049078889632364, "learning_rate": 7.646539027982327e-06, "loss": 0.4178, "step": 2596 }, { "epoch": 0.07651400203290956, "grad_norm": 1.9222493712526494, "learning_rate": 7.649484536082475e-06, "loss": 0.519, "step": 2597 }, { "epoch": 0.07654346449037314, "grad_norm": 1.6914208753832156, "learning_rate": 7.652430044182621e-06, "loss": 0.5283, "step": 2598 }, { "epoch": 0.07657292694783673, "grad_norm": 1.8798744781907473, "learning_rate": 7.655375552282769e-06, "loss": 0.6777, "step": 2599 }, { "epoch": 0.0766023894053003, "grad_norm": 1.795032536176134, "learning_rate": 7.658321060382917e-06, "loss": 0.5475, "step": 2600 }, { "epoch": 0.07663185186276388, "grad_norm": 1.8538527424821123, "learning_rate": 7.661266568483065e-06, "loss": 0.645, "step": 2601 }, { "epoch": 0.07666131432022745, "grad_norm": 1.767114829168879, "learning_rate": 7.664212076583211e-06, "loss": 0.5297, "step": 2602 }, { "epoch": 0.07669077677769102, "grad_norm": 1.7432621843201754, "learning_rate": 7.66715758468336e-06, "loss": 0.5694, "step": 2603 }, { "epoch": 0.0767202392351546, "grad_norm": 2.0229380606798344, "learning_rate": 7.670103092783506e-06, "loss": 0.4703, "step": 2604 }, { "epoch": 0.07674970169261817, "grad_norm": 1.9765774824963915, "learning_rate": 7.673048600883654e-06, "loss": 0.7883, "step": 2605 }, { "epoch": 0.07677916415008176, "grad_norm": 2.0457430252697466, "learning_rate": 7.6759941089838e-06, "loss": 0.7109, "step": 2606 }, { "epoch": 0.07680862660754534, "grad_norm": 1.7221875807385016, "learning_rate": 7.678939617083948e-06, "loss": 0.5563, "step": 2607 }, { "epoch": 0.07683808906500891, "grad_norm": 1.7632830185223776, "learning_rate": 7.681885125184096e-06, "loss": 0.5397, "step": 2608 }, { "epoch": 0.07686755152247249, "grad_norm": 1.9201326497340347, "learning_rate": 7.684830633284242e-06, "loss": 0.4624, "step": 2609 }, { "epoch": 0.07689701397993606, "grad_norm": 1.643754128801388, "learning_rate": 7.68777614138439e-06, "loss": 0.5552, "step": 2610 }, { "epoch": 0.07692647643739964, "grad_norm": 2.0129212951590065, "learning_rate": 7.690721649484537e-06, "loss": 0.5779, "step": 2611 }, { "epoch": 0.07695593889486323, "grad_norm": 1.8484567450772118, "learning_rate": 7.693667157584685e-06, "loss": 0.4762, "step": 2612 }, { "epoch": 0.0769854013523268, "grad_norm": 2.052400321253879, "learning_rate": 7.696612665684831e-06, "loss": 0.7545, "step": 2613 }, { "epoch": 0.07701486380979038, "grad_norm": 1.9204439554230848, "learning_rate": 7.699558173784979e-06, "loss": 0.699, "step": 2614 }, { "epoch": 0.07704432626725395, "grad_norm": 2.044257649260611, "learning_rate": 7.702503681885127e-06, "loss": 0.4904, "step": 2615 }, { "epoch": 0.07707378872471753, "grad_norm": 1.8691621883491198, "learning_rate": 7.705449189985273e-06, "loss": 0.6594, "step": 2616 }, { "epoch": 0.0771032511821811, "grad_norm": 1.9292439803039594, "learning_rate": 7.708394698085421e-06, "loss": 0.6971, "step": 2617 }, { "epoch": 0.07713271363964468, "grad_norm": 1.6524815666223536, "learning_rate": 7.711340206185568e-06, "loss": 0.4874, "step": 2618 }, { "epoch": 0.07716217609710826, "grad_norm": 1.8150086010480264, "learning_rate": 7.714285714285716e-06, "loss": 0.5019, "step": 2619 }, { "epoch": 0.07719163855457184, "grad_norm": 1.997599995412917, "learning_rate": 7.717231222385862e-06, "loss": 0.5938, "step": 2620 }, { "epoch": 0.07722110101203541, "grad_norm": 1.9274569027592865, "learning_rate": 7.72017673048601e-06, "loss": 0.5331, "step": 2621 }, { "epoch": 0.07725056346949899, "grad_norm": 2.04709359187261, "learning_rate": 7.723122238586156e-06, "loss": 0.5698, "step": 2622 }, { "epoch": 0.07728002592696256, "grad_norm": 1.7493929086985829, "learning_rate": 7.726067746686304e-06, "loss": 0.5238, "step": 2623 }, { "epoch": 0.07730948838442614, "grad_norm": 1.7442593484057374, "learning_rate": 7.729013254786452e-06, "loss": 0.5069, "step": 2624 }, { "epoch": 0.07733895084188973, "grad_norm": 1.9010045229512078, "learning_rate": 7.731958762886599e-06, "loss": 0.6445, "step": 2625 }, { "epoch": 0.0773684132993533, "grad_norm": 1.912151671125734, "learning_rate": 7.734904270986747e-06, "loss": 0.558, "step": 2626 }, { "epoch": 0.07739787575681688, "grad_norm": 1.763384398302099, "learning_rate": 7.737849779086893e-06, "loss": 0.5632, "step": 2627 }, { "epoch": 0.07742733821428045, "grad_norm": 1.9080175539247353, "learning_rate": 7.740795287187041e-06, "loss": 0.4785, "step": 2628 }, { "epoch": 0.07745680067174403, "grad_norm": 1.8067093105564724, "learning_rate": 7.743740795287187e-06, "loss": 0.4723, "step": 2629 }, { "epoch": 0.0774862631292076, "grad_norm": 1.7998422433507097, "learning_rate": 7.746686303387335e-06, "loss": 0.515, "step": 2630 }, { "epoch": 0.07751572558667118, "grad_norm": 2.0507508844175164, "learning_rate": 7.749631811487482e-06, "loss": 0.6796, "step": 2631 }, { "epoch": 0.07754518804413477, "grad_norm": 1.7650879880606336, "learning_rate": 7.75257731958763e-06, "loss": 0.5076, "step": 2632 }, { "epoch": 0.07757465050159834, "grad_norm": 1.6933871658563684, "learning_rate": 7.755522827687778e-06, "loss": 0.4142, "step": 2633 }, { "epoch": 0.07760411295906192, "grad_norm": 1.9115124341901193, "learning_rate": 7.758468335787924e-06, "loss": 0.5982, "step": 2634 }, { "epoch": 0.07763357541652549, "grad_norm": 1.7108764968883312, "learning_rate": 7.761413843888072e-06, "loss": 0.5577, "step": 2635 }, { "epoch": 0.07766303787398907, "grad_norm": 1.9017609738448695, "learning_rate": 7.764359351988218e-06, "loss": 0.5545, "step": 2636 }, { "epoch": 0.07769250033145264, "grad_norm": 1.9393632113444423, "learning_rate": 7.767304860088366e-06, "loss": 0.6398, "step": 2637 }, { "epoch": 0.07772196278891623, "grad_norm": 1.759910430837055, "learning_rate": 7.770250368188513e-06, "loss": 0.5684, "step": 2638 }, { "epoch": 0.0777514252463798, "grad_norm": 1.9776555232108128, "learning_rate": 7.77319587628866e-06, "loss": 0.6794, "step": 2639 }, { "epoch": 0.07778088770384338, "grad_norm": 1.7227991566444378, "learning_rate": 7.776141384388809e-06, "loss": 0.5426, "step": 2640 }, { "epoch": 0.07781035016130695, "grad_norm": 1.6713645180526087, "learning_rate": 7.779086892488955e-06, "loss": 0.5479, "step": 2641 }, { "epoch": 0.07783981261877053, "grad_norm": 1.8189590373365208, "learning_rate": 7.782032400589103e-06, "loss": 0.5694, "step": 2642 }, { "epoch": 0.0778692750762341, "grad_norm": 1.7427894031569418, "learning_rate": 7.78497790868925e-06, "loss": 0.5749, "step": 2643 }, { "epoch": 0.07789873753369768, "grad_norm": 1.777118480233315, "learning_rate": 7.787923416789397e-06, "loss": 0.5378, "step": 2644 }, { "epoch": 0.07792819999116127, "grad_norm": 1.8046486575978056, "learning_rate": 7.790868924889544e-06, "loss": 0.4364, "step": 2645 }, { "epoch": 0.07795766244862484, "grad_norm": 1.6550027472641307, "learning_rate": 7.793814432989692e-06, "loss": 0.5387, "step": 2646 }, { "epoch": 0.07798712490608842, "grad_norm": 1.7179350552929324, "learning_rate": 7.796759941089838e-06, "loss": 0.5026, "step": 2647 }, { "epoch": 0.07801658736355199, "grad_norm": 2.0617353653935417, "learning_rate": 7.799705449189986e-06, "loss": 0.609, "step": 2648 }, { "epoch": 0.07804604982101557, "grad_norm": 1.9666380793483975, "learning_rate": 7.802650957290134e-06, "loss": 0.6791, "step": 2649 }, { "epoch": 0.07807551227847914, "grad_norm": 1.914745820547612, "learning_rate": 7.80559646539028e-06, "loss": 0.4884, "step": 2650 }, { "epoch": 0.07810497473594273, "grad_norm": 1.8044758471172315, "learning_rate": 7.808541973490428e-06, "loss": 0.5178, "step": 2651 }, { "epoch": 0.0781344371934063, "grad_norm": 1.9378095768902324, "learning_rate": 7.811487481590575e-06, "loss": 0.5746, "step": 2652 }, { "epoch": 0.07816389965086988, "grad_norm": 2.056223018206075, "learning_rate": 7.814432989690723e-06, "loss": 0.6054, "step": 2653 }, { "epoch": 0.07819336210833346, "grad_norm": 2.267056705633494, "learning_rate": 7.817378497790869e-06, "loss": 0.6463, "step": 2654 }, { "epoch": 0.07822282456579703, "grad_norm": 1.9463136696309598, "learning_rate": 7.820324005891017e-06, "loss": 0.5432, "step": 2655 }, { "epoch": 0.0782522870232606, "grad_norm": 2.0814470787468524, "learning_rate": 7.823269513991165e-06, "loss": 0.706, "step": 2656 }, { "epoch": 0.07828174948072418, "grad_norm": 2.1461270275100883, "learning_rate": 7.826215022091311e-06, "loss": 0.6485, "step": 2657 }, { "epoch": 0.07831121193818777, "grad_norm": 1.8856009853165017, "learning_rate": 7.82916053019146e-06, "loss": 0.7032, "step": 2658 }, { "epoch": 0.07834067439565134, "grad_norm": 2.000870711240841, "learning_rate": 7.832106038291606e-06, "loss": 0.6803, "step": 2659 }, { "epoch": 0.07837013685311492, "grad_norm": 1.7806812502296172, "learning_rate": 7.835051546391754e-06, "loss": 0.6161, "step": 2660 }, { "epoch": 0.0783995993105785, "grad_norm": 1.745254700083523, "learning_rate": 7.8379970544919e-06, "loss": 0.6022, "step": 2661 }, { "epoch": 0.07842906176804207, "grad_norm": 1.9979414474671717, "learning_rate": 7.840942562592048e-06, "loss": 0.695, "step": 2662 }, { "epoch": 0.07845852422550564, "grad_norm": 1.7022480198063616, "learning_rate": 7.843888070692194e-06, "loss": 0.4148, "step": 2663 }, { "epoch": 0.07848798668296923, "grad_norm": 1.995780138537087, "learning_rate": 7.846833578792342e-06, "loss": 0.5631, "step": 2664 }, { "epoch": 0.07851744914043281, "grad_norm": 1.890028174658527, "learning_rate": 7.84977908689249e-06, "loss": 0.6312, "step": 2665 }, { "epoch": 0.07854691159789638, "grad_norm": 1.78088265581987, "learning_rate": 7.852724594992636e-06, "loss": 0.477, "step": 2666 }, { "epoch": 0.07857637405535996, "grad_norm": 2.0484719253678882, "learning_rate": 7.855670103092785e-06, "loss": 0.6079, "step": 2667 }, { "epoch": 0.07860583651282353, "grad_norm": 1.9615973975869836, "learning_rate": 7.85861561119293e-06, "loss": 0.6757, "step": 2668 }, { "epoch": 0.0786352989702871, "grad_norm": 1.7190771763793833, "learning_rate": 7.861561119293079e-06, "loss": 0.4985, "step": 2669 }, { "epoch": 0.07866476142775068, "grad_norm": 2.053932720940993, "learning_rate": 7.864506627393225e-06, "loss": 0.6764, "step": 2670 }, { "epoch": 0.07869422388521427, "grad_norm": 1.9694198068281106, "learning_rate": 7.867452135493373e-06, "loss": 0.6858, "step": 2671 }, { "epoch": 0.07872368634267785, "grad_norm": 1.9774168227249203, "learning_rate": 7.870397643593521e-06, "loss": 0.6667, "step": 2672 }, { "epoch": 0.07875314880014142, "grad_norm": 2.1007739894244017, "learning_rate": 7.873343151693667e-06, "loss": 0.5439, "step": 2673 }, { "epoch": 0.078782611257605, "grad_norm": 1.682739189627772, "learning_rate": 7.876288659793815e-06, "loss": 0.618, "step": 2674 }, { "epoch": 0.07881207371506857, "grad_norm": 1.8190222442704007, "learning_rate": 7.879234167893962e-06, "loss": 0.6281, "step": 2675 }, { "epoch": 0.07884153617253215, "grad_norm": 2.251737882277788, "learning_rate": 7.88217967599411e-06, "loss": 0.6277, "step": 2676 }, { "epoch": 0.07887099862999573, "grad_norm": 1.7777519157537451, "learning_rate": 7.885125184094256e-06, "loss": 0.5028, "step": 2677 }, { "epoch": 0.07890046108745931, "grad_norm": 1.7450248559362915, "learning_rate": 7.888070692194404e-06, "loss": 0.4633, "step": 2678 }, { "epoch": 0.07892992354492288, "grad_norm": 1.826730916626859, "learning_rate": 7.89101620029455e-06, "loss": 0.6147, "step": 2679 }, { "epoch": 0.07895938600238646, "grad_norm": 1.908921468559643, "learning_rate": 7.893961708394698e-06, "loss": 0.5393, "step": 2680 }, { "epoch": 0.07898884845985003, "grad_norm": 2.0109571405585607, "learning_rate": 7.896907216494846e-06, "loss": 0.5311, "step": 2681 }, { "epoch": 0.07901831091731361, "grad_norm": 1.8021886796782483, "learning_rate": 7.899852724594993e-06, "loss": 0.5963, "step": 2682 }, { "epoch": 0.07904777337477718, "grad_norm": 2.1227399264513114, "learning_rate": 7.90279823269514e-06, "loss": 0.7566, "step": 2683 }, { "epoch": 0.07907723583224077, "grad_norm": 2.156912174084432, "learning_rate": 7.905743740795287e-06, "loss": 0.5347, "step": 2684 }, { "epoch": 0.07910669828970435, "grad_norm": 1.7806586878570054, "learning_rate": 7.908689248895435e-06, "loss": 0.464, "step": 2685 }, { "epoch": 0.07913616074716792, "grad_norm": 1.9628724240395017, "learning_rate": 7.911634756995581e-06, "loss": 0.6293, "step": 2686 }, { "epoch": 0.0791656232046315, "grad_norm": 1.6678959858638582, "learning_rate": 7.91458026509573e-06, "loss": 0.5088, "step": 2687 }, { "epoch": 0.07919508566209507, "grad_norm": 1.691666737549409, "learning_rate": 7.917525773195876e-06, "loss": 0.6054, "step": 2688 }, { "epoch": 0.07922454811955865, "grad_norm": 1.8952323300768044, "learning_rate": 7.920471281296024e-06, "loss": 0.5285, "step": 2689 }, { "epoch": 0.07925401057702224, "grad_norm": 1.7524956474088105, "learning_rate": 7.923416789396172e-06, "loss": 0.6531, "step": 2690 }, { "epoch": 0.07928347303448581, "grad_norm": 1.954301092962938, "learning_rate": 7.926362297496318e-06, "loss": 0.5645, "step": 2691 }, { "epoch": 0.07931293549194939, "grad_norm": 1.9507167579397806, "learning_rate": 7.929307805596466e-06, "loss": 0.6345, "step": 2692 }, { "epoch": 0.07934239794941296, "grad_norm": 1.897285821420906, "learning_rate": 7.932253313696612e-06, "loss": 0.6573, "step": 2693 }, { "epoch": 0.07937186040687653, "grad_norm": 2.0188368753587778, "learning_rate": 7.93519882179676e-06, "loss": 0.7203, "step": 2694 }, { "epoch": 0.07940132286434011, "grad_norm": 1.9319166436579078, "learning_rate": 7.938144329896907e-06, "loss": 0.4947, "step": 2695 }, { "epoch": 0.0794307853218037, "grad_norm": 1.8695332521852601, "learning_rate": 7.941089837997055e-06, "loss": 0.541, "step": 2696 }, { "epoch": 0.07946024777926727, "grad_norm": 1.7102531167445796, "learning_rate": 7.944035346097203e-06, "loss": 0.4815, "step": 2697 }, { "epoch": 0.07948971023673085, "grad_norm": 2.2328614368380766, "learning_rate": 7.946980854197349e-06, "loss": 0.6523, "step": 2698 }, { "epoch": 0.07951917269419442, "grad_norm": 1.8988972305293048, "learning_rate": 7.949926362297497e-06, "loss": 0.5478, "step": 2699 }, { "epoch": 0.079548635151658, "grad_norm": 1.7860501426652753, "learning_rate": 7.952871870397643e-06, "loss": 0.5158, "step": 2700 }, { "epoch": 0.07957809760912157, "grad_norm": 1.8988795470377826, "learning_rate": 7.955817378497791e-06, "loss": 0.6964, "step": 2701 }, { "epoch": 0.07960756006658515, "grad_norm": 1.9159540887732136, "learning_rate": 7.958762886597938e-06, "loss": 0.6618, "step": 2702 }, { "epoch": 0.07963702252404874, "grad_norm": 1.995225087811803, "learning_rate": 7.961708394698086e-06, "loss": 0.7073, "step": 2703 }, { "epoch": 0.07966648498151231, "grad_norm": 1.7380892695398003, "learning_rate": 7.964653902798232e-06, "loss": 0.6252, "step": 2704 }, { "epoch": 0.07969594743897589, "grad_norm": 1.783136738516889, "learning_rate": 7.96759941089838e-06, "loss": 0.6075, "step": 2705 }, { "epoch": 0.07972540989643946, "grad_norm": 1.8179997002524153, "learning_rate": 7.970544918998528e-06, "loss": 0.6621, "step": 2706 }, { "epoch": 0.07975487235390304, "grad_norm": 2.177242338283743, "learning_rate": 7.973490427098676e-06, "loss": 0.7837, "step": 2707 }, { "epoch": 0.07978433481136661, "grad_norm": 1.9695051359601397, "learning_rate": 7.976435935198822e-06, "loss": 0.6539, "step": 2708 }, { "epoch": 0.0798137972688302, "grad_norm": 1.7638118752816572, "learning_rate": 7.97938144329897e-06, "loss": 0.5265, "step": 2709 }, { "epoch": 0.07984325972629377, "grad_norm": 1.819781357608895, "learning_rate": 7.982326951399117e-06, "loss": 0.406, "step": 2710 }, { "epoch": 0.07987272218375735, "grad_norm": 1.7134680623338618, "learning_rate": 7.985272459499265e-06, "loss": 0.461, "step": 2711 }, { "epoch": 0.07990218464122092, "grad_norm": 1.798376117751621, "learning_rate": 7.988217967599413e-06, "loss": 0.4584, "step": 2712 }, { "epoch": 0.0799316470986845, "grad_norm": 1.6514369668629088, "learning_rate": 7.991163475699559e-06, "loss": 0.5534, "step": 2713 }, { "epoch": 0.07996110955614807, "grad_norm": 2.019765355077467, "learning_rate": 7.994108983799707e-06, "loss": 0.6775, "step": 2714 }, { "epoch": 0.07999057201361165, "grad_norm": 1.7545585650416282, "learning_rate": 7.997054491899853e-06, "loss": 0.5717, "step": 2715 }, { "epoch": 0.08002003447107524, "grad_norm": 1.9399556411156562, "learning_rate": 8.000000000000001e-06, "loss": 0.5263, "step": 2716 }, { "epoch": 0.08004949692853881, "grad_norm": 1.8736472292868602, "learning_rate": 8.002945508100148e-06, "loss": 0.5435, "step": 2717 }, { "epoch": 0.08007895938600239, "grad_norm": 1.696839033770963, "learning_rate": 8.005891016200296e-06, "loss": 0.4153, "step": 2718 }, { "epoch": 0.08010842184346596, "grad_norm": 1.9973426890228523, "learning_rate": 8.008836524300444e-06, "loss": 0.646, "step": 2719 }, { "epoch": 0.08013788430092954, "grad_norm": 1.9905422659444765, "learning_rate": 8.01178203240059e-06, "loss": 0.656, "step": 2720 }, { "epoch": 0.08016734675839311, "grad_norm": 1.650447350090589, "learning_rate": 8.014727540500738e-06, "loss": 0.4403, "step": 2721 }, { "epoch": 0.0801968092158567, "grad_norm": 1.7880455993107969, "learning_rate": 8.017673048600884e-06, "loss": 0.5803, "step": 2722 }, { "epoch": 0.08022627167332028, "grad_norm": 1.910853305200354, "learning_rate": 8.020618556701032e-06, "loss": 0.6268, "step": 2723 }, { "epoch": 0.08025573413078385, "grad_norm": 1.7527369079018413, "learning_rate": 8.023564064801179e-06, "loss": 0.4861, "step": 2724 }, { "epoch": 0.08028519658824743, "grad_norm": 1.892613202833594, "learning_rate": 8.026509572901327e-06, "loss": 0.6571, "step": 2725 }, { "epoch": 0.080314659045711, "grad_norm": 2.031729763545865, "learning_rate": 8.029455081001473e-06, "loss": 0.607, "step": 2726 }, { "epoch": 0.08034412150317458, "grad_norm": 1.872190972091833, "learning_rate": 8.032400589101621e-06, "loss": 0.4543, "step": 2727 }, { "epoch": 0.08037358396063815, "grad_norm": 1.8239304164354555, "learning_rate": 8.035346097201769e-06, "loss": 0.6996, "step": 2728 }, { "epoch": 0.08040304641810174, "grad_norm": 1.8113558111094645, "learning_rate": 8.038291605301915e-06, "loss": 0.6206, "step": 2729 }, { "epoch": 0.08043250887556531, "grad_norm": 1.8606972340027808, "learning_rate": 8.041237113402063e-06, "loss": 0.5738, "step": 2730 }, { "epoch": 0.08046197133302889, "grad_norm": 1.7854802444244988, "learning_rate": 8.04418262150221e-06, "loss": 0.551, "step": 2731 }, { "epoch": 0.08049143379049246, "grad_norm": 1.9291077638943879, "learning_rate": 8.047128129602358e-06, "loss": 0.711, "step": 2732 }, { "epoch": 0.08052089624795604, "grad_norm": 1.8761752451490645, "learning_rate": 8.050073637702504e-06, "loss": 0.5941, "step": 2733 }, { "epoch": 0.08055035870541961, "grad_norm": 1.6062100031004787, "learning_rate": 8.053019145802652e-06, "loss": 0.4988, "step": 2734 }, { "epoch": 0.0805798211628832, "grad_norm": 1.8845567135223442, "learning_rate": 8.0559646539028e-06, "loss": 0.5364, "step": 2735 }, { "epoch": 0.08060928362034678, "grad_norm": 2.0004187949596597, "learning_rate": 8.058910162002946e-06, "loss": 0.6353, "step": 2736 }, { "epoch": 0.08063874607781035, "grad_norm": 2.0523080149956874, "learning_rate": 8.061855670103094e-06, "loss": 0.6312, "step": 2737 }, { "epoch": 0.08066820853527393, "grad_norm": 1.9052896633835799, "learning_rate": 8.06480117820324e-06, "loss": 0.6125, "step": 2738 }, { "epoch": 0.0806976709927375, "grad_norm": 1.9079435115177221, "learning_rate": 8.067746686303389e-06, "loss": 0.6266, "step": 2739 }, { "epoch": 0.08072713345020108, "grad_norm": 1.8368438909288223, "learning_rate": 8.070692194403535e-06, "loss": 0.601, "step": 2740 }, { "epoch": 0.08075659590766465, "grad_norm": 1.9813379535006612, "learning_rate": 8.073637702503683e-06, "loss": 0.6837, "step": 2741 }, { "epoch": 0.08078605836512824, "grad_norm": 1.679928103812317, "learning_rate": 8.07658321060383e-06, "loss": 0.4797, "step": 2742 }, { "epoch": 0.08081552082259182, "grad_norm": 2.046254061110147, "learning_rate": 8.079528718703977e-06, "loss": 0.6734, "step": 2743 }, { "epoch": 0.08084498328005539, "grad_norm": 1.7226291443604627, "learning_rate": 8.082474226804125e-06, "loss": 0.6282, "step": 2744 }, { "epoch": 0.08087444573751897, "grad_norm": 1.6581258297174453, "learning_rate": 8.085419734904272e-06, "loss": 0.5681, "step": 2745 }, { "epoch": 0.08090390819498254, "grad_norm": 2.012863580737167, "learning_rate": 8.08836524300442e-06, "loss": 0.5364, "step": 2746 }, { "epoch": 0.08093337065244612, "grad_norm": 2.1335512759909547, "learning_rate": 8.091310751104566e-06, "loss": 0.5928, "step": 2747 }, { "epoch": 0.0809628331099097, "grad_norm": 1.6107708202155337, "learning_rate": 8.094256259204714e-06, "loss": 0.4287, "step": 2748 }, { "epoch": 0.08099229556737328, "grad_norm": 1.893017639202842, "learning_rate": 8.09720176730486e-06, "loss": 0.6054, "step": 2749 }, { "epoch": 0.08102175802483685, "grad_norm": 1.631894594620929, "learning_rate": 8.100147275405008e-06, "loss": 0.3617, "step": 2750 }, { "epoch": 0.08105122048230043, "grad_norm": 1.9195503674542256, "learning_rate": 8.103092783505156e-06, "loss": 0.6663, "step": 2751 }, { "epoch": 0.081080682939764, "grad_norm": 1.749532986860702, "learning_rate": 8.106038291605303e-06, "loss": 0.516, "step": 2752 }, { "epoch": 0.08111014539722758, "grad_norm": 2.0769482378492325, "learning_rate": 8.10898379970545e-06, "loss": 0.5465, "step": 2753 }, { "epoch": 0.08113960785469115, "grad_norm": 1.8186400991130576, "learning_rate": 8.111929307805597e-06, "loss": 0.6706, "step": 2754 }, { "epoch": 0.08116907031215474, "grad_norm": 1.7154187109208023, "learning_rate": 8.114874815905745e-06, "loss": 0.6014, "step": 2755 }, { "epoch": 0.08119853276961832, "grad_norm": 2.0756036570033523, "learning_rate": 8.117820324005891e-06, "loss": 0.6754, "step": 2756 }, { "epoch": 0.08122799522708189, "grad_norm": 1.8965531659582606, "learning_rate": 8.12076583210604e-06, "loss": 0.6372, "step": 2757 }, { "epoch": 0.08125745768454547, "grad_norm": 1.8474723204286665, "learning_rate": 8.123711340206185e-06, "loss": 0.6464, "step": 2758 }, { "epoch": 0.08128692014200904, "grad_norm": 1.6286247948158346, "learning_rate": 8.126656848306333e-06, "loss": 0.5323, "step": 2759 }, { "epoch": 0.08131638259947262, "grad_norm": 1.8739199974956697, "learning_rate": 8.129602356406482e-06, "loss": 0.5438, "step": 2760 }, { "epoch": 0.0813458450569362, "grad_norm": 1.7883972708227533, "learning_rate": 8.132547864506628e-06, "loss": 0.4694, "step": 2761 }, { "epoch": 0.08137530751439978, "grad_norm": 1.8252497589205399, "learning_rate": 8.135493372606776e-06, "loss": 0.5466, "step": 2762 }, { "epoch": 0.08140476997186336, "grad_norm": 1.610903142352832, "learning_rate": 8.138438880706922e-06, "loss": 0.4707, "step": 2763 }, { "epoch": 0.08143423242932693, "grad_norm": 1.878460999561169, "learning_rate": 8.14138438880707e-06, "loss": 0.4376, "step": 2764 }, { "epoch": 0.0814636948867905, "grad_norm": 1.880081566563687, "learning_rate": 8.144329896907216e-06, "loss": 0.5245, "step": 2765 }, { "epoch": 0.08149315734425408, "grad_norm": 1.6345810923097603, "learning_rate": 8.147275405007364e-06, "loss": 0.4359, "step": 2766 }, { "epoch": 0.08152261980171766, "grad_norm": 2.0347549007510892, "learning_rate": 8.15022091310751e-06, "loss": 0.5544, "step": 2767 }, { "epoch": 0.08155208225918124, "grad_norm": 1.8893415844060284, "learning_rate": 8.153166421207659e-06, "loss": 0.5598, "step": 2768 }, { "epoch": 0.08158154471664482, "grad_norm": 1.6771250577637766, "learning_rate": 8.156111929307807e-06, "loss": 0.3883, "step": 2769 }, { "epoch": 0.0816110071741084, "grad_norm": 1.8848566555417867, "learning_rate": 8.159057437407953e-06, "loss": 0.5859, "step": 2770 }, { "epoch": 0.08164046963157197, "grad_norm": 2.3625167644294542, "learning_rate": 8.162002945508101e-06, "loss": 0.7014, "step": 2771 }, { "epoch": 0.08166993208903554, "grad_norm": 1.8515082560752163, "learning_rate": 8.164948453608247e-06, "loss": 0.5462, "step": 2772 }, { "epoch": 0.08169939454649912, "grad_norm": 1.756757898974505, "learning_rate": 8.167893961708395e-06, "loss": 0.5834, "step": 2773 }, { "epoch": 0.08172885700396271, "grad_norm": 1.8965500873152814, "learning_rate": 8.170839469808542e-06, "loss": 0.3877, "step": 2774 }, { "epoch": 0.08175831946142628, "grad_norm": 1.8329589134493296, "learning_rate": 8.17378497790869e-06, "loss": 0.5854, "step": 2775 }, { "epoch": 0.08178778191888986, "grad_norm": 1.9238561838217088, "learning_rate": 8.176730486008838e-06, "loss": 0.6156, "step": 2776 }, { "epoch": 0.08181724437635343, "grad_norm": 1.65379972009175, "learning_rate": 8.179675994108984e-06, "loss": 0.5145, "step": 2777 }, { "epoch": 0.081846706833817, "grad_norm": 1.8410525875471722, "learning_rate": 8.182621502209132e-06, "loss": 0.641, "step": 2778 }, { "epoch": 0.08187616929128058, "grad_norm": 1.9553020309392082, "learning_rate": 8.185567010309278e-06, "loss": 0.6795, "step": 2779 }, { "epoch": 0.08190563174874416, "grad_norm": 1.9312643543793309, "learning_rate": 8.188512518409426e-06, "loss": 0.5342, "step": 2780 }, { "epoch": 0.08193509420620775, "grad_norm": 1.831409202916718, "learning_rate": 8.191458026509573e-06, "loss": 0.4739, "step": 2781 }, { "epoch": 0.08196455666367132, "grad_norm": 1.9645896486433716, "learning_rate": 8.19440353460972e-06, "loss": 0.4719, "step": 2782 }, { "epoch": 0.0819940191211349, "grad_norm": 1.9631883101249696, "learning_rate": 8.197349042709867e-06, "loss": 0.565, "step": 2783 }, { "epoch": 0.08202348157859847, "grad_norm": 1.7453626387910468, "learning_rate": 8.200294550810015e-06, "loss": 0.502, "step": 2784 }, { "epoch": 0.08205294403606204, "grad_norm": 1.587544484813044, "learning_rate": 8.203240058910163e-06, "loss": 0.4619, "step": 2785 }, { "epoch": 0.08208240649352562, "grad_norm": 2.0508865065741917, "learning_rate": 8.20618556701031e-06, "loss": 0.6816, "step": 2786 }, { "epoch": 0.08211186895098921, "grad_norm": 1.8673716806601486, "learning_rate": 8.209131075110457e-06, "loss": 0.4489, "step": 2787 }, { "epoch": 0.08214133140845278, "grad_norm": 1.9187958031232126, "learning_rate": 8.212076583210604e-06, "loss": 0.4782, "step": 2788 }, { "epoch": 0.08217079386591636, "grad_norm": 1.7301490771681767, "learning_rate": 8.215022091310752e-06, "loss": 0.571, "step": 2789 }, { "epoch": 0.08220025632337993, "grad_norm": 1.6950604231550779, "learning_rate": 8.217967599410898e-06, "loss": 0.5276, "step": 2790 }, { "epoch": 0.08222971878084351, "grad_norm": 1.8400996313739582, "learning_rate": 8.220913107511046e-06, "loss": 0.5672, "step": 2791 }, { "epoch": 0.08225918123830708, "grad_norm": 1.8821509017466747, "learning_rate": 8.223858615611194e-06, "loss": 0.738, "step": 2792 }, { "epoch": 0.08228864369577066, "grad_norm": 2.077877217942586, "learning_rate": 8.22680412371134e-06, "loss": 0.6137, "step": 2793 }, { "epoch": 0.08231810615323425, "grad_norm": 1.877872312803391, "learning_rate": 8.229749631811488e-06, "loss": 0.5527, "step": 2794 }, { "epoch": 0.08234756861069782, "grad_norm": 1.8642579240720671, "learning_rate": 8.232695139911635e-06, "loss": 0.6286, "step": 2795 }, { "epoch": 0.0823770310681614, "grad_norm": 1.999363352228413, "learning_rate": 8.235640648011783e-06, "loss": 0.6255, "step": 2796 }, { "epoch": 0.08240649352562497, "grad_norm": 1.7569714403274657, "learning_rate": 8.238586156111929e-06, "loss": 0.5313, "step": 2797 }, { "epoch": 0.08243595598308855, "grad_norm": 2.0468560816780874, "learning_rate": 8.241531664212077e-06, "loss": 0.5823, "step": 2798 }, { "epoch": 0.08246541844055212, "grad_norm": 1.8329703684799576, "learning_rate": 8.244477172312223e-06, "loss": 0.4738, "step": 2799 }, { "epoch": 0.08249488089801571, "grad_norm": 2.028915595798761, "learning_rate": 8.247422680412371e-06, "loss": 0.7381, "step": 2800 }, { "epoch": 0.08252434335547928, "grad_norm": 1.8082137018283233, "learning_rate": 8.25036818851252e-06, "loss": 0.5447, "step": 2801 }, { "epoch": 0.08255380581294286, "grad_norm": 1.8785325215536763, "learning_rate": 8.253313696612666e-06, "loss": 0.545, "step": 2802 }, { "epoch": 0.08258326827040643, "grad_norm": 1.957989182939517, "learning_rate": 8.256259204712814e-06, "loss": 0.6837, "step": 2803 }, { "epoch": 0.08261273072787001, "grad_norm": 1.805015864725742, "learning_rate": 8.25920471281296e-06, "loss": 0.4212, "step": 2804 }, { "epoch": 0.08264219318533358, "grad_norm": 1.992896115706211, "learning_rate": 8.262150220913108e-06, "loss": 0.6167, "step": 2805 }, { "epoch": 0.08267165564279716, "grad_norm": 1.7458245499049314, "learning_rate": 8.265095729013254e-06, "loss": 0.5441, "step": 2806 }, { "epoch": 0.08270111810026075, "grad_norm": 2.6428641501278247, "learning_rate": 8.268041237113402e-06, "loss": 0.7228, "step": 2807 }, { "epoch": 0.08273058055772432, "grad_norm": 1.6779687431006054, "learning_rate": 8.27098674521355e-06, "loss": 0.6115, "step": 2808 }, { "epoch": 0.0827600430151879, "grad_norm": 1.90790272292341, "learning_rate": 8.273932253313697e-06, "loss": 0.6104, "step": 2809 }, { "epoch": 0.08278950547265147, "grad_norm": 1.6968443540745914, "learning_rate": 8.276877761413845e-06, "loss": 0.3903, "step": 2810 }, { "epoch": 0.08281896793011505, "grad_norm": 1.8017673537786645, "learning_rate": 8.279823269513991e-06, "loss": 0.6373, "step": 2811 }, { "epoch": 0.08284843038757862, "grad_norm": 2.0594750850323877, "learning_rate": 8.282768777614139e-06, "loss": 0.6095, "step": 2812 }, { "epoch": 0.08287789284504221, "grad_norm": 1.8440721445008448, "learning_rate": 8.285714285714287e-06, "loss": 0.6892, "step": 2813 }, { "epoch": 0.08290735530250579, "grad_norm": 2.0747871421748325, "learning_rate": 8.288659793814435e-06, "loss": 0.6224, "step": 2814 }, { "epoch": 0.08293681775996936, "grad_norm": 1.7708087420920209, "learning_rate": 8.291605301914581e-06, "loss": 0.5512, "step": 2815 }, { "epoch": 0.08296628021743294, "grad_norm": 1.650444738663226, "learning_rate": 8.29455081001473e-06, "loss": 0.4668, "step": 2816 }, { "epoch": 0.08299574267489651, "grad_norm": 1.9263112536160922, "learning_rate": 8.297496318114876e-06, "loss": 0.696, "step": 2817 }, { "epoch": 0.08302520513236009, "grad_norm": 1.675196187056921, "learning_rate": 8.300441826215024e-06, "loss": 0.506, "step": 2818 }, { "epoch": 0.08305466758982366, "grad_norm": 1.9049525649144685, "learning_rate": 8.30338733431517e-06, "loss": 0.6763, "step": 2819 }, { "epoch": 0.08308413004728725, "grad_norm": 2.0664268106712336, "learning_rate": 8.306332842415318e-06, "loss": 0.8023, "step": 2820 }, { "epoch": 0.08311359250475082, "grad_norm": 1.7819275081260784, "learning_rate": 8.309278350515464e-06, "loss": 0.6328, "step": 2821 }, { "epoch": 0.0831430549622144, "grad_norm": 1.7622101594108914, "learning_rate": 8.312223858615612e-06, "loss": 0.5713, "step": 2822 }, { "epoch": 0.08317251741967797, "grad_norm": 2.0046819377400444, "learning_rate": 8.31516936671576e-06, "loss": 0.6989, "step": 2823 }, { "epoch": 0.08320197987714155, "grad_norm": 2.029958598586329, "learning_rate": 8.318114874815907e-06, "loss": 0.7229, "step": 2824 }, { "epoch": 0.08323144233460512, "grad_norm": 1.8454960053223306, "learning_rate": 8.321060382916055e-06, "loss": 0.6564, "step": 2825 }, { "epoch": 0.08326090479206871, "grad_norm": 1.8424134105104915, "learning_rate": 8.324005891016201e-06, "loss": 0.5463, "step": 2826 }, { "epoch": 0.08329036724953229, "grad_norm": 2.0356496402009676, "learning_rate": 8.326951399116349e-06, "loss": 0.418, "step": 2827 }, { "epoch": 0.08331982970699586, "grad_norm": 1.7678258044956103, "learning_rate": 8.329896907216495e-06, "loss": 0.5852, "step": 2828 }, { "epoch": 0.08334929216445944, "grad_norm": 1.8558620831968644, "learning_rate": 8.332842415316643e-06, "loss": 0.6399, "step": 2829 }, { "epoch": 0.08337875462192301, "grad_norm": 1.597395646453323, "learning_rate": 8.335787923416791e-06, "loss": 0.4637, "step": 2830 }, { "epoch": 0.08340821707938659, "grad_norm": 1.883733772599934, "learning_rate": 8.338733431516938e-06, "loss": 0.5895, "step": 2831 }, { "epoch": 0.08343767953685016, "grad_norm": 1.6597021299288235, "learning_rate": 8.341678939617086e-06, "loss": 0.4402, "step": 2832 }, { "epoch": 0.08346714199431375, "grad_norm": 1.8105540252801982, "learning_rate": 8.344624447717232e-06, "loss": 0.5478, "step": 2833 }, { "epoch": 0.08349660445177733, "grad_norm": 1.8990982590847405, "learning_rate": 8.34756995581738e-06, "loss": 0.605, "step": 2834 }, { "epoch": 0.0835260669092409, "grad_norm": 1.8051570400929302, "learning_rate": 8.350515463917526e-06, "loss": 0.3743, "step": 2835 }, { "epoch": 0.08355552936670448, "grad_norm": 1.9826869915086525, "learning_rate": 8.353460972017674e-06, "loss": 0.488, "step": 2836 }, { "epoch": 0.08358499182416805, "grad_norm": 1.7454020586211734, "learning_rate": 8.35640648011782e-06, "loss": 0.4927, "step": 2837 }, { "epoch": 0.08361445428163163, "grad_norm": 1.7634006600709318, "learning_rate": 8.359351988217969e-06, "loss": 0.3974, "step": 2838 }, { "epoch": 0.08364391673909521, "grad_norm": 1.780711497969966, "learning_rate": 8.362297496318117e-06, "loss": 0.668, "step": 2839 }, { "epoch": 0.08367337919655879, "grad_norm": 1.8782556808170414, "learning_rate": 8.365243004418263e-06, "loss": 0.6713, "step": 2840 }, { "epoch": 0.08370284165402236, "grad_norm": 1.7451723993465058, "learning_rate": 8.368188512518411e-06, "loss": 0.4596, "step": 2841 }, { "epoch": 0.08373230411148594, "grad_norm": 1.7092784878978196, "learning_rate": 8.371134020618557e-06, "loss": 0.5797, "step": 2842 }, { "epoch": 0.08376176656894951, "grad_norm": 1.8888368655247523, "learning_rate": 8.374079528718705e-06, "loss": 0.362, "step": 2843 }, { "epoch": 0.08379122902641309, "grad_norm": 1.887758162966216, "learning_rate": 8.377025036818852e-06, "loss": 0.3794, "step": 2844 }, { "epoch": 0.08382069148387666, "grad_norm": 1.865421402152161, "learning_rate": 8.379970544919e-06, "loss": 0.5781, "step": 2845 }, { "epoch": 0.08385015394134025, "grad_norm": 1.8627546623865845, "learning_rate": 8.382916053019148e-06, "loss": 0.4471, "step": 2846 }, { "epoch": 0.08387961639880383, "grad_norm": 1.713722302106709, "learning_rate": 8.385861561119294e-06, "loss": 0.5063, "step": 2847 }, { "epoch": 0.0839090788562674, "grad_norm": 1.648474146714854, "learning_rate": 8.388807069219442e-06, "loss": 0.4767, "step": 2848 }, { "epoch": 0.08393854131373098, "grad_norm": 1.818292288845019, "learning_rate": 8.391752577319588e-06, "loss": 0.5887, "step": 2849 }, { "epoch": 0.08396800377119455, "grad_norm": 1.9997789916416906, "learning_rate": 8.394698085419736e-06, "loss": 0.4896, "step": 2850 }, { "epoch": 0.08399746622865813, "grad_norm": 1.9677800649466342, "learning_rate": 8.397643593519882e-06, "loss": 0.4975, "step": 2851 }, { "epoch": 0.08402692868612172, "grad_norm": 1.8189768690226524, "learning_rate": 8.40058910162003e-06, "loss": 0.5552, "step": 2852 }, { "epoch": 0.08405639114358529, "grad_norm": 1.8078162922124676, "learning_rate": 8.403534609720177e-06, "loss": 0.7027, "step": 2853 }, { "epoch": 0.08408585360104887, "grad_norm": 2.0058141498157704, "learning_rate": 8.406480117820325e-06, "loss": 0.5748, "step": 2854 }, { "epoch": 0.08411531605851244, "grad_norm": 1.82187751749321, "learning_rate": 8.409425625920473e-06, "loss": 0.4249, "step": 2855 }, { "epoch": 0.08414477851597602, "grad_norm": 1.6178730597310702, "learning_rate": 8.412371134020619e-06, "loss": 0.4135, "step": 2856 }, { "epoch": 0.08417424097343959, "grad_norm": 1.786957663035851, "learning_rate": 8.415316642120767e-06, "loss": 0.5619, "step": 2857 }, { "epoch": 0.08420370343090317, "grad_norm": 1.8321637386705767, "learning_rate": 8.418262150220913e-06, "loss": 0.582, "step": 2858 }, { "epoch": 0.08423316588836675, "grad_norm": 1.6819542017142517, "learning_rate": 8.421207658321061e-06, "loss": 0.3915, "step": 2859 }, { "epoch": 0.08426262834583033, "grad_norm": 2.1416707175147294, "learning_rate": 8.424153166421208e-06, "loss": 0.5753, "step": 2860 }, { "epoch": 0.0842920908032939, "grad_norm": 1.9748926008526302, "learning_rate": 8.427098674521356e-06, "loss": 0.4927, "step": 2861 }, { "epoch": 0.08432155326075748, "grad_norm": 2.026622273664218, "learning_rate": 8.430044182621502e-06, "loss": 0.5698, "step": 2862 }, { "epoch": 0.08435101571822105, "grad_norm": 1.7889871463749993, "learning_rate": 8.43298969072165e-06, "loss": 0.5713, "step": 2863 }, { "epoch": 0.08438047817568463, "grad_norm": 1.9571833385915833, "learning_rate": 8.435935198821798e-06, "loss": 0.5862, "step": 2864 }, { "epoch": 0.08440994063314822, "grad_norm": 2.018408458024371, "learning_rate": 8.438880706921944e-06, "loss": 0.7562, "step": 2865 }, { "epoch": 0.08443940309061179, "grad_norm": 1.9497371578706368, "learning_rate": 8.441826215022092e-06, "loss": 0.5608, "step": 2866 }, { "epoch": 0.08446886554807537, "grad_norm": 1.969484679565503, "learning_rate": 8.444771723122239e-06, "loss": 0.5727, "step": 2867 }, { "epoch": 0.08449832800553894, "grad_norm": 1.7453153897906024, "learning_rate": 8.447717231222387e-06, "loss": 0.5332, "step": 2868 }, { "epoch": 0.08452779046300252, "grad_norm": 2.0953479360481535, "learning_rate": 8.450662739322533e-06, "loss": 0.6207, "step": 2869 }, { "epoch": 0.08455725292046609, "grad_norm": 1.7688474596809578, "learning_rate": 8.453608247422681e-06, "loss": 0.5098, "step": 2870 }, { "epoch": 0.08458671537792967, "grad_norm": 1.8936055726129606, "learning_rate": 8.456553755522829e-06, "loss": 0.5751, "step": 2871 }, { "epoch": 0.08461617783539326, "grad_norm": 1.990085753314957, "learning_rate": 8.459499263622975e-06, "loss": 0.589, "step": 2872 }, { "epoch": 0.08464564029285683, "grad_norm": 1.900956721267684, "learning_rate": 8.462444771723123e-06, "loss": 0.6226, "step": 2873 }, { "epoch": 0.0846751027503204, "grad_norm": 1.7385765341032422, "learning_rate": 8.46539027982327e-06, "loss": 0.5434, "step": 2874 }, { "epoch": 0.08470456520778398, "grad_norm": 1.9145815736296694, "learning_rate": 8.468335787923418e-06, "loss": 0.4993, "step": 2875 }, { "epoch": 0.08473402766524755, "grad_norm": 1.9311715287809494, "learning_rate": 8.471281296023564e-06, "loss": 0.5726, "step": 2876 }, { "epoch": 0.08476349012271113, "grad_norm": 1.8279685610164758, "learning_rate": 8.474226804123712e-06, "loss": 0.569, "step": 2877 }, { "epoch": 0.08479295258017472, "grad_norm": 1.7141242460415673, "learning_rate": 8.477172312223858e-06, "loss": 0.4573, "step": 2878 }, { "epoch": 0.0848224150376383, "grad_norm": 2.0000237745639784, "learning_rate": 8.480117820324006e-06, "loss": 0.6441, "step": 2879 }, { "epoch": 0.08485187749510187, "grad_norm": 2.0226398188638997, "learning_rate": 8.483063328424154e-06, "loss": 0.5045, "step": 2880 }, { "epoch": 0.08488133995256544, "grad_norm": 1.9264752014512951, "learning_rate": 8.4860088365243e-06, "loss": 0.5703, "step": 2881 }, { "epoch": 0.08491080241002902, "grad_norm": 2.217929166418216, "learning_rate": 8.488954344624449e-06, "loss": 0.6185, "step": 2882 }, { "epoch": 0.0849402648674926, "grad_norm": 2.074681698294625, "learning_rate": 8.491899852724595e-06, "loss": 0.4859, "step": 2883 }, { "epoch": 0.08496972732495617, "grad_norm": 2.097912270594017, "learning_rate": 8.494845360824743e-06, "loss": 0.6766, "step": 2884 }, { "epoch": 0.08499918978241976, "grad_norm": 2.197404422670256, "learning_rate": 8.49779086892489e-06, "loss": 0.7304, "step": 2885 }, { "epoch": 0.08502865223988333, "grad_norm": 1.9344745961899614, "learning_rate": 8.500736377025037e-06, "loss": 0.6259, "step": 2886 }, { "epoch": 0.0850581146973469, "grad_norm": 1.8952760796155352, "learning_rate": 8.503681885125185e-06, "loss": 0.4905, "step": 2887 }, { "epoch": 0.08508757715481048, "grad_norm": 1.9870541285632317, "learning_rate": 8.506627393225332e-06, "loss": 0.5385, "step": 2888 }, { "epoch": 0.08511703961227406, "grad_norm": 1.8770133790881918, "learning_rate": 8.50957290132548e-06, "loss": 0.5506, "step": 2889 }, { "epoch": 0.08514650206973763, "grad_norm": 1.906409254878801, "learning_rate": 8.512518409425626e-06, "loss": 0.6644, "step": 2890 }, { "epoch": 0.08517596452720122, "grad_norm": 1.7511287425204487, "learning_rate": 8.515463917525774e-06, "loss": 0.5143, "step": 2891 }, { "epoch": 0.0852054269846648, "grad_norm": 1.8842249085704892, "learning_rate": 8.51840942562592e-06, "loss": 0.6104, "step": 2892 }, { "epoch": 0.08523488944212837, "grad_norm": 2.0740478005120364, "learning_rate": 8.521354933726068e-06, "loss": 0.6965, "step": 2893 }, { "epoch": 0.08526435189959194, "grad_norm": 1.9821082442559734, "learning_rate": 8.524300441826215e-06, "loss": 0.6985, "step": 2894 }, { "epoch": 0.08529381435705552, "grad_norm": 1.8173311001593309, "learning_rate": 8.527245949926363e-06, "loss": 0.6366, "step": 2895 }, { "epoch": 0.0853232768145191, "grad_norm": 1.7826380918981415, "learning_rate": 8.53019145802651e-06, "loss": 0.3841, "step": 2896 }, { "epoch": 0.08535273927198267, "grad_norm": 1.7822663214671304, "learning_rate": 8.533136966126657e-06, "loss": 0.6153, "step": 2897 }, { "epoch": 0.08538220172944626, "grad_norm": 1.713520325172757, "learning_rate": 8.536082474226805e-06, "loss": 0.5643, "step": 2898 }, { "epoch": 0.08541166418690983, "grad_norm": 1.9123853897269796, "learning_rate": 8.539027982326951e-06, "loss": 0.5558, "step": 2899 }, { "epoch": 0.08544112664437341, "grad_norm": 1.9270697676984943, "learning_rate": 8.5419734904271e-06, "loss": 0.7134, "step": 2900 }, { "epoch": 0.08547058910183698, "grad_norm": 1.8674368060913749, "learning_rate": 8.544918998527246e-06, "loss": 0.6122, "step": 2901 }, { "epoch": 0.08550005155930056, "grad_norm": 1.9460667954362647, "learning_rate": 8.547864506627394e-06, "loss": 0.5488, "step": 2902 }, { "epoch": 0.08552951401676413, "grad_norm": 1.6574794690548278, "learning_rate": 8.550810014727542e-06, "loss": 0.5213, "step": 2903 }, { "epoch": 0.08555897647422772, "grad_norm": 2.3552748003732202, "learning_rate": 8.553755522827688e-06, "loss": 0.5698, "step": 2904 }, { "epoch": 0.0855884389316913, "grad_norm": 1.8363512947066667, "learning_rate": 8.556701030927836e-06, "loss": 0.5339, "step": 2905 }, { "epoch": 0.08561790138915487, "grad_norm": 1.8231510340576074, "learning_rate": 8.559646539027982e-06, "loss": 0.4979, "step": 2906 }, { "epoch": 0.08564736384661845, "grad_norm": 1.7771642525270976, "learning_rate": 8.56259204712813e-06, "loss": 0.5197, "step": 2907 }, { "epoch": 0.08567682630408202, "grad_norm": 2.1027035231830866, "learning_rate": 8.565537555228277e-06, "loss": 0.6942, "step": 2908 }, { "epoch": 0.0857062887615456, "grad_norm": 1.9953002872845091, "learning_rate": 8.568483063328425e-06, "loss": 0.7019, "step": 2909 }, { "epoch": 0.08573575121900917, "grad_norm": 2.188880730404866, "learning_rate": 8.571428571428571e-06, "loss": 0.6343, "step": 2910 }, { "epoch": 0.08576521367647276, "grad_norm": 1.8071210670408762, "learning_rate": 8.574374079528719e-06, "loss": 0.5676, "step": 2911 }, { "epoch": 0.08579467613393633, "grad_norm": 1.7843751610159737, "learning_rate": 8.577319587628867e-06, "loss": 0.5232, "step": 2912 }, { "epoch": 0.08582413859139991, "grad_norm": 1.7986719139725398, "learning_rate": 8.580265095729013e-06, "loss": 0.5294, "step": 2913 }, { "epoch": 0.08585360104886348, "grad_norm": 1.8921215068757307, "learning_rate": 8.583210603829161e-06, "loss": 0.5562, "step": 2914 }, { "epoch": 0.08588306350632706, "grad_norm": 1.8015075875252262, "learning_rate": 8.586156111929308e-06, "loss": 0.5064, "step": 2915 }, { "epoch": 0.08591252596379063, "grad_norm": 2.2062562323483395, "learning_rate": 8.589101620029456e-06, "loss": 0.5564, "step": 2916 }, { "epoch": 0.08594198842125422, "grad_norm": 1.753150561602827, "learning_rate": 8.592047128129602e-06, "loss": 0.4076, "step": 2917 }, { "epoch": 0.0859714508787178, "grad_norm": 1.9386696204678147, "learning_rate": 8.594992636229752e-06, "loss": 0.5729, "step": 2918 }, { "epoch": 0.08600091333618137, "grad_norm": 1.8243372745427608, "learning_rate": 8.597938144329898e-06, "loss": 0.4602, "step": 2919 }, { "epoch": 0.08603037579364495, "grad_norm": 2.0033999704391237, "learning_rate": 8.600883652430046e-06, "loss": 0.619, "step": 2920 }, { "epoch": 0.08605983825110852, "grad_norm": 1.8194412605520087, "learning_rate": 8.603829160530192e-06, "loss": 0.5691, "step": 2921 }, { "epoch": 0.0860893007085721, "grad_norm": 1.829360537316156, "learning_rate": 8.60677466863034e-06, "loss": 0.4848, "step": 2922 }, { "epoch": 0.08611876316603567, "grad_norm": 1.7827478847242655, "learning_rate": 8.609720176730487e-06, "loss": 0.4387, "step": 2923 }, { "epoch": 0.08614822562349926, "grad_norm": 1.8661593007220088, "learning_rate": 8.612665684830635e-06, "loss": 0.5861, "step": 2924 }, { "epoch": 0.08617768808096284, "grad_norm": 1.9378716703627004, "learning_rate": 8.615611192930783e-06, "loss": 0.6652, "step": 2925 }, { "epoch": 0.08620715053842641, "grad_norm": 1.748037728268332, "learning_rate": 8.618556701030929e-06, "loss": 0.6151, "step": 2926 }, { "epoch": 0.08623661299588999, "grad_norm": 1.6018265740870576, "learning_rate": 8.621502209131077e-06, "loss": 0.4427, "step": 2927 }, { "epoch": 0.08626607545335356, "grad_norm": 2.2018760354382585, "learning_rate": 8.624447717231223e-06, "loss": 0.4544, "step": 2928 }, { "epoch": 0.08629553791081714, "grad_norm": 1.7410392203740743, "learning_rate": 8.627393225331371e-06, "loss": 0.5533, "step": 2929 }, { "epoch": 0.08632500036828072, "grad_norm": 1.6977208862017765, "learning_rate": 8.630338733431518e-06, "loss": 0.4931, "step": 2930 }, { "epoch": 0.0863544628257443, "grad_norm": 1.7005022177572102, "learning_rate": 8.633284241531666e-06, "loss": 0.3616, "step": 2931 }, { "epoch": 0.08638392528320787, "grad_norm": 1.7208193349887904, "learning_rate": 8.636229749631812e-06, "loss": 0.5467, "step": 2932 }, { "epoch": 0.08641338774067145, "grad_norm": 1.7721068599302343, "learning_rate": 8.63917525773196e-06, "loss": 0.5399, "step": 2933 }, { "epoch": 0.08644285019813502, "grad_norm": 2.0737641973158016, "learning_rate": 8.642120765832108e-06, "loss": 0.484, "step": 2934 }, { "epoch": 0.0864723126555986, "grad_norm": 2.0821850467774556, "learning_rate": 8.645066273932254e-06, "loss": 0.5689, "step": 2935 }, { "epoch": 0.08650177511306217, "grad_norm": 2.084834775630052, "learning_rate": 8.648011782032402e-06, "loss": 0.6348, "step": 2936 }, { "epoch": 0.08653123757052576, "grad_norm": 1.8351002178237608, "learning_rate": 8.650957290132549e-06, "loss": 0.7007, "step": 2937 }, { "epoch": 0.08656070002798934, "grad_norm": 1.8647533263309337, "learning_rate": 8.653902798232697e-06, "loss": 0.4044, "step": 2938 }, { "epoch": 0.08659016248545291, "grad_norm": 1.8706201745780764, "learning_rate": 8.656848306332843e-06, "loss": 0.4687, "step": 2939 }, { "epoch": 0.08661962494291649, "grad_norm": 1.8319606742963477, "learning_rate": 8.65979381443299e-06, "loss": 0.5059, "step": 2940 }, { "epoch": 0.08664908740038006, "grad_norm": 1.8068446247204846, "learning_rate": 8.662739322533137e-06, "loss": 0.572, "step": 2941 }, { "epoch": 0.08667854985784364, "grad_norm": 1.7542007226984195, "learning_rate": 8.665684830633285e-06, "loss": 0.5608, "step": 2942 }, { "epoch": 0.08670801231530723, "grad_norm": 1.825320124521482, "learning_rate": 8.668630338733433e-06, "loss": 0.5467, "step": 2943 }, { "epoch": 0.0867374747727708, "grad_norm": 1.6668345429835456, "learning_rate": 8.67157584683358e-06, "loss": 0.479, "step": 2944 }, { "epoch": 0.08676693723023438, "grad_norm": 1.784549672201226, "learning_rate": 8.674521354933727e-06, "loss": 0.5674, "step": 2945 }, { "epoch": 0.08679639968769795, "grad_norm": 1.8375530467493355, "learning_rate": 8.677466863033874e-06, "loss": 0.6022, "step": 2946 }, { "epoch": 0.08682586214516153, "grad_norm": 1.821705389852354, "learning_rate": 8.680412371134022e-06, "loss": 0.5196, "step": 2947 }, { "epoch": 0.0868553246026251, "grad_norm": 1.907948159579075, "learning_rate": 8.683357879234168e-06, "loss": 0.6548, "step": 2948 }, { "epoch": 0.08688478706008868, "grad_norm": 1.8154551809575226, "learning_rate": 8.686303387334316e-06, "loss": 0.6538, "step": 2949 }, { "epoch": 0.08691424951755226, "grad_norm": 1.8881235274963657, "learning_rate": 8.689248895434464e-06, "loss": 0.5868, "step": 2950 }, { "epoch": 0.08694371197501584, "grad_norm": 1.7517530636265857, "learning_rate": 8.69219440353461e-06, "loss": 0.4723, "step": 2951 }, { "epoch": 0.08697317443247941, "grad_norm": 1.8779710786017076, "learning_rate": 8.695139911634758e-06, "loss": 0.532, "step": 2952 }, { "epoch": 0.08700263688994299, "grad_norm": 1.785554061180321, "learning_rate": 8.698085419734905e-06, "loss": 0.5912, "step": 2953 }, { "epoch": 0.08703209934740656, "grad_norm": 1.640257360840349, "learning_rate": 8.701030927835053e-06, "loss": 0.5117, "step": 2954 }, { "epoch": 0.08706156180487014, "grad_norm": 2.142143267665508, "learning_rate": 8.703976435935199e-06, "loss": 0.4785, "step": 2955 }, { "epoch": 0.08709102426233373, "grad_norm": 1.5884082382977869, "learning_rate": 8.706921944035347e-06, "loss": 0.4368, "step": 2956 }, { "epoch": 0.0871204867197973, "grad_norm": 1.761031912700991, "learning_rate": 8.709867452135493e-06, "loss": 0.4334, "step": 2957 }, { "epoch": 0.08714994917726088, "grad_norm": 1.9213462772699306, "learning_rate": 8.712812960235641e-06, "loss": 0.529, "step": 2958 }, { "epoch": 0.08717941163472445, "grad_norm": 1.7915208991350742, "learning_rate": 8.71575846833579e-06, "loss": 0.5157, "step": 2959 }, { "epoch": 0.08720887409218803, "grad_norm": 2.1288713578911382, "learning_rate": 8.718703976435936e-06, "loss": 0.5927, "step": 2960 }, { "epoch": 0.0872383365496516, "grad_norm": 1.8197053677169115, "learning_rate": 8.721649484536084e-06, "loss": 0.4161, "step": 2961 }, { "epoch": 0.08726779900711518, "grad_norm": 1.7258582011497405, "learning_rate": 8.72459499263623e-06, "loss": 0.5868, "step": 2962 }, { "epoch": 0.08729726146457877, "grad_norm": 1.8520318945410983, "learning_rate": 8.727540500736378e-06, "loss": 0.4242, "step": 2963 }, { "epoch": 0.08732672392204234, "grad_norm": 1.7046218537386657, "learning_rate": 8.730486008836524e-06, "loss": 0.5796, "step": 2964 }, { "epoch": 0.08735618637950592, "grad_norm": 1.7587828818641493, "learning_rate": 8.733431516936672e-06, "loss": 0.4337, "step": 2965 }, { "epoch": 0.08738564883696949, "grad_norm": 2.081869178001491, "learning_rate": 8.73637702503682e-06, "loss": 0.3737, "step": 2966 }, { "epoch": 0.08741511129443306, "grad_norm": 1.9859372039620933, "learning_rate": 8.739322533136967e-06, "loss": 0.5881, "step": 2967 }, { "epoch": 0.08744457375189664, "grad_norm": 2.1039550572212615, "learning_rate": 8.742268041237115e-06, "loss": 0.763, "step": 2968 }, { "epoch": 0.08747403620936023, "grad_norm": 1.8349054626483996, "learning_rate": 8.745213549337261e-06, "loss": 0.6001, "step": 2969 }, { "epoch": 0.0875034986668238, "grad_norm": 1.7717274902903404, "learning_rate": 8.748159057437409e-06, "loss": 0.4513, "step": 2970 }, { "epoch": 0.08753296112428738, "grad_norm": 1.9023487477976226, "learning_rate": 8.751104565537555e-06, "loss": 0.5947, "step": 2971 }, { "epoch": 0.08756242358175095, "grad_norm": 1.5938840780753971, "learning_rate": 8.754050073637703e-06, "loss": 0.5327, "step": 2972 }, { "epoch": 0.08759188603921453, "grad_norm": 1.8340791497154771, "learning_rate": 8.75699558173785e-06, "loss": 0.5105, "step": 2973 }, { "epoch": 0.0876213484966781, "grad_norm": 1.8190182324042916, "learning_rate": 8.759941089837998e-06, "loss": 0.5225, "step": 2974 }, { "epoch": 0.08765081095414169, "grad_norm": 1.9149996717966729, "learning_rate": 8.762886597938146e-06, "loss": 0.4565, "step": 2975 }, { "epoch": 0.08768027341160527, "grad_norm": 1.5942700111060442, "learning_rate": 8.765832106038292e-06, "loss": 0.4773, "step": 2976 }, { "epoch": 0.08770973586906884, "grad_norm": 1.6538640911275222, "learning_rate": 8.76877761413844e-06, "loss": 0.4789, "step": 2977 }, { "epoch": 0.08773919832653242, "grad_norm": 2.007874707682959, "learning_rate": 8.771723122238586e-06, "loss": 0.551, "step": 2978 }, { "epoch": 0.08776866078399599, "grad_norm": 1.9169785785639155, "learning_rate": 8.774668630338734e-06, "loss": 0.5443, "step": 2979 }, { "epoch": 0.08779812324145957, "grad_norm": 2.064368623754679, "learning_rate": 8.77761413843888e-06, "loss": 0.5884, "step": 2980 }, { "epoch": 0.08782758569892314, "grad_norm": 1.6919348230056386, "learning_rate": 8.780559646539029e-06, "loss": 0.5531, "step": 2981 }, { "epoch": 0.08785704815638673, "grad_norm": 2.006469021084075, "learning_rate": 8.783505154639177e-06, "loss": 0.5625, "step": 2982 }, { "epoch": 0.0878865106138503, "grad_norm": 1.9034233881033706, "learning_rate": 8.786450662739323e-06, "loss": 0.6486, "step": 2983 }, { "epoch": 0.08791597307131388, "grad_norm": 2.0344870045517065, "learning_rate": 8.789396170839471e-06, "loss": 0.7725, "step": 2984 }, { "epoch": 0.08794543552877745, "grad_norm": 1.7475017363910306, "learning_rate": 8.792341678939617e-06, "loss": 0.5975, "step": 2985 }, { "epoch": 0.08797489798624103, "grad_norm": 1.7476352027039084, "learning_rate": 8.795287187039765e-06, "loss": 0.4976, "step": 2986 }, { "epoch": 0.0880043604437046, "grad_norm": 1.8458934877886986, "learning_rate": 8.798232695139912e-06, "loss": 0.4298, "step": 2987 }, { "epoch": 0.0880338229011682, "grad_norm": 1.6929046636229481, "learning_rate": 8.80117820324006e-06, "loss": 0.4732, "step": 2988 }, { "epoch": 0.08806328535863177, "grad_norm": 1.7820023840298542, "learning_rate": 8.804123711340206e-06, "loss": 0.6592, "step": 2989 }, { "epoch": 0.08809274781609534, "grad_norm": 1.9437120831993087, "learning_rate": 8.807069219440354e-06, "loss": 0.7332, "step": 2990 }, { "epoch": 0.08812221027355892, "grad_norm": 1.714005047910738, "learning_rate": 8.810014727540502e-06, "loss": 0.4967, "step": 2991 }, { "epoch": 0.08815167273102249, "grad_norm": 1.9027924616316063, "learning_rate": 8.812960235640648e-06, "loss": 0.654, "step": 2992 }, { "epoch": 0.08818113518848607, "grad_norm": 2.0549945606340296, "learning_rate": 8.815905743740796e-06, "loss": 0.5299, "step": 2993 }, { "epoch": 0.08821059764594964, "grad_norm": 1.7001731034348826, "learning_rate": 8.818851251840943e-06, "loss": 0.4433, "step": 2994 }, { "epoch": 0.08824006010341323, "grad_norm": 1.9342674241799522, "learning_rate": 8.82179675994109e-06, "loss": 0.7506, "step": 2995 }, { "epoch": 0.0882695225608768, "grad_norm": 1.630005854125575, "learning_rate": 8.824742268041237e-06, "loss": 0.3579, "step": 2996 }, { "epoch": 0.08829898501834038, "grad_norm": 1.7336878917619842, "learning_rate": 8.827687776141385e-06, "loss": 0.4727, "step": 2997 }, { "epoch": 0.08832844747580396, "grad_norm": 1.8646994759500144, "learning_rate": 8.830633284241531e-06, "loss": 0.5191, "step": 2998 }, { "epoch": 0.08835790993326753, "grad_norm": 1.7650536929012959, "learning_rate": 8.83357879234168e-06, "loss": 0.4484, "step": 2999 }, { "epoch": 0.0883873723907311, "grad_norm": 2.01250997536739, "learning_rate": 8.836524300441827e-06, "loss": 0.5937, "step": 3000 }, { "epoch": 0.0884168348481947, "grad_norm": 1.7932706651402304, "learning_rate": 8.839469808541974e-06, "loss": 0.6581, "step": 3001 }, { "epoch": 0.08844629730565827, "grad_norm": 1.8057723008404627, "learning_rate": 8.842415316642122e-06, "loss": 0.6063, "step": 3002 }, { "epoch": 0.08847575976312184, "grad_norm": 1.7623245212138416, "learning_rate": 8.845360824742268e-06, "loss": 0.5549, "step": 3003 }, { "epoch": 0.08850522222058542, "grad_norm": 1.9304853738081773, "learning_rate": 8.848306332842416e-06, "loss": 0.6539, "step": 3004 }, { "epoch": 0.088534684678049, "grad_norm": 1.8942652347988527, "learning_rate": 8.851251840942562e-06, "loss": 0.5359, "step": 3005 }, { "epoch": 0.08856414713551257, "grad_norm": 1.8757208717058547, "learning_rate": 8.85419734904271e-06, "loss": 0.4218, "step": 3006 }, { "epoch": 0.08859360959297614, "grad_norm": 1.8654691599436912, "learning_rate": 8.857142857142858e-06, "loss": 0.4585, "step": 3007 }, { "epoch": 0.08862307205043973, "grad_norm": 1.7799126708427382, "learning_rate": 8.860088365243005e-06, "loss": 0.442, "step": 3008 }, { "epoch": 0.08865253450790331, "grad_norm": 2.104992129862829, "learning_rate": 8.863033873343153e-06, "loss": 0.5601, "step": 3009 }, { "epoch": 0.08868199696536688, "grad_norm": 1.7677210187436183, "learning_rate": 8.865979381443299e-06, "loss": 0.5222, "step": 3010 }, { "epoch": 0.08871145942283046, "grad_norm": 1.918444252152058, "learning_rate": 8.868924889543447e-06, "loss": 0.555, "step": 3011 }, { "epoch": 0.08874092188029403, "grad_norm": 1.8852367709264588, "learning_rate": 8.871870397643593e-06, "loss": 0.5326, "step": 3012 }, { "epoch": 0.08877038433775761, "grad_norm": 1.898498475270544, "learning_rate": 8.874815905743741e-06, "loss": 0.4592, "step": 3013 }, { "epoch": 0.0887998467952212, "grad_norm": 1.8308680746447712, "learning_rate": 8.877761413843888e-06, "loss": 0.6559, "step": 3014 }, { "epoch": 0.08882930925268477, "grad_norm": 1.9150127056635082, "learning_rate": 8.880706921944036e-06, "loss": 0.5463, "step": 3015 }, { "epoch": 0.08885877171014835, "grad_norm": 1.9118261499582376, "learning_rate": 8.883652430044184e-06, "loss": 0.4658, "step": 3016 }, { "epoch": 0.08888823416761192, "grad_norm": 1.8704635707082027, "learning_rate": 8.88659793814433e-06, "loss": 0.5212, "step": 3017 }, { "epoch": 0.0889176966250755, "grad_norm": 1.7417833189263685, "learning_rate": 8.889543446244478e-06, "loss": 0.3447, "step": 3018 }, { "epoch": 0.08894715908253907, "grad_norm": 1.7675147197535752, "learning_rate": 8.892488954344624e-06, "loss": 0.4876, "step": 3019 }, { "epoch": 0.08897662154000265, "grad_norm": 2.004499659643354, "learning_rate": 8.895434462444772e-06, "loss": 0.5862, "step": 3020 }, { "epoch": 0.08900608399746623, "grad_norm": 1.8610824489351045, "learning_rate": 8.898379970544919e-06, "loss": 0.5483, "step": 3021 }, { "epoch": 0.08903554645492981, "grad_norm": 1.9312770339933836, "learning_rate": 8.901325478645067e-06, "loss": 0.6022, "step": 3022 }, { "epoch": 0.08906500891239338, "grad_norm": 1.8945245589086688, "learning_rate": 8.904270986745215e-06, "loss": 0.6128, "step": 3023 }, { "epoch": 0.08909447136985696, "grad_norm": 1.4942192985453884, "learning_rate": 8.907216494845363e-06, "loss": 0.4758, "step": 3024 }, { "epoch": 0.08912393382732053, "grad_norm": 2.00773475793439, "learning_rate": 8.910162002945509e-06, "loss": 0.5527, "step": 3025 }, { "epoch": 0.08915339628478411, "grad_norm": 1.903871622452077, "learning_rate": 8.913107511045657e-06, "loss": 0.5356, "step": 3026 }, { "epoch": 0.0891828587422477, "grad_norm": 1.7886236487516836, "learning_rate": 8.916053019145803e-06, "loss": 0.6965, "step": 3027 }, { "epoch": 0.08921232119971127, "grad_norm": 1.8046955566225322, "learning_rate": 8.918998527245951e-06, "loss": 0.4902, "step": 3028 }, { "epoch": 0.08924178365717485, "grad_norm": 1.7484099113690765, "learning_rate": 8.9219440353461e-06, "loss": 0.6481, "step": 3029 }, { "epoch": 0.08927124611463842, "grad_norm": 1.7052695056252463, "learning_rate": 8.924889543446246e-06, "loss": 0.4749, "step": 3030 }, { "epoch": 0.089300708572102, "grad_norm": 1.7057459280074334, "learning_rate": 8.927835051546394e-06, "loss": 0.6371, "step": 3031 }, { "epoch": 0.08933017102956557, "grad_norm": 1.6791428004696622, "learning_rate": 8.93078055964654e-06, "loss": 0.4273, "step": 3032 }, { "epoch": 0.08935963348702915, "grad_norm": 1.7886265831279569, "learning_rate": 8.933726067746688e-06, "loss": 0.7828, "step": 3033 }, { "epoch": 0.08938909594449274, "grad_norm": 1.8436462533130988, "learning_rate": 8.936671575846834e-06, "loss": 0.5797, "step": 3034 }, { "epoch": 0.08941855840195631, "grad_norm": 2.043066539438116, "learning_rate": 8.939617083946982e-06, "loss": 0.6098, "step": 3035 }, { "epoch": 0.08944802085941989, "grad_norm": 1.7639307472318706, "learning_rate": 8.942562592047128e-06, "loss": 0.6707, "step": 3036 }, { "epoch": 0.08947748331688346, "grad_norm": 1.8812650789792693, "learning_rate": 8.945508100147276e-06, "loss": 0.5587, "step": 3037 }, { "epoch": 0.08950694577434704, "grad_norm": 1.9372250845568766, "learning_rate": 8.948453608247424e-06, "loss": 0.5243, "step": 3038 }, { "epoch": 0.08953640823181061, "grad_norm": 1.8300360852008533, "learning_rate": 8.95139911634757e-06, "loss": 0.6966, "step": 3039 }, { "epoch": 0.0895658706892742, "grad_norm": 1.9348799658730722, "learning_rate": 8.954344624447719e-06, "loss": 0.478, "step": 3040 }, { "epoch": 0.08959533314673777, "grad_norm": 1.6588049796570787, "learning_rate": 8.957290132547865e-06, "loss": 0.4592, "step": 3041 }, { "epoch": 0.08962479560420135, "grad_norm": 1.7558911828856631, "learning_rate": 8.960235640648013e-06, "loss": 0.6291, "step": 3042 }, { "epoch": 0.08965425806166492, "grad_norm": 1.832662990060859, "learning_rate": 8.96318114874816e-06, "loss": 0.5742, "step": 3043 }, { "epoch": 0.0896837205191285, "grad_norm": 1.7380329795289873, "learning_rate": 8.966126656848307e-06, "loss": 0.5504, "step": 3044 }, { "epoch": 0.08971318297659207, "grad_norm": 1.814665765158258, "learning_rate": 8.969072164948455e-06, "loss": 0.6165, "step": 3045 }, { "epoch": 0.08974264543405565, "grad_norm": 1.8889738271968155, "learning_rate": 8.972017673048602e-06, "loss": 0.6559, "step": 3046 }, { "epoch": 0.08977210789151924, "grad_norm": 2.059206423612416, "learning_rate": 8.97496318114875e-06, "loss": 0.6143, "step": 3047 }, { "epoch": 0.08980157034898281, "grad_norm": 1.6520935337876537, "learning_rate": 8.977908689248896e-06, "loss": 0.5306, "step": 3048 }, { "epoch": 0.08983103280644639, "grad_norm": 1.858299790665991, "learning_rate": 8.980854197349044e-06, "loss": 0.6311, "step": 3049 }, { "epoch": 0.08986049526390996, "grad_norm": 1.8289011903098844, "learning_rate": 8.98379970544919e-06, "loss": 0.7195, "step": 3050 }, { "epoch": 0.08988995772137354, "grad_norm": 1.9254191449499016, "learning_rate": 8.986745213549338e-06, "loss": 0.6171, "step": 3051 }, { "epoch": 0.08991942017883711, "grad_norm": 2.014071105575231, "learning_rate": 8.989690721649485e-06, "loss": 0.664, "step": 3052 }, { "epoch": 0.0899488826363007, "grad_norm": 1.8871469696463543, "learning_rate": 8.992636229749633e-06, "loss": 0.6234, "step": 3053 }, { "epoch": 0.08997834509376428, "grad_norm": 1.8537046151429695, "learning_rate": 8.99558173784978e-06, "loss": 0.5519, "step": 3054 }, { "epoch": 0.09000780755122785, "grad_norm": 1.7492738393311307, "learning_rate": 8.998527245949927e-06, "loss": 0.6877, "step": 3055 }, { "epoch": 0.09003727000869143, "grad_norm": 1.8964454882018373, "learning_rate": 9.001472754050075e-06, "loss": 0.5681, "step": 3056 }, { "epoch": 0.090066732466155, "grad_norm": 1.6500885599325976, "learning_rate": 9.004418262150221e-06, "loss": 0.4992, "step": 3057 }, { "epoch": 0.09009619492361857, "grad_norm": 1.9514025985102557, "learning_rate": 9.00736377025037e-06, "loss": 0.5119, "step": 3058 }, { "epoch": 0.09012565738108215, "grad_norm": 2.006094974014502, "learning_rate": 9.010309278350516e-06, "loss": 0.5064, "step": 3059 }, { "epoch": 0.09015511983854574, "grad_norm": 1.7785305003635155, "learning_rate": 9.013254786450664e-06, "loss": 0.5438, "step": 3060 }, { "epoch": 0.09018458229600931, "grad_norm": 1.7474577352608456, "learning_rate": 9.016200294550812e-06, "loss": 0.6059, "step": 3061 }, { "epoch": 0.09021404475347289, "grad_norm": 1.8960833207182024, "learning_rate": 9.019145802650958e-06, "loss": 0.5423, "step": 3062 }, { "epoch": 0.09024350721093646, "grad_norm": 2.1922604860132, "learning_rate": 9.022091310751106e-06, "loss": 0.6201, "step": 3063 }, { "epoch": 0.09027296966840004, "grad_norm": 1.8574515738360462, "learning_rate": 9.025036818851252e-06, "loss": 0.6525, "step": 3064 }, { "epoch": 0.09030243212586361, "grad_norm": 2.1243527221264764, "learning_rate": 9.0279823269514e-06, "loss": 0.5807, "step": 3065 }, { "epoch": 0.0903318945833272, "grad_norm": 1.8472265887617425, "learning_rate": 9.030927835051547e-06, "loss": 0.5813, "step": 3066 }, { "epoch": 0.09036135704079078, "grad_norm": 1.4643446058861438, "learning_rate": 9.033873343151695e-06, "loss": 0.4385, "step": 3067 }, { "epoch": 0.09039081949825435, "grad_norm": 1.9145278523510885, "learning_rate": 9.036818851251841e-06, "loss": 0.5793, "step": 3068 }, { "epoch": 0.09042028195571793, "grad_norm": 2.0832224426779002, "learning_rate": 9.039764359351989e-06, "loss": 0.5484, "step": 3069 }, { "epoch": 0.0904497444131815, "grad_norm": 1.9151404482808458, "learning_rate": 9.042709867452137e-06, "loss": 0.6654, "step": 3070 }, { "epoch": 0.09047920687064508, "grad_norm": 1.8944304906198912, "learning_rate": 9.045655375552283e-06, "loss": 0.4985, "step": 3071 }, { "epoch": 0.09050866932810865, "grad_norm": 1.7970602657984598, "learning_rate": 9.048600883652431e-06, "loss": 0.4322, "step": 3072 }, { "epoch": 0.09053813178557224, "grad_norm": 2.090873032675229, "learning_rate": 9.051546391752578e-06, "loss": 0.5989, "step": 3073 }, { "epoch": 0.09056759424303581, "grad_norm": 2.108747056837011, "learning_rate": 9.054491899852726e-06, "loss": 0.6599, "step": 3074 }, { "epoch": 0.09059705670049939, "grad_norm": 1.9176162671560355, "learning_rate": 9.057437407952872e-06, "loss": 0.6408, "step": 3075 }, { "epoch": 0.09062651915796296, "grad_norm": 1.8203728910778774, "learning_rate": 9.06038291605302e-06, "loss": 0.4002, "step": 3076 }, { "epoch": 0.09065598161542654, "grad_norm": 1.7525930031885393, "learning_rate": 9.063328424153168e-06, "loss": 0.5669, "step": 3077 }, { "epoch": 0.09068544407289011, "grad_norm": 1.7974896514177265, "learning_rate": 9.066273932253314e-06, "loss": 0.4695, "step": 3078 }, { "epoch": 0.0907149065303537, "grad_norm": 1.8934161829826053, "learning_rate": 9.069219440353462e-06, "loss": 0.5115, "step": 3079 }, { "epoch": 0.09074436898781728, "grad_norm": 2.0328613732407934, "learning_rate": 9.072164948453609e-06, "loss": 0.678, "step": 3080 }, { "epoch": 0.09077383144528085, "grad_norm": 1.8067212542963325, "learning_rate": 9.075110456553757e-06, "loss": 0.4898, "step": 3081 }, { "epoch": 0.09080329390274443, "grad_norm": 2.079643616378179, "learning_rate": 9.078055964653903e-06, "loss": 0.5374, "step": 3082 }, { "epoch": 0.090832756360208, "grad_norm": 1.7860668544737397, "learning_rate": 9.081001472754051e-06, "loss": 0.5994, "step": 3083 }, { "epoch": 0.09086221881767158, "grad_norm": 1.7102395298497246, "learning_rate": 9.083946980854197e-06, "loss": 0.4752, "step": 3084 }, { "epoch": 0.09089168127513515, "grad_norm": 1.7912939904838419, "learning_rate": 9.086892488954345e-06, "loss": 0.7053, "step": 3085 }, { "epoch": 0.09092114373259874, "grad_norm": 1.9807316254657432, "learning_rate": 9.089837997054493e-06, "loss": 0.6553, "step": 3086 }, { "epoch": 0.09095060619006232, "grad_norm": 1.7424137961473838, "learning_rate": 9.09278350515464e-06, "loss": 0.4966, "step": 3087 }, { "epoch": 0.09098006864752589, "grad_norm": 1.903911755640042, "learning_rate": 9.095729013254788e-06, "loss": 0.6183, "step": 3088 }, { "epoch": 0.09100953110498947, "grad_norm": 1.8436355804016886, "learning_rate": 9.098674521354934e-06, "loss": 0.5504, "step": 3089 }, { "epoch": 0.09103899356245304, "grad_norm": 2.1254090163977235, "learning_rate": 9.101620029455082e-06, "loss": 0.655, "step": 3090 }, { "epoch": 0.09106845601991662, "grad_norm": 1.9182690435445535, "learning_rate": 9.104565537555228e-06, "loss": 0.3807, "step": 3091 }, { "epoch": 0.0910979184773802, "grad_norm": 1.8317488232549686, "learning_rate": 9.107511045655376e-06, "loss": 0.5399, "step": 3092 }, { "epoch": 0.09112738093484378, "grad_norm": 2.206425505202899, "learning_rate": 9.110456553755523e-06, "loss": 0.8007, "step": 3093 }, { "epoch": 0.09115684339230735, "grad_norm": 1.6044071505163453, "learning_rate": 9.11340206185567e-06, "loss": 0.3823, "step": 3094 }, { "epoch": 0.09118630584977093, "grad_norm": 1.5524014104168469, "learning_rate": 9.116347569955819e-06, "loss": 0.3763, "step": 3095 }, { "epoch": 0.0912157683072345, "grad_norm": 1.9806976863203223, "learning_rate": 9.119293078055965e-06, "loss": 0.8978, "step": 3096 }, { "epoch": 0.09124523076469808, "grad_norm": 1.7859664903191215, "learning_rate": 9.122238586156113e-06, "loss": 0.4896, "step": 3097 }, { "epoch": 0.09127469322216165, "grad_norm": 1.9060761196915466, "learning_rate": 9.12518409425626e-06, "loss": 0.7662, "step": 3098 }, { "epoch": 0.09130415567962524, "grad_norm": 2.0133601823083676, "learning_rate": 9.128129602356407e-06, "loss": 0.5462, "step": 3099 }, { "epoch": 0.09133361813708882, "grad_norm": 2.377035906669004, "learning_rate": 9.131075110456554e-06, "loss": 0.615, "step": 3100 }, { "epoch": 0.09136308059455239, "grad_norm": 1.8853234884352261, "learning_rate": 9.134020618556702e-06, "loss": 0.5422, "step": 3101 }, { "epoch": 0.09139254305201597, "grad_norm": 1.7558588884268433, "learning_rate": 9.13696612665685e-06, "loss": 0.4234, "step": 3102 }, { "epoch": 0.09142200550947954, "grad_norm": 1.7592984922623172, "learning_rate": 9.139911634756996e-06, "loss": 0.523, "step": 3103 }, { "epoch": 0.09145146796694312, "grad_norm": 1.8457617766642933, "learning_rate": 9.142857142857144e-06, "loss": 0.5196, "step": 3104 }, { "epoch": 0.0914809304244067, "grad_norm": 1.7540526365456506, "learning_rate": 9.14580265095729e-06, "loss": 0.5931, "step": 3105 }, { "epoch": 0.09151039288187028, "grad_norm": 1.6384058697136175, "learning_rate": 9.148748159057438e-06, "loss": 0.5129, "step": 3106 }, { "epoch": 0.09153985533933386, "grad_norm": 1.917644772849841, "learning_rate": 9.151693667157585e-06, "loss": 0.5985, "step": 3107 }, { "epoch": 0.09156931779679743, "grad_norm": 1.9832547281668174, "learning_rate": 9.154639175257733e-06, "loss": 0.581, "step": 3108 }, { "epoch": 0.091598780254261, "grad_norm": 1.921688016113929, "learning_rate": 9.157584683357879e-06, "loss": 0.6297, "step": 3109 }, { "epoch": 0.09162824271172458, "grad_norm": 1.8844586874941964, "learning_rate": 9.160530191458027e-06, "loss": 0.5624, "step": 3110 }, { "epoch": 0.09165770516918816, "grad_norm": 1.8390099505752375, "learning_rate": 9.163475699558175e-06, "loss": 0.4457, "step": 3111 }, { "epoch": 0.09168716762665174, "grad_norm": 2.083374055323297, "learning_rate": 9.166421207658321e-06, "loss": 0.6172, "step": 3112 }, { "epoch": 0.09171663008411532, "grad_norm": 1.7202716629862647, "learning_rate": 9.16936671575847e-06, "loss": 0.5574, "step": 3113 }, { "epoch": 0.0917460925415789, "grad_norm": 1.9388557402288118, "learning_rate": 9.172312223858616e-06, "loss": 0.6485, "step": 3114 }, { "epoch": 0.09177555499904247, "grad_norm": 1.9494247492750882, "learning_rate": 9.175257731958764e-06, "loss": 0.5009, "step": 3115 }, { "epoch": 0.09180501745650604, "grad_norm": 1.8750084517237335, "learning_rate": 9.17820324005891e-06, "loss": 0.6462, "step": 3116 }, { "epoch": 0.09183447991396962, "grad_norm": 1.680464260274568, "learning_rate": 9.181148748159058e-06, "loss": 0.4392, "step": 3117 }, { "epoch": 0.09186394237143321, "grad_norm": 1.82088914072003, "learning_rate": 9.184094256259206e-06, "loss": 0.4525, "step": 3118 }, { "epoch": 0.09189340482889678, "grad_norm": 1.789282670927745, "learning_rate": 9.187039764359352e-06, "loss": 0.6939, "step": 3119 }, { "epoch": 0.09192286728636036, "grad_norm": 1.5077623069960435, "learning_rate": 9.1899852724595e-06, "loss": 0.4691, "step": 3120 }, { "epoch": 0.09195232974382393, "grad_norm": 1.731772816597009, "learning_rate": 9.192930780559646e-06, "loss": 0.5603, "step": 3121 }, { "epoch": 0.09198179220128751, "grad_norm": 1.9073172424238607, "learning_rate": 9.195876288659794e-06, "loss": 0.5125, "step": 3122 }, { "epoch": 0.09201125465875108, "grad_norm": 1.7070146390528855, "learning_rate": 9.19882179675994e-06, "loss": 0.5637, "step": 3123 }, { "epoch": 0.09204071711621466, "grad_norm": 1.6525004389602564, "learning_rate": 9.201767304860089e-06, "loss": 0.5121, "step": 3124 }, { "epoch": 0.09207017957367825, "grad_norm": 1.7136719122166493, "learning_rate": 9.204712812960235e-06, "loss": 0.4585, "step": 3125 }, { "epoch": 0.09209964203114182, "grad_norm": 2.075955895591835, "learning_rate": 9.207658321060383e-06, "loss": 0.5034, "step": 3126 }, { "epoch": 0.0921291044886054, "grad_norm": 2.2129476839367266, "learning_rate": 9.210603829160531e-06, "loss": 0.5491, "step": 3127 }, { "epoch": 0.09215856694606897, "grad_norm": 1.8991865155161463, "learning_rate": 9.213549337260677e-06, "loss": 0.5831, "step": 3128 }, { "epoch": 0.09218802940353255, "grad_norm": 1.6194704123117947, "learning_rate": 9.216494845360825e-06, "loss": 0.4114, "step": 3129 }, { "epoch": 0.09221749186099612, "grad_norm": 1.7756575191177288, "learning_rate": 9.219440353460973e-06, "loss": 0.5298, "step": 3130 }, { "epoch": 0.09224695431845971, "grad_norm": 2.1151745990941593, "learning_rate": 9.22238586156112e-06, "loss": 0.5647, "step": 3131 }, { "epoch": 0.09227641677592328, "grad_norm": 1.6666877171351449, "learning_rate": 9.225331369661268e-06, "loss": 0.5209, "step": 3132 }, { "epoch": 0.09230587923338686, "grad_norm": 1.7926683182948717, "learning_rate": 9.228276877761416e-06, "loss": 0.6494, "step": 3133 }, { "epoch": 0.09233534169085043, "grad_norm": 1.7359212315916432, "learning_rate": 9.231222385861562e-06, "loss": 0.5799, "step": 3134 }, { "epoch": 0.09236480414831401, "grad_norm": 1.9531032221851474, "learning_rate": 9.23416789396171e-06, "loss": 0.5152, "step": 3135 }, { "epoch": 0.09239426660577758, "grad_norm": 1.9275787121707175, "learning_rate": 9.237113402061856e-06, "loss": 0.6474, "step": 3136 }, { "epoch": 0.09242372906324116, "grad_norm": 2.0205346747930206, "learning_rate": 9.240058910162004e-06, "loss": 0.6097, "step": 3137 }, { "epoch": 0.09245319152070475, "grad_norm": 1.737206098851605, "learning_rate": 9.24300441826215e-06, "loss": 0.5014, "step": 3138 }, { "epoch": 0.09248265397816832, "grad_norm": 1.694087739604559, "learning_rate": 9.245949926362299e-06, "loss": 0.513, "step": 3139 }, { "epoch": 0.0925121164356319, "grad_norm": 1.6045339995040102, "learning_rate": 9.248895434462447e-06, "loss": 0.4378, "step": 3140 }, { "epoch": 0.09254157889309547, "grad_norm": 2.2776000728831587, "learning_rate": 9.251840942562593e-06, "loss": 0.6875, "step": 3141 }, { "epoch": 0.09257104135055905, "grad_norm": 1.715431806854276, "learning_rate": 9.254786450662741e-06, "loss": 0.5495, "step": 3142 }, { "epoch": 0.09260050380802262, "grad_norm": 1.784227633119483, "learning_rate": 9.257731958762887e-06, "loss": 0.5315, "step": 3143 }, { "epoch": 0.09262996626548621, "grad_norm": 1.7858423912904484, "learning_rate": 9.260677466863035e-06, "loss": 0.5598, "step": 3144 }, { "epoch": 0.09265942872294979, "grad_norm": 1.958805562550568, "learning_rate": 9.263622974963182e-06, "loss": 0.5333, "step": 3145 }, { "epoch": 0.09268889118041336, "grad_norm": 1.6681600188590393, "learning_rate": 9.26656848306333e-06, "loss": 0.4693, "step": 3146 }, { "epoch": 0.09271835363787694, "grad_norm": 2.0711845325552263, "learning_rate": 9.269513991163476e-06, "loss": 0.5533, "step": 3147 }, { "epoch": 0.09274781609534051, "grad_norm": 1.8742037322867642, "learning_rate": 9.272459499263624e-06, "loss": 0.5854, "step": 3148 }, { "epoch": 0.09277727855280408, "grad_norm": 2.0370055925061528, "learning_rate": 9.275405007363772e-06, "loss": 0.6388, "step": 3149 }, { "epoch": 0.09280674101026766, "grad_norm": 1.5675345883138216, "learning_rate": 9.278350515463918e-06, "loss": 0.5035, "step": 3150 }, { "epoch": 0.09283620346773125, "grad_norm": 1.7869525066847167, "learning_rate": 9.281296023564066e-06, "loss": 0.6457, "step": 3151 }, { "epoch": 0.09286566592519482, "grad_norm": 1.9438455278094044, "learning_rate": 9.284241531664213e-06, "loss": 0.6881, "step": 3152 }, { "epoch": 0.0928951283826584, "grad_norm": 1.9915186343129019, "learning_rate": 9.28718703976436e-06, "loss": 0.4376, "step": 3153 }, { "epoch": 0.09292459084012197, "grad_norm": 1.9903909583044068, "learning_rate": 9.290132547864507e-06, "loss": 0.6135, "step": 3154 }, { "epoch": 0.09295405329758555, "grad_norm": 1.915296334885784, "learning_rate": 9.293078055964655e-06, "loss": 0.6482, "step": 3155 }, { "epoch": 0.09298351575504912, "grad_norm": 1.689326498254293, "learning_rate": 9.296023564064803e-06, "loss": 0.5529, "step": 3156 }, { "epoch": 0.09301297821251271, "grad_norm": 2.052669749163612, "learning_rate": 9.29896907216495e-06, "loss": 0.5313, "step": 3157 }, { "epoch": 0.09304244066997629, "grad_norm": 2.039263119357276, "learning_rate": 9.301914580265097e-06, "loss": 0.5305, "step": 3158 }, { "epoch": 0.09307190312743986, "grad_norm": 1.7946568143375556, "learning_rate": 9.304860088365244e-06, "loss": 0.5331, "step": 3159 }, { "epoch": 0.09310136558490344, "grad_norm": 1.6557803383667176, "learning_rate": 9.307805596465392e-06, "loss": 0.596, "step": 3160 }, { "epoch": 0.09313082804236701, "grad_norm": 1.7997261727587335, "learning_rate": 9.310751104565538e-06, "loss": 0.5149, "step": 3161 }, { "epoch": 0.09316029049983059, "grad_norm": 1.8166065686053832, "learning_rate": 9.313696612665686e-06, "loss": 0.5649, "step": 3162 }, { "epoch": 0.09318975295729416, "grad_norm": 1.580914378673716, "learning_rate": 9.316642120765832e-06, "loss": 0.5191, "step": 3163 }, { "epoch": 0.09321921541475775, "grad_norm": 1.7323777157661748, "learning_rate": 9.31958762886598e-06, "loss": 0.5079, "step": 3164 }, { "epoch": 0.09324867787222132, "grad_norm": 1.783861673495888, "learning_rate": 9.322533136966128e-06, "loss": 0.5444, "step": 3165 }, { "epoch": 0.0932781403296849, "grad_norm": 1.9023912555006073, "learning_rate": 9.325478645066275e-06, "loss": 0.4872, "step": 3166 }, { "epoch": 0.09330760278714847, "grad_norm": 1.8027836984271874, "learning_rate": 9.328424153166423e-06, "loss": 0.5483, "step": 3167 }, { "epoch": 0.09333706524461205, "grad_norm": 1.7634229641823271, "learning_rate": 9.331369661266569e-06, "loss": 0.6353, "step": 3168 }, { "epoch": 0.09336652770207562, "grad_norm": 1.7767969097946092, "learning_rate": 9.334315169366717e-06, "loss": 0.5643, "step": 3169 }, { "epoch": 0.09339599015953921, "grad_norm": 1.7051353222755867, "learning_rate": 9.337260677466863e-06, "loss": 0.4941, "step": 3170 }, { "epoch": 0.09342545261700279, "grad_norm": 1.7122418959896195, "learning_rate": 9.340206185567011e-06, "loss": 0.4462, "step": 3171 }, { "epoch": 0.09345491507446636, "grad_norm": 1.782909896284004, "learning_rate": 9.343151693667158e-06, "loss": 0.606, "step": 3172 }, { "epoch": 0.09348437753192994, "grad_norm": 1.9799841781887728, "learning_rate": 9.346097201767306e-06, "loss": 0.5391, "step": 3173 }, { "epoch": 0.09351383998939351, "grad_norm": 2.0510412572124066, "learning_rate": 9.349042709867454e-06, "loss": 0.5303, "step": 3174 }, { "epoch": 0.09354330244685709, "grad_norm": 1.7551540015722373, "learning_rate": 9.3519882179676e-06, "loss": 0.3951, "step": 3175 }, { "epoch": 0.09357276490432066, "grad_norm": 1.7444092044655999, "learning_rate": 9.354933726067748e-06, "loss": 0.6185, "step": 3176 }, { "epoch": 0.09360222736178425, "grad_norm": 2.1134226706309556, "learning_rate": 9.357879234167894e-06, "loss": 0.6287, "step": 3177 }, { "epoch": 0.09363168981924783, "grad_norm": 1.8802617053418544, "learning_rate": 9.360824742268042e-06, "loss": 0.6359, "step": 3178 }, { "epoch": 0.0936611522767114, "grad_norm": 1.7159479093924244, "learning_rate": 9.363770250368189e-06, "loss": 0.5538, "step": 3179 }, { "epoch": 0.09369061473417498, "grad_norm": 1.8259772920113484, "learning_rate": 9.366715758468337e-06, "loss": 0.5318, "step": 3180 }, { "epoch": 0.09372007719163855, "grad_norm": 1.693402017260087, "learning_rate": 9.369661266568485e-06, "loss": 0.5488, "step": 3181 }, { "epoch": 0.09374953964910213, "grad_norm": 1.7214332650656798, "learning_rate": 9.372606774668631e-06, "loss": 0.4399, "step": 3182 }, { "epoch": 0.09377900210656571, "grad_norm": 1.9918052353052667, "learning_rate": 9.375552282768779e-06, "loss": 0.7616, "step": 3183 }, { "epoch": 0.09380846456402929, "grad_norm": 1.654679500847601, "learning_rate": 9.378497790868925e-06, "loss": 0.5482, "step": 3184 }, { "epoch": 0.09383792702149286, "grad_norm": 1.7131601295312535, "learning_rate": 9.381443298969073e-06, "loss": 0.4517, "step": 3185 }, { "epoch": 0.09386738947895644, "grad_norm": 1.7536237212116257, "learning_rate": 9.38438880706922e-06, "loss": 0.6069, "step": 3186 }, { "epoch": 0.09389685193642001, "grad_norm": 1.7548502367122094, "learning_rate": 9.387334315169368e-06, "loss": 0.5879, "step": 3187 }, { "epoch": 0.09392631439388359, "grad_norm": 1.9144919429246672, "learning_rate": 9.390279823269514e-06, "loss": 0.689, "step": 3188 }, { "epoch": 0.09395577685134716, "grad_norm": 1.8904223263103725, "learning_rate": 9.393225331369662e-06, "loss": 0.6836, "step": 3189 }, { "epoch": 0.09398523930881075, "grad_norm": 1.828638315359022, "learning_rate": 9.39617083946981e-06, "loss": 0.6243, "step": 3190 }, { "epoch": 0.09401470176627433, "grad_norm": 1.7902375274406261, "learning_rate": 9.399116347569956e-06, "loss": 0.5673, "step": 3191 }, { "epoch": 0.0940441642237379, "grad_norm": 1.9168136105698756, "learning_rate": 9.402061855670104e-06, "loss": 0.5367, "step": 3192 }, { "epoch": 0.09407362668120148, "grad_norm": 1.958440372867769, "learning_rate": 9.40500736377025e-06, "loss": 0.5679, "step": 3193 }, { "epoch": 0.09410308913866505, "grad_norm": 1.6458402281188973, "learning_rate": 9.407952871870399e-06, "loss": 0.4952, "step": 3194 }, { "epoch": 0.09413255159612863, "grad_norm": 1.6607865089683473, "learning_rate": 9.410898379970545e-06, "loss": 0.447, "step": 3195 }, { "epoch": 0.09416201405359222, "grad_norm": 1.925269214491569, "learning_rate": 9.413843888070693e-06, "loss": 0.6755, "step": 3196 }, { "epoch": 0.09419147651105579, "grad_norm": 1.9103760931374736, "learning_rate": 9.416789396170841e-06, "loss": 0.5763, "step": 3197 }, { "epoch": 0.09422093896851937, "grad_norm": 1.6996246846510905, "learning_rate": 9.419734904270987e-06, "loss": 0.5958, "step": 3198 }, { "epoch": 0.09425040142598294, "grad_norm": 1.9455550798642232, "learning_rate": 9.422680412371135e-06, "loss": 0.2969, "step": 3199 }, { "epoch": 0.09427986388344652, "grad_norm": 1.8964671636707433, "learning_rate": 9.425625920471282e-06, "loss": 0.5637, "step": 3200 }, { "epoch": 0.09430932634091009, "grad_norm": 1.8983570197882558, "learning_rate": 9.42857142857143e-06, "loss": 0.7118, "step": 3201 }, { "epoch": 0.09433878879837367, "grad_norm": 2.0126213979693715, "learning_rate": 9.431516936671576e-06, "loss": 0.5931, "step": 3202 }, { "epoch": 0.09436825125583725, "grad_norm": 1.99991786408169, "learning_rate": 9.434462444771724e-06, "loss": 0.6298, "step": 3203 }, { "epoch": 0.09439771371330083, "grad_norm": 1.860045113620666, "learning_rate": 9.43740795287187e-06, "loss": 0.4886, "step": 3204 }, { "epoch": 0.0944271761707644, "grad_norm": 1.7028801799196571, "learning_rate": 9.440353460972018e-06, "loss": 0.6636, "step": 3205 }, { "epoch": 0.09445663862822798, "grad_norm": 1.7895605964125705, "learning_rate": 9.443298969072166e-06, "loss": 0.6233, "step": 3206 }, { "epoch": 0.09448610108569155, "grad_norm": 1.5962474957828443, "learning_rate": 9.446244477172313e-06, "loss": 0.4941, "step": 3207 }, { "epoch": 0.09451556354315513, "grad_norm": 1.909514036147813, "learning_rate": 9.44918998527246e-06, "loss": 0.609, "step": 3208 }, { "epoch": 0.09454502600061872, "grad_norm": 1.837646557334773, "learning_rate": 9.452135493372607e-06, "loss": 0.5276, "step": 3209 }, { "epoch": 0.09457448845808229, "grad_norm": 1.7488168696068163, "learning_rate": 9.455081001472755e-06, "loss": 0.5555, "step": 3210 }, { "epoch": 0.09460395091554587, "grad_norm": 1.951394111849143, "learning_rate": 9.458026509572901e-06, "loss": 0.5612, "step": 3211 }, { "epoch": 0.09463341337300944, "grad_norm": 1.7789222155532158, "learning_rate": 9.46097201767305e-06, "loss": 0.5752, "step": 3212 }, { "epoch": 0.09466287583047302, "grad_norm": 1.7854689737678062, "learning_rate": 9.463917525773197e-06, "loss": 0.5121, "step": 3213 }, { "epoch": 0.09469233828793659, "grad_norm": 1.7685351061992682, "learning_rate": 9.466863033873343e-06, "loss": 0.4817, "step": 3214 }, { "epoch": 0.09472180074540017, "grad_norm": 2.166882022036995, "learning_rate": 9.469808541973491e-06, "loss": 0.6755, "step": 3215 }, { "epoch": 0.09475126320286376, "grad_norm": 1.6911878619637712, "learning_rate": 9.472754050073638e-06, "loss": 0.4271, "step": 3216 }, { "epoch": 0.09478072566032733, "grad_norm": 1.7025519090511727, "learning_rate": 9.475699558173786e-06, "loss": 0.5412, "step": 3217 }, { "epoch": 0.0948101881177909, "grad_norm": 1.871694444970556, "learning_rate": 9.478645066273932e-06, "loss": 0.5746, "step": 3218 }, { "epoch": 0.09483965057525448, "grad_norm": 1.9433508342194257, "learning_rate": 9.48159057437408e-06, "loss": 0.6105, "step": 3219 }, { "epoch": 0.09486911303271806, "grad_norm": 1.861569576773712, "learning_rate": 9.484536082474226e-06, "loss": 0.514, "step": 3220 }, { "epoch": 0.09489857549018163, "grad_norm": 1.8009814845100143, "learning_rate": 9.487481590574374e-06, "loss": 0.4191, "step": 3221 }, { "epoch": 0.09492803794764522, "grad_norm": 1.7838009907143653, "learning_rate": 9.490427098674522e-06, "loss": 0.497, "step": 3222 }, { "epoch": 0.0949575004051088, "grad_norm": 1.997749275009984, "learning_rate": 9.493372606774669e-06, "loss": 0.6217, "step": 3223 }, { "epoch": 0.09498696286257237, "grad_norm": 1.9597931570696419, "learning_rate": 9.496318114874817e-06, "loss": 0.7192, "step": 3224 }, { "epoch": 0.09501642532003594, "grad_norm": 1.7991015735723839, "learning_rate": 9.499263622974963e-06, "loss": 0.5189, "step": 3225 }, { "epoch": 0.09504588777749952, "grad_norm": 1.7805541472893838, "learning_rate": 9.502209131075111e-06, "loss": 0.4472, "step": 3226 }, { "epoch": 0.0950753502349631, "grad_norm": 1.765247113140379, "learning_rate": 9.505154639175257e-06, "loss": 0.602, "step": 3227 }, { "epoch": 0.09510481269242667, "grad_norm": 1.7714635341049576, "learning_rate": 9.508100147275405e-06, "loss": 0.609, "step": 3228 }, { "epoch": 0.09513427514989026, "grad_norm": 1.730247321642253, "learning_rate": 9.511045655375552e-06, "loss": 0.5154, "step": 3229 }, { "epoch": 0.09516373760735383, "grad_norm": 1.8506591107021795, "learning_rate": 9.5139911634757e-06, "loss": 0.6056, "step": 3230 }, { "epoch": 0.09519320006481741, "grad_norm": 1.7912793667922322, "learning_rate": 9.516936671575848e-06, "loss": 0.5827, "step": 3231 }, { "epoch": 0.09522266252228098, "grad_norm": 1.8493403276716318, "learning_rate": 9.519882179675994e-06, "loss": 0.4525, "step": 3232 }, { "epoch": 0.09525212497974456, "grad_norm": 1.8830097886924053, "learning_rate": 9.522827687776142e-06, "loss": 0.7044, "step": 3233 }, { "epoch": 0.09528158743720813, "grad_norm": 1.9224810471153708, "learning_rate": 9.525773195876288e-06, "loss": 0.5044, "step": 3234 }, { "epoch": 0.09531104989467172, "grad_norm": 1.7042256378060774, "learning_rate": 9.528718703976436e-06, "loss": 0.5422, "step": 3235 }, { "epoch": 0.0953405123521353, "grad_norm": 1.6349493762368412, "learning_rate": 9.531664212076584e-06, "loss": 0.4945, "step": 3236 }, { "epoch": 0.09536997480959887, "grad_norm": 1.7536867801126397, "learning_rate": 9.534609720176732e-06, "loss": 0.4367, "step": 3237 }, { "epoch": 0.09539943726706245, "grad_norm": 1.9371599051907031, "learning_rate": 9.537555228276879e-06, "loss": 0.7508, "step": 3238 }, { "epoch": 0.09542889972452602, "grad_norm": 1.9953559822466416, "learning_rate": 9.540500736377027e-06, "loss": 0.485, "step": 3239 }, { "epoch": 0.0954583621819896, "grad_norm": 1.9312385019412535, "learning_rate": 9.543446244477173e-06, "loss": 0.5867, "step": 3240 }, { "epoch": 0.09548782463945317, "grad_norm": 1.8472967663515547, "learning_rate": 9.546391752577321e-06, "loss": 0.6609, "step": 3241 }, { "epoch": 0.09551728709691676, "grad_norm": 1.8057920818891606, "learning_rate": 9.549337260677467e-06, "loss": 0.6085, "step": 3242 }, { "epoch": 0.09554674955438033, "grad_norm": 1.9169377142592048, "learning_rate": 9.552282768777615e-06, "loss": 0.6802, "step": 3243 }, { "epoch": 0.09557621201184391, "grad_norm": 1.6356316134067186, "learning_rate": 9.555228276877763e-06, "loss": 0.3929, "step": 3244 }, { "epoch": 0.09560567446930748, "grad_norm": 1.4691531251210432, "learning_rate": 9.55817378497791e-06, "loss": 0.4629, "step": 3245 }, { "epoch": 0.09563513692677106, "grad_norm": 1.7711610277411718, "learning_rate": 9.561119293078058e-06, "loss": 0.4379, "step": 3246 }, { "epoch": 0.09566459938423463, "grad_norm": 1.723248527394515, "learning_rate": 9.564064801178204e-06, "loss": 0.5421, "step": 3247 }, { "epoch": 0.09569406184169822, "grad_norm": 2.0072881148463724, "learning_rate": 9.567010309278352e-06, "loss": 0.6086, "step": 3248 }, { "epoch": 0.0957235242991618, "grad_norm": 1.7695058706740951, "learning_rate": 9.569955817378498e-06, "loss": 0.5184, "step": 3249 }, { "epoch": 0.09575298675662537, "grad_norm": 1.81480443872046, "learning_rate": 9.572901325478646e-06, "loss": 0.6063, "step": 3250 }, { "epoch": 0.09578244921408895, "grad_norm": 1.8482443522152523, "learning_rate": 9.575846833578793e-06, "loss": 0.588, "step": 3251 }, { "epoch": 0.09581191167155252, "grad_norm": 1.629355633782346, "learning_rate": 9.57879234167894e-06, "loss": 0.5464, "step": 3252 }, { "epoch": 0.0958413741290161, "grad_norm": 1.8734849148503951, "learning_rate": 9.581737849779089e-06, "loss": 0.5465, "step": 3253 }, { "epoch": 0.09587083658647969, "grad_norm": 1.7607767409651727, "learning_rate": 9.584683357879235e-06, "loss": 0.3699, "step": 3254 }, { "epoch": 0.09590029904394326, "grad_norm": 1.784878376894617, "learning_rate": 9.587628865979383e-06, "loss": 0.5848, "step": 3255 }, { "epoch": 0.09592976150140684, "grad_norm": 1.7272561005972054, "learning_rate": 9.59057437407953e-06, "loss": 0.5016, "step": 3256 }, { "epoch": 0.09595922395887041, "grad_norm": 1.86826986927349, "learning_rate": 9.593519882179677e-06, "loss": 0.5358, "step": 3257 }, { "epoch": 0.09598868641633398, "grad_norm": 1.842308121106529, "learning_rate": 9.596465390279824e-06, "loss": 0.4444, "step": 3258 }, { "epoch": 0.09601814887379756, "grad_norm": 1.7264691873197333, "learning_rate": 9.599410898379972e-06, "loss": 0.4999, "step": 3259 }, { "epoch": 0.09604761133126113, "grad_norm": 2.114301494306919, "learning_rate": 9.60235640648012e-06, "loss": 0.6185, "step": 3260 }, { "epoch": 0.09607707378872472, "grad_norm": 1.7625715780183668, "learning_rate": 9.605301914580266e-06, "loss": 0.515, "step": 3261 }, { "epoch": 0.0961065362461883, "grad_norm": 1.7829299971505115, "learning_rate": 9.608247422680414e-06, "loss": 0.5813, "step": 3262 }, { "epoch": 0.09613599870365187, "grad_norm": 1.8612561803672134, "learning_rate": 9.61119293078056e-06, "loss": 0.5374, "step": 3263 }, { "epoch": 0.09616546116111545, "grad_norm": 2.029539139616635, "learning_rate": 9.614138438880708e-06, "loss": 0.6483, "step": 3264 }, { "epoch": 0.09619492361857902, "grad_norm": 1.8747462912245585, "learning_rate": 9.617083946980855e-06, "loss": 0.5235, "step": 3265 }, { "epoch": 0.0962243860760426, "grad_norm": 1.8393007345320225, "learning_rate": 9.620029455081003e-06, "loss": 0.4662, "step": 3266 }, { "epoch": 0.09625384853350619, "grad_norm": 1.632925616775086, "learning_rate": 9.622974963181149e-06, "loss": 0.4458, "step": 3267 }, { "epoch": 0.09628331099096976, "grad_norm": 1.889564156081969, "learning_rate": 9.625920471281297e-06, "loss": 0.5199, "step": 3268 }, { "epoch": 0.09631277344843334, "grad_norm": 1.7852823531898168, "learning_rate": 9.628865979381445e-06, "loss": 0.457, "step": 3269 }, { "epoch": 0.09634223590589691, "grad_norm": 1.7373422058961607, "learning_rate": 9.631811487481591e-06, "loss": 0.5466, "step": 3270 }, { "epoch": 0.09637169836336049, "grad_norm": 1.8718550792227853, "learning_rate": 9.63475699558174e-06, "loss": 0.6371, "step": 3271 }, { "epoch": 0.09640116082082406, "grad_norm": 1.712888797710985, "learning_rate": 9.637702503681886e-06, "loss": 0.5916, "step": 3272 }, { "epoch": 0.09643062327828764, "grad_norm": 2.041317645750366, "learning_rate": 9.640648011782034e-06, "loss": 0.6068, "step": 3273 }, { "epoch": 0.09646008573575122, "grad_norm": 1.9252945566788755, "learning_rate": 9.64359351988218e-06, "loss": 0.64, "step": 3274 }, { "epoch": 0.0964895481932148, "grad_norm": 1.7640430991525546, "learning_rate": 9.646539027982328e-06, "loss": 0.631, "step": 3275 }, { "epoch": 0.09651901065067837, "grad_norm": 1.7185069579137568, "learning_rate": 9.649484536082476e-06, "loss": 0.4696, "step": 3276 }, { "epoch": 0.09654847310814195, "grad_norm": 1.7196319440341001, "learning_rate": 9.652430044182622e-06, "loss": 0.4321, "step": 3277 }, { "epoch": 0.09657793556560552, "grad_norm": 1.7992447447388729, "learning_rate": 9.65537555228277e-06, "loss": 0.484, "step": 3278 }, { "epoch": 0.0966073980230691, "grad_norm": 1.8380809063688974, "learning_rate": 9.658321060382917e-06, "loss": 0.6555, "step": 3279 }, { "epoch": 0.09663686048053269, "grad_norm": 1.8304763001534707, "learning_rate": 9.661266568483065e-06, "loss": 0.5989, "step": 3280 }, { "epoch": 0.09666632293799626, "grad_norm": 1.7872392562185726, "learning_rate": 9.664212076583211e-06, "loss": 0.6197, "step": 3281 }, { "epoch": 0.09669578539545984, "grad_norm": 1.5753053527204182, "learning_rate": 9.667157584683359e-06, "loss": 0.4696, "step": 3282 }, { "epoch": 0.09672524785292341, "grad_norm": 1.81838947020999, "learning_rate": 9.670103092783505e-06, "loss": 0.4369, "step": 3283 }, { "epoch": 0.09675471031038699, "grad_norm": 2.0540489018570836, "learning_rate": 9.673048600883653e-06, "loss": 0.4912, "step": 3284 }, { "epoch": 0.09678417276785056, "grad_norm": 2.320645574638909, "learning_rate": 9.675994108983801e-06, "loss": 0.4878, "step": 3285 }, { "epoch": 0.09681363522531414, "grad_norm": 1.8508840249176393, "learning_rate": 9.678939617083948e-06, "loss": 0.5805, "step": 3286 }, { "epoch": 0.09684309768277773, "grad_norm": 1.710761154197956, "learning_rate": 9.681885125184096e-06, "loss": 0.5327, "step": 3287 }, { "epoch": 0.0968725601402413, "grad_norm": 1.83928932140612, "learning_rate": 9.684830633284242e-06, "loss": 0.6585, "step": 3288 }, { "epoch": 0.09690202259770488, "grad_norm": 1.8201651480986583, "learning_rate": 9.68777614138439e-06, "loss": 0.427, "step": 3289 }, { "epoch": 0.09693148505516845, "grad_norm": 1.8174687955280169, "learning_rate": 9.690721649484536e-06, "loss": 0.6426, "step": 3290 }, { "epoch": 0.09696094751263203, "grad_norm": 2.005613431810859, "learning_rate": 9.693667157584684e-06, "loss": 0.4139, "step": 3291 }, { "epoch": 0.0969904099700956, "grad_norm": 1.815268077595512, "learning_rate": 9.696612665684832e-06, "loss": 0.5866, "step": 3292 }, { "epoch": 0.09701987242755919, "grad_norm": 2.052370650931184, "learning_rate": 9.699558173784979e-06, "loss": 0.6482, "step": 3293 }, { "epoch": 0.09704933488502276, "grad_norm": 2.076452741777799, "learning_rate": 9.702503681885127e-06, "loss": 0.6799, "step": 3294 }, { "epoch": 0.09707879734248634, "grad_norm": 1.8999435331646795, "learning_rate": 9.705449189985273e-06, "loss": 0.5576, "step": 3295 }, { "epoch": 0.09710825979994991, "grad_norm": 1.7658159542353375, "learning_rate": 9.708394698085421e-06, "loss": 0.5011, "step": 3296 }, { "epoch": 0.09713772225741349, "grad_norm": 2.0381841049177867, "learning_rate": 9.711340206185567e-06, "loss": 0.5518, "step": 3297 }, { "epoch": 0.09716718471487706, "grad_norm": 1.817148682114464, "learning_rate": 9.714285714285715e-06, "loss": 0.5627, "step": 3298 }, { "epoch": 0.09719664717234064, "grad_norm": 1.894946147208004, "learning_rate": 9.717231222385861e-06, "loss": 0.4903, "step": 3299 }, { "epoch": 0.09722610962980423, "grad_norm": 1.827528846536072, "learning_rate": 9.72017673048601e-06, "loss": 0.3825, "step": 3300 }, { "epoch": 0.0972555720872678, "grad_norm": 1.641551295238154, "learning_rate": 9.723122238586158e-06, "loss": 0.3831, "step": 3301 }, { "epoch": 0.09728503454473138, "grad_norm": 1.9396638979072087, "learning_rate": 9.726067746686304e-06, "loss": 0.6039, "step": 3302 }, { "epoch": 0.09731449700219495, "grad_norm": 1.6830344976342841, "learning_rate": 9.729013254786452e-06, "loss": 0.5264, "step": 3303 }, { "epoch": 0.09734395945965853, "grad_norm": 2.0833101438393293, "learning_rate": 9.731958762886598e-06, "loss": 0.5726, "step": 3304 }, { "epoch": 0.0973734219171221, "grad_norm": 1.651867688125354, "learning_rate": 9.734904270986746e-06, "loss": 0.4486, "step": 3305 }, { "epoch": 0.09740288437458569, "grad_norm": 1.8011617673709122, "learning_rate": 9.737849779086892e-06, "loss": 0.6621, "step": 3306 }, { "epoch": 0.09743234683204927, "grad_norm": 1.9272883498324649, "learning_rate": 9.74079528718704e-06, "loss": 0.6065, "step": 3307 }, { "epoch": 0.09746180928951284, "grad_norm": 1.5538926587045612, "learning_rate": 9.743740795287188e-06, "loss": 0.4319, "step": 3308 }, { "epoch": 0.09749127174697642, "grad_norm": 1.8141469237934127, "learning_rate": 9.746686303387335e-06, "loss": 0.6622, "step": 3309 }, { "epoch": 0.09752073420443999, "grad_norm": 1.7798337724292732, "learning_rate": 9.749631811487483e-06, "loss": 0.6436, "step": 3310 }, { "epoch": 0.09755019666190357, "grad_norm": 1.7975482729414276, "learning_rate": 9.752577319587629e-06, "loss": 0.5027, "step": 3311 }, { "epoch": 0.09757965911936714, "grad_norm": 1.6522135209827935, "learning_rate": 9.755522827687777e-06, "loss": 0.6141, "step": 3312 }, { "epoch": 0.09760912157683073, "grad_norm": 2.0137315914186367, "learning_rate": 9.758468335787923e-06, "loss": 0.3686, "step": 3313 }, { "epoch": 0.0976385840342943, "grad_norm": 1.7749208513183765, "learning_rate": 9.761413843888071e-06, "loss": 0.6199, "step": 3314 }, { "epoch": 0.09766804649175788, "grad_norm": 1.9148991835924514, "learning_rate": 9.764359351988218e-06, "loss": 0.4073, "step": 3315 }, { "epoch": 0.09769750894922145, "grad_norm": 1.86462313567831, "learning_rate": 9.767304860088366e-06, "loss": 0.6044, "step": 3316 }, { "epoch": 0.09772697140668503, "grad_norm": 1.9743277616311943, "learning_rate": 9.770250368188514e-06, "loss": 0.5761, "step": 3317 }, { "epoch": 0.0977564338641486, "grad_norm": 1.8148182793991916, "learning_rate": 9.77319587628866e-06, "loss": 0.527, "step": 3318 }, { "epoch": 0.09778589632161219, "grad_norm": 1.703259362603112, "learning_rate": 9.776141384388808e-06, "loss": 0.6489, "step": 3319 }, { "epoch": 0.09781535877907577, "grad_norm": 1.8198959788547044, "learning_rate": 9.779086892488954e-06, "loss": 0.5572, "step": 3320 }, { "epoch": 0.09784482123653934, "grad_norm": 2.0213491927522056, "learning_rate": 9.782032400589102e-06, "loss": 0.6068, "step": 3321 }, { "epoch": 0.09787428369400292, "grad_norm": 2.0211528211025036, "learning_rate": 9.784977908689249e-06, "loss": 0.6285, "step": 3322 }, { "epoch": 0.09790374615146649, "grad_norm": 1.8386034226184158, "learning_rate": 9.787923416789397e-06, "loss": 0.3972, "step": 3323 }, { "epoch": 0.09793320860893007, "grad_norm": 1.6828811767666494, "learning_rate": 9.790868924889543e-06, "loss": 0.5098, "step": 3324 }, { "epoch": 0.09796267106639364, "grad_norm": 1.6317058549035097, "learning_rate": 9.793814432989691e-06, "loss": 0.4165, "step": 3325 }, { "epoch": 0.09799213352385723, "grad_norm": 1.8018667815313414, "learning_rate": 9.796759941089839e-06, "loss": 0.547, "step": 3326 }, { "epoch": 0.0980215959813208, "grad_norm": 1.8846868725858017, "learning_rate": 9.799705449189985e-06, "loss": 0.7342, "step": 3327 }, { "epoch": 0.09805105843878438, "grad_norm": 1.7980845934591316, "learning_rate": 9.802650957290133e-06, "loss": 0.5232, "step": 3328 }, { "epoch": 0.09808052089624796, "grad_norm": 1.8038024588332418, "learning_rate": 9.80559646539028e-06, "loss": 0.5162, "step": 3329 }, { "epoch": 0.09810998335371153, "grad_norm": 1.7280092973278638, "learning_rate": 9.808541973490428e-06, "loss": 0.5049, "step": 3330 }, { "epoch": 0.0981394458111751, "grad_norm": 1.7206245152922237, "learning_rate": 9.811487481590574e-06, "loss": 0.5675, "step": 3331 }, { "epoch": 0.0981689082686387, "grad_norm": 1.670773126535838, "learning_rate": 9.814432989690722e-06, "loss": 0.4011, "step": 3332 }, { "epoch": 0.09819837072610227, "grad_norm": 2.0334252041086316, "learning_rate": 9.81737849779087e-06, "loss": 0.5471, "step": 3333 }, { "epoch": 0.09822783318356584, "grad_norm": 1.7495252548812439, "learning_rate": 9.820324005891016e-06, "loss": 0.6503, "step": 3334 }, { "epoch": 0.09825729564102942, "grad_norm": 1.8648571185402902, "learning_rate": 9.823269513991164e-06, "loss": 0.4914, "step": 3335 }, { "epoch": 0.098286758098493, "grad_norm": 1.8024859791469081, "learning_rate": 9.82621502209131e-06, "loss": 0.5065, "step": 3336 }, { "epoch": 0.09831622055595657, "grad_norm": 1.601615412064378, "learning_rate": 9.829160530191459e-06, "loss": 0.4978, "step": 3337 }, { "epoch": 0.09834568301342014, "grad_norm": 1.9348943384930015, "learning_rate": 9.832106038291605e-06, "loss": 0.6246, "step": 3338 }, { "epoch": 0.09837514547088373, "grad_norm": 2.1846055038747814, "learning_rate": 9.835051546391753e-06, "loss": 0.5555, "step": 3339 }, { "epoch": 0.0984046079283473, "grad_norm": 1.666304461105163, "learning_rate": 9.8379970544919e-06, "loss": 0.4059, "step": 3340 }, { "epoch": 0.09843407038581088, "grad_norm": 1.8655684375012092, "learning_rate": 9.840942562592047e-06, "loss": 0.4835, "step": 3341 }, { "epoch": 0.09846353284327446, "grad_norm": 1.8865886117771884, "learning_rate": 9.843888070692195e-06, "loss": 0.6048, "step": 3342 }, { "epoch": 0.09849299530073803, "grad_norm": 1.7921092556636582, "learning_rate": 9.846833578792343e-06, "loss": 0.5232, "step": 3343 }, { "epoch": 0.0985224577582016, "grad_norm": 1.8642395037920791, "learning_rate": 9.84977908689249e-06, "loss": 0.6062, "step": 3344 }, { "epoch": 0.0985519202156652, "grad_norm": 1.9661297769801027, "learning_rate": 9.852724594992638e-06, "loss": 0.5707, "step": 3345 }, { "epoch": 0.09858138267312877, "grad_norm": 1.7939721645146713, "learning_rate": 9.855670103092784e-06, "loss": 0.5994, "step": 3346 }, { "epoch": 0.09861084513059235, "grad_norm": 1.7381088965871094, "learning_rate": 9.858615611192932e-06, "loss": 0.4285, "step": 3347 }, { "epoch": 0.09864030758805592, "grad_norm": 1.8533975296715708, "learning_rate": 9.86156111929308e-06, "loss": 0.5033, "step": 3348 }, { "epoch": 0.0986697700455195, "grad_norm": 1.7613361198199597, "learning_rate": 9.864506627393226e-06, "loss": 0.485, "step": 3349 }, { "epoch": 0.09869923250298307, "grad_norm": 1.7544945452893688, "learning_rate": 9.867452135493374e-06, "loss": 0.4958, "step": 3350 }, { "epoch": 0.09872869496044664, "grad_norm": 1.7599565286848737, "learning_rate": 9.87039764359352e-06, "loss": 0.5958, "step": 3351 }, { "epoch": 0.09875815741791023, "grad_norm": 1.8537732120118702, "learning_rate": 9.873343151693669e-06, "loss": 0.5093, "step": 3352 }, { "epoch": 0.09878761987537381, "grad_norm": 1.8520838541152358, "learning_rate": 9.876288659793815e-06, "loss": 0.5222, "step": 3353 }, { "epoch": 0.09881708233283738, "grad_norm": 1.9473345268997067, "learning_rate": 9.879234167893963e-06, "loss": 0.5243, "step": 3354 }, { "epoch": 0.09884654479030096, "grad_norm": 1.6773478579441454, "learning_rate": 9.882179675994111e-06, "loss": 0.385, "step": 3355 }, { "epoch": 0.09887600724776453, "grad_norm": 1.6888524397136044, "learning_rate": 9.885125184094257e-06, "loss": 0.549, "step": 3356 }, { "epoch": 0.09890546970522811, "grad_norm": 1.7056188818134457, "learning_rate": 9.888070692194405e-06, "loss": 0.4686, "step": 3357 }, { "epoch": 0.0989349321626917, "grad_norm": 2.0983329784217073, "learning_rate": 9.891016200294552e-06, "loss": 0.5994, "step": 3358 }, { "epoch": 0.09896439462015527, "grad_norm": 1.6057528942286359, "learning_rate": 9.8939617083947e-06, "loss": 0.4291, "step": 3359 }, { "epoch": 0.09899385707761885, "grad_norm": 1.9009884001731516, "learning_rate": 9.896907216494846e-06, "loss": 0.691, "step": 3360 }, { "epoch": 0.09902331953508242, "grad_norm": 1.8171044698775598, "learning_rate": 9.899852724594994e-06, "loss": 0.6193, "step": 3361 }, { "epoch": 0.099052781992546, "grad_norm": 1.9591973688887574, "learning_rate": 9.90279823269514e-06, "loss": 0.7073, "step": 3362 }, { "epoch": 0.09908224445000957, "grad_norm": 1.9551707076303086, "learning_rate": 9.905743740795288e-06, "loss": 0.5876, "step": 3363 }, { "epoch": 0.09911170690747315, "grad_norm": 1.9041433482549275, "learning_rate": 9.908689248895436e-06, "loss": 0.5051, "step": 3364 }, { "epoch": 0.09914116936493673, "grad_norm": 1.67078909830806, "learning_rate": 9.911634756995583e-06, "loss": 0.4631, "step": 3365 }, { "epoch": 0.09917063182240031, "grad_norm": 1.8662695892307122, "learning_rate": 9.91458026509573e-06, "loss": 0.7119, "step": 3366 }, { "epoch": 0.09920009427986388, "grad_norm": 2.094254169068774, "learning_rate": 9.917525773195877e-06, "loss": 0.6465, "step": 3367 }, { "epoch": 0.09922955673732746, "grad_norm": 1.8170890160233641, "learning_rate": 9.920471281296025e-06, "loss": 0.5621, "step": 3368 }, { "epoch": 0.09925901919479103, "grad_norm": 1.6800333260861016, "learning_rate": 9.923416789396171e-06, "loss": 0.5167, "step": 3369 }, { "epoch": 0.09928848165225461, "grad_norm": 1.7295592229832912, "learning_rate": 9.92636229749632e-06, "loss": 0.5293, "step": 3370 }, { "epoch": 0.0993179441097182, "grad_norm": 2.262276138458137, "learning_rate": 9.929307805596467e-06, "loss": 0.6562, "step": 3371 }, { "epoch": 0.09934740656718177, "grad_norm": 2.0377798885013885, "learning_rate": 9.932253313696614e-06, "loss": 0.5508, "step": 3372 }, { "epoch": 0.09937686902464535, "grad_norm": 1.7281731840076866, "learning_rate": 9.935198821796762e-06, "loss": 0.6025, "step": 3373 }, { "epoch": 0.09940633148210892, "grad_norm": 1.7181760492744993, "learning_rate": 9.938144329896908e-06, "loss": 0.5108, "step": 3374 }, { "epoch": 0.0994357939395725, "grad_norm": 1.851661778955969, "learning_rate": 9.941089837997056e-06, "loss": 0.5214, "step": 3375 }, { "epoch": 0.09946525639703607, "grad_norm": 1.7599238806894464, "learning_rate": 9.944035346097202e-06, "loss": 0.4677, "step": 3376 }, { "epoch": 0.09949471885449965, "grad_norm": 1.6449824827588504, "learning_rate": 9.94698085419735e-06, "loss": 0.5327, "step": 3377 }, { "epoch": 0.09952418131196324, "grad_norm": 1.954325684486051, "learning_rate": 9.949926362297497e-06, "loss": 0.5119, "step": 3378 }, { "epoch": 0.09955364376942681, "grad_norm": 1.6056336484726987, "learning_rate": 9.952871870397645e-06, "loss": 0.5308, "step": 3379 }, { "epoch": 0.09958310622689039, "grad_norm": 2.1490471809530454, "learning_rate": 9.955817378497793e-06, "loss": 0.5611, "step": 3380 }, { "epoch": 0.09961256868435396, "grad_norm": 1.9848063802925069, "learning_rate": 9.958762886597939e-06, "loss": 0.6654, "step": 3381 }, { "epoch": 0.09964203114181754, "grad_norm": 1.730396180244059, "learning_rate": 9.961708394698087e-06, "loss": 0.5028, "step": 3382 }, { "epoch": 0.09967149359928111, "grad_norm": 1.809217806959038, "learning_rate": 9.964653902798233e-06, "loss": 0.525, "step": 3383 }, { "epoch": 0.0997009560567447, "grad_norm": 1.5496732876369272, "learning_rate": 9.967599410898381e-06, "loss": 0.3924, "step": 3384 }, { "epoch": 0.09973041851420827, "grad_norm": 1.6677646681648084, "learning_rate": 9.970544918998528e-06, "loss": 0.5482, "step": 3385 }, { "epoch": 0.09975988097167185, "grad_norm": 1.659313497680576, "learning_rate": 9.973490427098676e-06, "loss": 0.5467, "step": 3386 }, { "epoch": 0.09978934342913542, "grad_norm": 1.6129060424835937, "learning_rate": 9.976435935198824e-06, "loss": 0.5121, "step": 3387 }, { "epoch": 0.099818805886599, "grad_norm": 1.8608904519711147, "learning_rate": 9.97938144329897e-06, "loss": 0.7669, "step": 3388 }, { "epoch": 0.09984826834406257, "grad_norm": 1.7868378957115896, "learning_rate": 9.982326951399118e-06, "loss": 0.5403, "step": 3389 }, { "epoch": 0.09987773080152615, "grad_norm": 1.7122492833594833, "learning_rate": 9.985272459499264e-06, "loss": 0.5423, "step": 3390 }, { "epoch": 0.09990719325898974, "grad_norm": 1.752925375099907, "learning_rate": 9.988217967599412e-06, "loss": 0.5404, "step": 3391 }, { "epoch": 0.09993665571645331, "grad_norm": 1.9421209577066645, "learning_rate": 9.991163475699558e-06, "loss": 0.6012, "step": 3392 }, { "epoch": 0.09996611817391689, "grad_norm": 2.058782608766608, "learning_rate": 9.994108983799706e-06, "loss": 0.5557, "step": 3393 }, { "epoch": 0.09999558063138046, "grad_norm": 2.3416566223954085, "learning_rate": 9.997054491899853e-06, "loss": 0.4775, "step": 3394 }, { "epoch": 0.10002504308884404, "grad_norm": 1.9339086320034742, "learning_rate": 1e-05, "loss": 0.5486, "step": 3395 }, { "epoch": 0.10005450554630761, "grad_norm": 1.6368336846979494, "learning_rate": 9.999999973555763e-06, "loss": 0.564, "step": 3396 }, { "epoch": 0.1000839680037712, "grad_norm": 1.8448514239704508, "learning_rate": 9.99999989422305e-06, "loss": 0.627, "step": 3397 }, { "epoch": 0.10011343046123478, "grad_norm": 1.9988878112546222, "learning_rate": 9.999999762001859e-06, "loss": 0.567, "step": 3398 }, { "epoch": 0.10014289291869835, "grad_norm": 1.9197783740881504, "learning_rate": 9.999999576892197e-06, "loss": 0.5831, "step": 3399 }, { "epoch": 0.10017235537616193, "grad_norm": 1.9094614240110546, "learning_rate": 9.999999338894062e-06, "loss": 0.5289, "step": 3400 }, { "epoch": 0.1002018178336255, "grad_norm": 1.7058351280794555, "learning_rate": 9.999999048007459e-06, "loss": 0.6685, "step": 3401 }, { "epoch": 0.10023128029108908, "grad_norm": 1.6150324118616795, "learning_rate": 9.999998704232388e-06, "loss": 0.4483, "step": 3402 }, { "epoch": 0.10026074274855265, "grad_norm": 1.5230280800832305, "learning_rate": 9.999998307568856e-06, "loss": 0.3651, "step": 3403 }, { "epoch": 0.10029020520601624, "grad_norm": 1.9589681564536021, "learning_rate": 9.999997858016865e-06, "loss": 0.6049, "step": 3404 }, { "epoch": 0.10031966766347981, "grad_norm": 1.7487744154318892, "learning_rate": 9.999997355576423e-06, "loss": 0.404, "step": 3405 }, { "epoch": 0.10034913012094339, "grad_norm": 1.9665083693681464, "learning_rate": 9.99999680024753e-06, "loss": 0.6996, "step": 3406 }, { "epoch": 0.10037859257840696, "grad_norm": 1.9005348000794848, "learning_rate": 9.999996192030194e-06, "loss": 0.5208, "step": 3407 }, { "epoch": 0.10040805503587054, "grad_norm": 1.8527747104182422, "learning_rate": 9.999995530924423e-06, "loss": 0.7392, "step": 3408 }, { "epoch": 0.10043751749333411, "grad_norm": 1.9210850540244149, "learning_rate": 9.999994816930224e-06, "loss": 0.5797, "step": 3409 }, { "epoch": 0.1004669799507977, "grad_norm": 1.9162670841273282, "learning_rate": 9.999994050047603e-06, "loss": 0.6179, "step": 3410 }, { "epoch": 0.10049644240826128, "grad_norm": 1.8588855601735592, "learning_rate": 9.999993230276569e-06, "loss": 0.6848, "step": 3411 }, { "epoch": 0.10052590486572485, "grad_norm": 1.8490097790266178, "learning_rate": 9.99999235761713e-06, "loss": 0.5957, "step": 3412 }, { "epoch": 0.10055536732318843, "grad_norm": 1.7200128808550403, "learning_rate": 9.999991432069295e-06, "loss": 0.5368, "step": 3413 }, { "epoch": 0.100584829780652, "grad_norm": 1.8300397901934229, "learning_rate": 9.999990453633076e-06, "loss": 0.5675, "step": 3414 }, { "epoch": 0.10061429223811558, "grad_norm": 1.7157770164363568, "learning_rate": 9.99998942230848e-06, "loss": 0.4663, "step": 3415 }, { "epoch": 0.10064375469557915, "grad_norm": 2.0213686042757333, "learning_rate": 9.999988338095521e-06, "loss": 0.5303, "step": 3416 }, { "epoch": 0.10067321715304274, "grad_norm": 1.7600210918962966, "learning_rate": 9.99998720099421e-06, "loss": 0.4644, "step": 3417 }, { "epoch": 0.10070267961050632, "grad_norm": 1.733449082323149, "learning_rate": 9.999986011004556e-06, "loss": 0.5271, "step": 3418 }, { "epoch": 0.10073214206796989, "grad_norm": 1.7413101616721158, "learning_rate": 9.999984768126575e-06, "loss": 0.5267, "step": 3419 }, { "epoch": 0.10076160452543347, "grad_norm": 1.836131887056993, "learning_rate": 9.999983472360279e-06, "loss": 0.4789, "step": 3420 }, { "epoch": 0.10079106698289704, "grad_norm": 1.905169998043434, "learning_rate": 9.999982123705682e-06, "loss": 0.5019, "step": 3421 }, { "epoch": 0.10082052944036062, "grad_norm": 1.7311314781220468, "learning_rate": 9.999980722162796e-06, "loss": 0.6444, "step": 3422 }, { "epoch": 0.1008499918978242, "grad_norm": 1.82368566536617, "learning_rate": 9.99997926773164e-06, "loss": 0.5489, "step": 3423 }, { "epoch": 0.10087945435528778, "grad_norm": 1.9735847190946159, "learning_rate": 9.999977760412225e-06, "loss": 0.5772, "step": 3424 }, { "epoch": 0.10090891681275135, "grad_norm": 1.738754891417113, "learning_rate": 9.99997620020457e-06, "loss": 0.4913, "step": 3425 }, { "epoch": 0.10093837927021493, "grad_norm": 1.7698217221131176, "learning_rate": 9.999974587108691e-06, "loss": 0.5377, "step": 3426 }, { "epoch": 0.1009678417276785, "grad_norm": 1.618902365132497, "learning_rate": 9.999972921124604e-06, "loss": 0.561, "step": 3427 }, { "epoch": 0.10099730418514208, "grad_norm": 2.0430649465220565, "learning_rate": 9.999971202252328e-06, "loss": 0.6284, "step": 3428 }, { "epoch": 0.10102676664260565, "grad_norm": 1.8888086355861458, "learning_rate": 9.99996943049188e-06, "loss": 0.6035, "step": 3429 }, { "epoch": 0.10105622910006924, "grad_norm": 1.9730572499238272, "learning_rate": 9.999967605843279e-06, "loss": 0.6729, "step": 3430 }, { "epoch": 0.10108569155753282, "grad_norm": 1.8043448907197908, "learning_rate": 9.999965728306545e-06, "loss": 0.6197, "step": 3431 }, { "epoch": 0.10111515401499639, "grad_norm": 1.6086930392480971, "learning_rate": 9.999963797881695e-06, "loss": 0.546, "step": 3432 }, { "epoch": 0.10114461647245997, "grad_norm": 2.041302408433629, "learning_rate": 9.999961814568755e-06, "loss": 0.4804, "step": 3433 }, { "epoch": 0.10117407892992354, "grad_norm": 2.0775361083151074, "learning_rate": 9.99995977836774e-06, "loss": 0.6116, "step": 3434 }, { "epoch": 0.10120354138738712, "grad_norm": 1.9163998167285012, "learning_rate": 9.999957689278676e-06, "loss": 0.6146, "step": 3435 }, { "epoch": 0.1012330038448507, "grad_norm": 1.7819237115816442, "learning_rate": 9.999955547301583e-06, "loss": 0.4784, "step": 3436 }, { "epoch": 0.10126246630231428, "grad_norm": 1.763328467580857, "learning_rate": 9.999953352436485e-06, "loss": 0.5752, "step": 3437 }, { "epoch": 0.10129192875977786, "grad_norm": 1.7342180074199338, "learning_rate": 9.999951104683403e-06, "loss": 0.4993, "step": 3438 }, { "epoch": 0.10132139121724143, "grad_norm": 1.9316460268609188, "learning_rate": 9.999948804042361e-06, "loss": 0.6973, "step": 3439 }, { "epoch": 0.101350853674705, "grad_norm": 1.7187156199018567, "learning_rate": 9.999946450513385e-06, "loss": 0.5378, "step": 3440 }, { "epoch": 0.10138031613216858, "grad_norm": 1.8972011908242656, "learning_rate": 9.9999440440965e-06, "loss": 0.5339, "step": 3441 }, { "epoch": 0.10140977858963215, "grad_norm": 1.8769281889164082, "learning_rate": 9.99994158479173e-06, "loss": 0.4058, "step": 3442 }, { "epoch": 0.10143924104709574, "grad_norm": 1.8447466437416777, "learning_rate": 9.999939072599103e-06, "loss": 0.5031, "step": 3443 }, { "epoch": 0.10146870350455932, "grad_norm": 1.5159051153639294, "learning_rate": 9.999936507518643e-06, "loss": 0.4211, "step": 3444 }, { "epoch": 0.1014981659620229, "grad_norm": 1.9380910666939977, "learning_rate": 9.999933889550378e-06, "loss": 0.5488, "step": 3445 }, { "epoch": 0.10152762841948647, "grad_norm": 1.8648596844022758, "learning_rate": 9.999931218694338e-06, "loss": 0.4496, "step": 3446 }, { "epoch": 0.10155709087695004, "grad_norm": 1.9085509394344435, "learning_rate": 9.999928494950549e-06, "loss": 0.465, "step": 3447 }, { "epoch": 0.10158655333441362, "grad_norm": 2.2751882470211124, "learning_rate": 9.999925718319038e-06, "loss": 0.6671, "step": 3448 }, { "epoch": 0.1016160157918772, "grad_norm": 1.9036891142441603, "learning_rate": 9.999922888799838e-06, "loss": 0.6136, "step": 3449 }, { "epoch": 0.10164547824934078, "grad_norm": 1.9734241316548582, "learning_rate": 9.999920006392976e-06, "loss": 0.6998, "step": 3450 }, { "epoch": 0.10167494070680436, "grad_norm": 2.0068275667859656, "learning_rate": 9.999917071098486e-06, "loss": 0.698, "step": 3451 }, { "epoch": 0.10170440316426793, "grad_norm": 1.7004219965990088, "learning_rate": 9.999914082916396e-06, "loss": 0.599, "step": 3452 }, { "epoch": 0.1017338656217315, "grad_norm": 1.7525039887697447, "learning_rate": 9.999911041846738e-06, "loss": 0.6474, "step": 3453 }, { "epoch": 0.10176332807919508, "grad_norm": 1.6634585060695357, "learning_rate": 9.999907947889546e-06, "loss": 0.4859, "step": 3454 }, { "epoch": 0.10179279053665866, "grad_norm": 1.6760085030185177, "learning_rate": 9.999904801044852e-06, "loss": 0.5072, "step": 3455 }, { "epoch": 0.10182225299412224, "grad_norm": 1.9477257111228061, "learning_rate": 9.999901601312687e-06, "loss": 0.4112, "step": 3456 }, { "epoch": 0.10185171545158582, "grad_norm": 1.793504928153503, "learning_rate": 9.999898348693088e-06, "loss": 0.4738, "step": 3457 }, { "epoch": 0.1018811779090494, "grad_norm": 1.6299240198049818, "learning_rate": 9.999895043186087e-06, "loss": 0.4526, "step": 3458 }, { "epoch": 0.10191064036651297, "grad_norm": 2.0603621504790692, "learning_rate": 9.999891684791722e-06, "loss": 0.4062, "step": 3459 }, { "epoch": 0.10194010282397654, "grad_norm": 1.9819050231024078, "learning_rate": 9.999888273510025e-06, "loss": 0.4816, "step": 3460 }, { "epoch": 0.10196956528144012, "grad_norm": 2.044460238500645, "learning_rate": 9.999884809341033e-06, "loss": 0.5105, "step": 3461 }, { "epoch": 0.10199902773890371, "grad_norm": 1.9895855892366825, "learning_rate": 9.999881292284784e-06, "loss": 0.7967, "step": 3462 }, { "epoch": 0.10202849019636728, "grad_norm": 1.8760437096771734, "learning_rate": 9.999877722341315e-06, "loss": 0.5507, "step": 3463 }, { "epoch": 0.10205795265383086, "grad_norm": 1.8941246633870517, "learning_rate": 9.999874099510662e-06, "loss": 0.6781, "step": 3464 }, { "epoch": 0.10208741511129443, "grad_norm": 1.780947668821391, "learning_rate": 9.999870423792866e-06, "loss": 0.444, "step": 3465 }, { "epoch": 0.10211687756875801, "grad_norm": 2.146643018731205, "learning_rate": 9.999866695187962e-06, "loss": 0.6286, "step": 3466 }, { "epoch": 0.10214634002622158, "grad_norm": 1.981623029269692, "learning_rate": 9.999862913695995e-06, "loss": 0.7149, "step": 3467 }, { "epoch": 0.10217580248368516, "grad_norm": 1.7824978079749487, "learning_rate": 9.999859079316999e-06, "loss": 0.4678, "step": 3468 }, { "epoch": 0.10220526494114875, "grad_norm": 2.1252483670217357, "learning_rate": 9.999855192051018e-06, "loss": 0.6888, "step": 3469 }, { "epoch": 0.10223472739861232, "grad_norm": 1.772014068587265, "learning_rate": 9.999851251898095e-06, "loss": 0.5923, "step": 3470 }, { "epoch": 0.1022641898560759, "grad_norm": 1.7388525269567265, "learning_rate": 9.999847258858267e-06, "loss": 0.6406, "step": 3471 }, { "epoch": 0.10229365231353947, "grad_norm": 1.9455406649912583, "learning_rate": 9.999843212931579e-06, "loss": 0.6062, "step": 3472 }, { "epoch": 0.10232311477100305, "grad_norm": 1.8082989204189426, "learning_rate": 9.999839114118073e-06, "loss": 0.5458, "step": 3473 }, { "epoch": 0.10235257722846662, "grad_norm": 1.9131690567878705, "learning_rate": 9.999834962417793e-06, "loss": 0.647, "step": 3474 }, { "epoch": 0.10238203968593021, "grad_norm": 1.747612500106528, "learning_rate": 9.999830757830782e-06, "loss": 0.4909, "step": 3475 }, { "epoch": 0.10241150214339378, "grad_norm": 1.6763805824134248, "learning_rate": 9.999826500357085e-06, "loss": 0.5368, "step": 3476 }, { "epoch": 0.10244096460085736, "grad_norm": 1.6683062781064069, "learning_rate": 9.999822189996748e-06, "loss": 0.5934, "step": 3477 }, { "epoch": 0.10247042705832093, "grad_norm": 1.89406867777503, "learning_rate": 9.999817826749814e-06, "loss": 0.5155, "step": 3478 }, { "epoch": 0.10249988951578451, "grad_norm": 1.546504026353501, "learning_rate": 9.999813410616332e-06, "loss": 0.4735, "step": 3479 }, { "epoch": 0.10252935197324808, "grad_norm": 1.5240170724160975, "learning_rate": 9.999808941596348e-06, "loss": 0.4923, "step": 3480 }, { "epoch": 0.10255881443071166, "grad_norm": 1.8097336932477397, "learning_rate": 9.999804419689907e-06, "loss": 0.6454, "step": 3481 }, { "epoch": 0.10258827688817525, "grad_norm": 1.797699694245301, "learning_rate": 9.99979984489706e-06, "loss": 0.5424, "step": 3482 }, { "epoch": 0.10261773934563882, "grad_norm": 1.8654912381924182, "learning_rate": 9.999795217217852e-06, "loss": 0.6386, "step": 3483 }, { "epoch": 0.1026472018031024, "grad_norm": 1.702784954156834, "learning_rate": 9.999790536652336e-06, "loss": 0.5939, "step": 3484 }, { "epoch": 0.10267666426056597, "grad_norm": 1.7218863527987045, "learning_rate": 9.999785803200559e-06, "loss": 0.4645, "step": 3485 }, { "epoch": 0.10270612671802955, "grad_norm": 1.8787960598349636, "learning_rate": 9.999781016862571e-06, "loss": 0.5486, "step": 3486 }, { "epoch": 0.10273558917549312, "grad_norm": 1.7386683639107172, "learning_rate": 9.999776177638425e-06, "loss": 0.511, "step": 3487 }, { "epoch": 0.10276505163295671, "grad_norm": 2.0803860615504717, "learning_rate": 9.999771285528169e-06, "loss": 0.629, "step": 3488 }, { "epoch": 0.10279451409042029, "grad_norm": 1.7965781431968324, "learning_rate": 9.999766340531855e-06, "loss": 0.5491, "step": 3489 }, { "epoch": 0.10282397654788386, "grad_norm": 1.9359962415380951, "learning_rate": 9.999761342649537e-06, "loss": 0.5307, "step": 3490 }, { "epoch": 0.10285343900534744, "grad_norm": 1.9144471470343987, "learning_rate": 9.999756291881267e-06, "loss": 0.503, "step": 3491 }, { "epoch": 0.10288290146281101, "grad_norm": 1.999934096937146, "learning_rate": 9.999751188227099e-06, "loss": 0.6477, "step": 3492 }, { "epoch": 0.10291236392027459, "grad_norm": 1.7388821106942263, "learning_rate": 9.999746031687087e-06, "loss": 0.5484, "step": 3493 }, { "epoch": 0.10294182637773816, "grad_norm": 1.9068143988852033, "learning_rate": 9.999740822261285e-06, "loss": 0.4434, "step": 3494 }, { "epoch": 0.10297128883520175, "grad_norm": 1.6903226681543067, "learning_rate": 9.999735559949748e-06, "loss": 0.5382, "step": 3495 }, { "epoch": 0.10300075129266532, "grad_norm": 1.7906122628494876, "learning_rate": 9.999730244752532e-06, "loss": 0.4359, "step": 3496 }, { "epoch": 0.1030302137501289, "grad_norm": 1.7341792106254352, "learning_rate": 9.999724876669694e-06, "loss": 0.5328, "step": 3497 }, { "epoch": 0.10305967620759247, "grad_norm": 1.999169048057532, "learning_rate": 9.999719455701288e-06, "loss": 0.5481, "step": 3498 }, { "epoch": 0.10308913866505605, "grad_norm": 1.9845655477834754, "learning_rate": 9.999713981847376e-06, "loss": 0.7617, "step": 3499 }, { "epoch": 0.10311860112251962, "grad_norm": 1.8733614349732468, "learning_rate": 9.999708455108013e-06, "loss": 0.5725, "step": 3500 }, { "epoch": 0.10314806357998321, "grad_norm": 1.631271000755134, "learning_rate": 9.999702875483257e-06, "loss": 0.5361, "step": 3501 }, { "epoch": 0.10317752603744679, "grad_norm": 1.8225217602881825, "learning_rate": 9.999697242973166e-06, "loss": 0.4338, "step": 3502 }, { "epoch": 0.10320698849491036, "grad_norm": 1.7985912475414656, "learning_rate": 9.999691557577803e-06, "loss": 0.3953, "step": 3503 }, { "epoch": 0.10323645095237394, "grad_norm": 1.7511074692096547, "learning_rate": 9.999685819297225e-06, "loss": 0.5178, "step": 3504 }, { "epoch": 0.10326591340983751, "grad_norm": 1.8719304871693954, "learning_rate": 9.999680028131496e-06, "loss": 0.5071, "step": 3505 }, { "epoch": 0.10329537586730109, "grad_norm": 1.7222036203135873, "learning_rate": 9.999674184080674e-06, "loss": 0.518, "step": 3506 }, { "epoch": 0.10332483832476466, "grad_norm": 1.9409085468881149, "learning_rate": 9.999668287144824e-06, "loss": 0.6896, "step": 3507 }, { "epoch": 0.10335430078222825, "grad_norm": 1.9389465108902897, "learning_rate": 9.999662337324005e-06, "loss": 0.5847, "step": 3508 }, { "epoch": 0.10338376323969183, "grad_norm": 1.8581181632621298, "learning_rate": 9.999656334618282e-06, "loss": 0.5707, "step": 3509 }, { "epoch": 0.1034132256971554, "grad_norm": 1.8343317235845296, "learning_rate": 9.999650279027717e-06, "loss": 0.5212, "step": 3510 }, { "epoch": 0.10344268815461898, "grad_norm": 1.889258550865039, "learning_rate": 9.999644170552376e-06, "loss": 0.6273, "step": 3511 }, { "epoch": 0.10347215061208255, "grad_norm": 1.6283667487668556, "learning_rate": 9.999638009192322e-06, "loss": 0.502, "step": 3512 }, { "epoch": 0.10350161306954613, "grad_norm": 1.8277112659106127, "learning_rate": 9.999631794947622e-06, "loss": 0.6809, "step": 3513 }, { "epoch": 0.10353107552700971, "grad_norm": 1.6804990877115735, "learning_rate": 9.999625527818339e-06, "loss": 0.4884, "step": 3514 }, { "epoch": 0.10356053798447329, "grad_norm": 1.9196665060371072, "learning_rate": 9.999619207804541e-06, "loss": 0.6354, "step": 3515 }, { "epoch": 0.10359000044193686, "grad_norm": 1.6229249989770036, "learning_rate": 9.999612834906296e-06, "loss": 0.4701, "step": 3516 }, { "epoch": 0.10361946289940044, "grad_norm": 2.0061133655717875, "learning_rate": 9.999606409123669e-06, "loss": 0.4553, "step": 3517 }, { "epoch": 0.10364892535686401, "grad_norm": 1.7231298065624179, "learning_rate": 9.99959993045673e-06, "loss": 0.4888, "step": 3518 }, { "epoch": 0.10367838781432759, "grad_norm": 1.564223324452775, "learning_rate": 9.999593398905546e-06, "loss": 0.4656, "step": 3519 }, { "epoch": 0.10370785027179116, "grad_norm": 1.84687305619416, "learning_rate": 9.999586814470187e-06, "loss": 0.6003, "step": 3520 }, { "epoch": 0.10373731272925475, "grad_norm": 2.070694974679552, "learning_rate": 9.999580177150722e-06, "loss": 0.5979, "step": 3521 }, { "epoch": 0.10376677518671833, "grad_norm": 1.707712784568356, "learning_rate": 9.999573486947221e-06, "loss": 0.5479, "step": 3522 }, { "epoch": 0.1037962376441819, "grad_norm": 1.9523607682862905, "learning_rate": 9.999566743859757e-06, "loss": 0.6, "step": 3523 }, { "epoch": 0.10382570010164548, "grad_norm": 1.9414716960744711, "learning_rate": 9.999559947888399e-06, "loss": 0.6646, "step": 3524 }, { "epoch": 0.10385516255910905, "grad_norm": 1.74589676717812, "learning_rate": 9.999553099033218e-06, "loss": 0.5728, "step": 3525 }, { "epoch": 0.10388462501657263, "grad_norm": 2.1557924378865625, "learning_rate": 9.99954619729429e-06, "loss": 0.655, "step": 3526 }, { "epoch": 0.10391408747403622, "grad_norm": 1.6941695704602169, "learning_rate": 9.999539242671685e-06, "loss": 0.5006, "step": 3527 }, { "epoch": 0.10394354993149979, "grad_norm": 1.7458290206303662, "learning_rate": 9.999532235165478e-06, "loss": 0.4994, "step": 3528 }, { "epoch": 0.10397301238896337, "grad_norm": 1.8426535849341397, "learning_rate": 9.999525174775741e-06, "loss": 0.6063, "step": 3529 }, { "epoch": 0.10400247484642694, "grad_norm": 1.886674023566145, "learning_rate": 9.999518061502553e-06, "loss": 0.6639, "step": 3530 }, { "epoch": 0.10403193730389051, "grad_norm": 1.820953116001578, "learning_rate": 9.999510895345986e-06, "loss": 0.6036, "step": 3531 }, { "epoch": 0.10406139976135409, "grad_norm": 1.9779735531072602, "learning_rate": 9.999503676306114e-06, "loss": 0.6437, "step": 3532 }, { "epoch": 0.10409086221881766, "grad_norm": 1.5613260295132125, "learning_rate": 9.99949640438302e-06, "loss": 0.4515, "step": 3533 }, { "epoch": 0.10412032467628125, "grad_norm": 1.700712525067199, "learning_rate": 9.999489079576773e-06, "loss": 0.4704, "step": 3534 }, { "epoch": 0.10414978713374483, "grad_norm": 1.7152983850384995, "learning_rate": 9.999481701887456e-06, "loss": 0.6006, "step": 3535 }, { "epoch": 0.1041792495912084, "grad_norm": 1.972571744633773, "learning_rate": 9.999474271315145e-06, "loss": 0.6096, "step": 3536 }, { "epoch": 0.10420871204867198, "grad_norm": 1.7887666041020038, "learning_rate": 9.999466787859917e-06, "loss": 0.4707, "step": 3537 }, { "epoch": 0.10423817450613555, "grad_norm": 2.1091965753216737, "learning_rate": 9.999459251521856e-06, "loss": 0.5752, "step": 3538 }, { "epoch": 0.10426763696359913, "grad_norm": 1.9927469759298182, "learning_rate": 9.999451662301035e-06, "loss": 0.5265, "step": 3539 }, { "epoch": 0.10429709942106272, "grad_norm": 1.8177199580184726, "learning_rate": 9.999444020197541e-06, "loss": 0.5908, "step": 3540 }, { "epoch": 0.10432656187852629, "grad_norm": 1.6058321259077646, "learning_rate": 9.99943632521145e-06, "loss": 0.5487, "step": 3541 }, { "epoch": 0.10435602433598987, "grad_norm": 1.7205107278122784, "learning_rate": 9.999428577342847e-06, "loss": 0.5128, "step": 3542 }, { "epoch": 0.10438548679345344, "grad_norm": 1.8787199499954612, "learning_rate": 9.999420776591811e-06, "loss": 0.4536, "step": 3543 }, { "epoch": 0.10441494925091702, "grad_norm": 1.8996220669234951, "learning_rate": 9.999412922958425e-06, "loss": 0.5958, "step": 3544 }, { "epoch": 0.10444441170838059, "grad_norm": 1.7189929867351674, "learning_rate": 9.999405016442773e-06, "loss": 0.4543, "step": 3545 }, { "epoch": 0.10447387416584418, "grad_norm": 1.966177742960573, "learning_rate": 9.99939705704494e-06, "loss": 0.5254, "step": 3546 }, { "epoch": 0.10450333662330775, "grad_norm": 1.6826152703038448, "learning_rate": 9.999389044765006e-06, "loss": 0.5507, "step": 3547 }, { "epoch": 0.10453279908077133, "grad_norm": 2.011468668177247, "learning_rate": 9.999380979603061e-06, "loss": 0.5566, "step": 3548 }, { "epoch": 0.1045622615382349, "grad_norm": 1.8516626195233508, "learning_rate": 9.999372861559187e-06, "loss": 0.4266, "step": 3549 }, { "epoch": 0.10459172399569848, "grad_norm": 1.7959554886140265, "learning_rate": 9.999364690633468e-06, "loss": 0.5884, "step": 3550 }, { "epoch": 0.10462118645316205, "grad_norm": 1.7612568268861137, "learning_rate": 9.999356466825996e-06, "loss": 0.5711, "step": 3551 }, { "epoch": 0.10465064891062563, "grad_norm": 1.7542773488058023, "learning_rate": 9.999348190136852e-06, "loss": 0.5419, "step": 3552 }, { "epoch": 0.10468011136808922, "grad_norm": 1.7105714945488972, "learning_rate": 9.99933986056613e-06, "loss": 0.5238, "step": 3553 }, { "epoch": 0.10470957382555279, "grad_norm": 1.9309720493711229, "learning_rate": 9.999331478113911e-06, "loss": 0.5168, "step": 3554 }, { "epoch": 0.10473903628301637, "grad_norm": 1.7953169713469321, "learning_rate": 9.999323042780289e-06, "loss": 0.5203, "step": 3555 }, { "epoch": 0.10476849874047994, "grad_norm": 1.7446759691117184, "learning_rate": 9.999314554565351e-06, "loss": 0.4288, "step": 3556 }, { "epoch": 0.10479796119794352, "grad_norm": 1.913921884891989, "learning_rate": 9.999306013469188e-06, "loss": 0.5266, "step": 3557 }, { "epoch": 0.10482742365540709, "grad_norm": 1.6711165266250243, "learning_rate": 9.99929741949189e-06, "loss": 0.4618, "step": 3558 }, { "epoch": 0.10485688611287068, "grad_norm": 1.820382119650686, "learning_rate": 9.999288772633546e-06, "loss": 0.4647, "step": 3559 }, { "epoch": 0.10488634857033426, "grad_norm": 1.8229158895952242, "learning_rate": 9.999280072894249e-06, "loss": 0.5173, "step": 3560 }, { "epoch": 0.10491581102779783, "grad_norm": 1.6338314792896678, "learning_rate": 9.99927132027409e-06, "loss": 0.4928, "step": 3561 }, { "epoch": 0.1049452734852614, "grad_norm": 1.5964197811870162, "learning_rate": 9.999262514773164e-06, "loss": 0.4089, "step": 3562 }, { "epoch": 0.10497473594272498, "grad_norm": 1.7213201106859413, "learning_rate": 9.999253656391563e-06, "loss": 0.4586, "step": 3563 }, { "epoch": 0.10500419840018856, "grad_norm": 1.6284640351183106, "learning_rate": 9.99924474512938e-06, "loss": 0.5507, "step": 3564 }, { "epoch": 0.10503366085765213, "grad_norm": 1.8100157121498117, "learning_rate": 9.99923578098671e-06, "loss": 0.5388, "step": 3565 }, { "epoch": 0.10506312331511572, "grad_norm": 1.8421700278907114, "learning_rate": 9.999226763963647e-06, "loss": 0.6069, "step": 3566 }, { "epoch": 0.1050925857725793, "grad_norm": 1.7160063188070829, "learning_rate": 9.999217694060288e-06, "loss": 0.5307, "step": 3567 }, { "epoch": 0.10512204823004287, "grad_norm": 1.7809840827208545, "learning_rate": 9.999208571276725e-06, "loss": 0.5475, "step": 3568 }, { "epoch": 0.10515151068750644, "grad_norm": 1.4864835391593423, "learning_rate": 9.99919939561306e-06, "loss": 0.4089, "step": 3569 }, { "epoch": 0.10518097314497002, "grad_norm": 1.7600510863031085, "learning_rate": 9.999190167069387e-06, "loss": 0.517, "step": 3570 }, { "epoch": 0.1052104356024336, "grad_norm": 1.863540771475965, "learning_rate": 9.999180885645802e-06, "loss": 0.4582, "step": 3571 }, { "epoch": 0.10523989805989718, "grad_norm": 1.6524493083291936, "learning_rate": 9.999171551342409e-06, "loss": 0.5042, "step": 3572 }, { "epoch": 0.10526936051736076, "grad_norm": 1.8426589216057407, "learning_rate": 9.9991621641593e-06, "loss": 0.6362, "step": 3573 }, { "epoch": 0.10529882297482433, "grad_norm": 1.7292763882607949, "learning_rate": 9.999152724096577e-06, "loss": 0.5351, "step": 3574 }, { "epoch": 0.10532828543228791, "grad_norm": 1.7794163619867085, "learning_rate": 9.99914323115434e-06, "loss": 0.5426, "step": 3575 }, { "epoch": 0.10535774788975148, "grad_norm": 1.6899205829679176, "learning_rate": 9.999133685332691e-06, "loss": 0.6173, "step": 3576 }, { "epoch": 0.10538721034721506, "grad_norm": 1.621758249755572, "learning_rate": 9.999124086631729e-06, "loss": 0.522, "step": 3577 }, { "epoch": 0.10541667280467863, "grad_norm": 2.116584871517573, "learning_rate": 9.999114435051555e-06, "loss": 0.6882, "step": 3578 }, { "epoch": 0.10544613526214222, "grad_norm": 1.7851943000329575, "learning_rate": 9.999104730592273e-06, "loss": 0.559, "step": 3579 }, { "epoch": 0.1054755977196058, "grad_norm": 1.7175361642124554, "learning_rate": 9.999094973253983e-06, "loss": 0.692, "step": 3580 }, { "epoch": 0.10550506017706937, "grad_norm": 1.8009990689713729, "learning_rate": 9.999085163036791e-06, "loss": 0.5525, "step": 3581 }, { "epoch": 0.10553452263453295, "grad_norm": 1.7312854698113682, "learning_rate": 9.9990752999408e-06, "loss": 0.6607, "step": 3582 }, { "epoch": 0.10556398509199652, "grad_norm": 1.6799194628188732, "learning_rate": 9.999065383966113e-06, "loss": 0.551, "step": 3583 }, { "epoch": 0.1055934475494601, "grad_norm": 1.720476711467042, "learning_rate": 9.999055415112837e-06, "loss": 0.5651, "step": 3584 }, { "epoch": 0.10562291000692368, "grad_norm": 2.0519068512940644, "learning_rate": 9.999045393381076e-06, "loss": 0.5089, "step": 3585 }, { "epoch": 0.10565237246438726, "grad_norm": 1.7662582297391352, "learning_rate": 9.999035318770937e-06, "loss": 0.5766, "step": 3586 }, { "epoch": 0.10568183492185083, "grad_norm": 1.710273026878791, "learning_rate": 9.999025191282525e-06, "loss": 0.509, "step": 3587 }, { "epoch": 0.10571129737931441, "grad_norm": 1.7639117026479318, "learning_rate": 9.999015010915947e-06, "loss": 0.5797, "step": 3588 }, { "epoch": 0.10574075983677798, "grad_norm": 1.826681930241118, "learning_rate": 9.999004777671314e-06, "loss": 0.637, "step": 3589 }, { "epoch": 0.10577022229424156, "grad_norm": 1.6898916098194707, "learning_rate": 9.99899449154873e-06, "loss": 0.4838, "step": 3590 }, { "epoch": 0.10579968475170513, "grad_norm": 1.8782991330362206, "learning_rate": 9.998984152548308e-06, "loss": 0.5466, "step": 3591 }, { "epoch": 0.10582914720916872, "grad_norm": 1.8327789562793142, "learning_rate": 9.998973760670154e-06, "loss": 0.5461, "step": 3592 }, { "epoch": 0.1058586096666323, "grad_norm": 1.8588386058626711, "learning_rate": 9.99896331591438e-06, "loss": 0.6448, "step": 3593 }, { "epoch": 0.10588807212409587, "grad_norm": 1.855161109310771, "learning_rate": 9.998952818281092e-06, "loss": 0.5344, "step": 3594 }, { "epoch": 0.10591753458155945, "grad_norm": 1.631918862682082, "learning_rate": 9.998942267770408e-06, "loss": 0.3384, "step": 3595 }, { "epoch": 0.10594699703902302, "grad_norm": 1.7523365451403434, "learning_rate": 9.998931664382433e-06, "loss": 0.5513, "step": 3596 }, { "epoch": 0.1059764594964866, "grad_norm": 1.8101867545683166, "learning_rate": 9.998921008117285e-06, "loss": 0.57, "step": 3597 }, { "epoch": 0.10600592195395019, "grad_norm": 1.666944791382268, "learning_rate": 9.998910298975074e-06, "loss": 0.3878, "step": 3598 }, { "epoch": 0.10603538441141376, "grad_norm": 1.6107380643077267, "learning_rate": 9.998899536955912e-06, "loss": 0.4694, "step": 3599 }, { "epoch": 0.10606484686887734, "grad_norm": 2.1578887803486944, "learning_rate": 9.998888722059916e-06, "loss": 0.4334, "step": 3600 }, { "epoch": 0.10609430932634091, "grad_norm": 1.8650428026356742, "learning_rate": 9.998877854287197e-06, "loss": 0.4969, "step": 3601 }, { "epoch": 0.10612377178380449, "grad_norm": 1.6768885744703639, "learning_rate": 9.998866933637872e-06, "loss": 0.4247, "step": 3602 }, { "epoch": 0.10615323424126806, "grad_norm": 1.6242205301163544, "learning_rate": 9.998855960112056e-06, "loss": 0.485, "step": 3603 }, { "epoch": 0.10618269669873164, "grad_norm": 1.6510173683579628, "learning_rate": 9.998844933709866e-06, "loss": 0.4383, "step": 3604 }, { "epoch": 0.10621215915619522, "grad_norm": 1.7319946852922163, "learning_rate": 9.998833854431416e-06, "loss": 0.6112, "step": 3605 }, { "epoch": 0.1062416216136588, "grad_norm": 1.9923454839305972, "learning_rate": 9.998822722276826e-06, "loss": 0.6975, "step": 3606 }, { "epoch": 0.10627108407112237, "grad_norm": 1.7658855972137113, "learning_rate": 9.998811537246212e-06, "loss": 0.5386, "step": 3607 }, { "epoch": 0.10630054652858595, "grad_norm": 2.104035636894982, "learning_rate": 9.998800299339693e-06, "loss": 0.6724, "step": 3608 }, { "epoch": 0.10633000898604952, "grad_norm": 1.9523648887178713, "learning_rate": 9.998789008557389e-06, "loss": 0.6015, "step": 3609 }, { "epoch": 0.1063594714435131, "grad_norm": 1.7197561965646408, "learning_rate": 9.998777664899417e-06, "loss": 0.5465, "step": 3610 }, { "epoch": 0.10638893390097669, "grad_norm": 1.841609986360957, "learning_rate": 9.998766268365901e-06, "loss": 0.6466, "step": 3611 }, { "epoch": 0.10641839635844026, "grad_norm": 1.863227422509623, "learning_rate": 9.998754818956955e-06, "loss": 0.6165, "step": 3612 }, { "epoch": 0.10644785881590384, "grad_norm": 1.8414120235035119, "learning_rate": 9.998743316672707e-06, "loss": 0.4547, "step": 3613 }, { "epoch": 0.10647732127336741, "grad_norm": 1.6768346141688082, "learning_rate": 9.998731761513273e-06, "loss": 0.5339, "step": 3614 }, { "epoch": 0.10650678373083099, "grad_norm": 1.8585118384773613, "learning_rate": 9.99872015347878e-06, "loss": 0.6435, "step": 3615 }, { "epoch": 0.10653624618829456, "grad_norm": 1.6214165824850224, "learning_rate": 9.998708492569347e-06, "loss": 0.4033, "step": 3616 }, { "epoch": 0.10656570864575814, "grad_norm": 1.6116594709861842, "learning_rate": 9.9986967787851e-06, "loss": 0.4794, "step": 3617 }, { "epoch": 0.10659517110322173, "grad_norm": 1.6743197188045673, "learning_rate": 9.998685012126161e-06, "loss": 0.491, "step": 3618 }, { "epoch": 0.1066246335606853, "grad_norm": 1.8872714268208217, "learning_rate": 9.998673192592657e-06, "loss": 0.5602, "step": 3619 }, { "epoch": 0.10665409601814888, "grad_norm": 1.7473070548989131, "learning_rate": 9.99866132018471e-06, "loss": 0.6284, "step": 3620 }, { "epoch": 0.10668355847561245, "grad_norm": 1.728159043098803, "learning_rate": 9.998649394902447e-06, "loss": 0.5834, "step": 3621 }, { "epoch": 0.10671302093307602, "grad_norm": 1.9795364032371496, "learning_rate": 9.998637416745996e-06, "loss": 0.4765, "step": 3622 }, { "epoch": 0.1067424833905396, "grad_norm": 1.7180860699390108, "learning_rate": 9.99862538571548e-06, "loss": 0.5196, "step": 3623 }, { "epoch": 0.10677194584800319, "grad_norm": 2.050020545115013, "learning_rate": 9.998613301811027e-06, "loss": 0.7198, "step": 3624 }, { "epoch": 0.10680140830546676, "grad_norm": 1.844739967818117, "learning_rate": 9.998601165032768e-06, "loss": 0.4768, "step": 3625 }, { "epoch": 0.10683087076293034, "grad_norm": 1.6923930102860076, "learning_rate": 9.998588975380827e-06, "loss": 0.6072, "step": 3626 }, { "epoch": 0.10686033322039391, "grad_norm": 1.7936547103874372, "learning_rate": 9.998576732855337e-06, "loss": 0.4956, "step": 3627 }, { "epoch": 0.10688979567785749, "grad_norm": 1.6289308555905526, "learning_rate": 9.998564437456426e-06, "loss": 0.5413, "step": 3628 }, { "epoch": 0.10691925813532106, "grad_norm": 1.7728323327148188, "learning_rate": 9.998552089184222e-06, "loss": 0.5887, "step": 3629 }, { "epoch": 0.10694872059278464, "grad_norm": 1.8537564555263373, "learning_rate": 9.99853968803886e-06, "loss": 0.6575, "step": 3630 }, { "epoch": 0.10697818305024823, "grad_norm": 1.90009001179925, "learning_rate": 9.998527234020465e-06, "loss": 0.5998, "step": 3631 }, { "epoch": 0.1070076455077118, "grad_norm": 1.6435214245432799, "learning_rate": 9.998514727129175e-06, "loss": 0.4005, "step": 3632 }, { "epoch": 0.10703710796517538, "grad_norm": 1.8880260579738335, "learning_rate": 9.998502167365118e-06, "loss": 0.6298, "step": 3633 }, { "epoch": 0.10706657042263895, "grad_norm": 2.1030500440697297, "learning_rate": 9.99848955472843e-06, "loss": 0.5161, "step": 3634 }, { "epoch": 0.10709603288010253, "grad_norm": 1.7202808477840326, "learning_rate": 9.998476889219242e-06, "loss": 0.568, "step": 3635 }, { "epoch": 0.1071254953375661, "grad_norm": 1.858499257561254, "learning_rate": 9.998464170837688e-06, "loss": 0.6123, "step": 3636 }, { "epoch": 0.10715495779502969, "grad_norm": 1.7541413786562872, "learning_rate": 9.998451399583903e-06, "loss": 0.5405, "step": 3637 }, { "epoch": 0.10718442025249326, "grad_norm": 1.8505493188726072, "learning_rate": 9.998438575458024e-06, "loss": 0.4944, "step": 3638 }, { "epoch": 0.10721388270995684, "grad_norm": 1.7199935375378885, "learning_rate": 9.998425698460186e-06, "loss": 0.5575, "step": 3639 }, { "epoch": 0.10724334516742041, "grad_norm": 1.681429805010117, "learning_rate": 9.998412768590522e-06, "loss": 0.4007, "step": 3640 }, { "epoch": 0.10727280762488399, "grad_norm": 1.932125424197238, "learning_rate": 9.998399785849172e-06, "loss": 0.685, "step": 3641 }, { "epoch": 0.10730227008234756, "grad_norm": 1.6053371852031526, "learning_rate": 9.998386750236272e-06, "loss": 0.4904, "step": 3642 }, { "epoch": 0.10733173253981114, "grad_norm": 1.7306441621570472, "learning_rate": 9.998373661751961e-06, "loss": 0.6223, "step": 3643 }, { "epoch": 0.10736119499727473, "grad_norm": 1.71492025473406, "learning_rate": 9.998360520396376e-06, "loss": 0.5473, "step": 3644 }, { "epoch": 0.1073906574547383, "grad_norm": 1.8305145063109802, "learning_rate": 9.998347326169658e-06, "loss": 0.6092, "step": 3645 }, { "epoch": 0.10742011991220188, "grad_norm": 1.7833691913896523, "learning_rate": 9.998334079071944e-06, "loss": 0.5004, "step": 3646 }, { "epoch": 0.10744958236966545, "grad_norm": 2.0579803915171473, "learning_rate": 9.998320779103375e-06, "loss": 0.447, "step": 3647 }, { "epoch": 0.10747904482712903, "grad_norm": 2.219508010726901, "learning_rate": 9.998307426264094e-06, "loss": 0.6461, "step": 3648 }, { "epoch": 0.1075085072845926, "grad_norm": 2.150881745803648, "learning_rate": 9.998294020554238e-06, "loss": 0.6561, "step": 3649 }, { "epoch": 0.10753796974205619, "grad_norm": 1.7303203749646243, "learning_rate": 9.998280561973952e-06, "loss": 0.5673, "step": 3650 }, { "epoch": 0.10756743219951977, "grad_norm": 1.669330697246907, "learning_rate": 9.998267050523378e-06, "loss": 0.4737, "step": 3651 }, { "epoch": 0.10759689465698334, "grad_norm": 1.6821022335013438, "learning_rate": 9.998253486202658e-06, "loss": 0.4823, "step": 3652 }, { "epoch": 0.10762635711444692, "grad_norm": 1.861436838562301, "learning_rate": 9.998239869011935e-06, "loss": 0.4725, "step": 3653 }, { "epoch": 0.10765581957191049, "grad_norm": 1.5718901154489673, "learning_rate": 9.998226198951356e-06, "loss": 0.4927, "step": 3654 }, { "epoch": 0.10768528202937407, "grad_norm": 1.7396084450637659, "learning_rate": 9.998212476021063e-06, "loss": 0.4661, "step": 3655 }, { "epoch": 0.10771474448683764, "grad_norm": 1.6995555543710832, "learning_rate": 9.9981987002212e-06, "loss": 0.4752, "step": 3656 }, { "epoch": 0.10774420694430123, "grad_norm": 1.9015252486800744, "learning_rate": 9.998184871551915e-06, "loss": 0.5966, "step": 3657 }, { "epoch": 0.1077736694017648, "grad_norm": 1.6040256998143236, "learning_rate": 9.998170990013354e-06, "loss": 0.5812, "step": 3658 }, { "epoch": 0.10780313185922838, "grad_norm": 1.8155594350365492, "learning_rate": 9.998157055605664e-06, "loss": 0.4899, "step": 3659 }, { "epoch": 0.10783259431669195, "grad_norm": 1.997750180791933, "learning_rate": 9.998143068328992e-06, "loss": 0.6339, "step": 3660 }, { "epoch": 0.10786205677415553, "grad_norm": 1.660934770318424, "learning_rate": 9.998129028183486e-06, "loss": 0.5169, "step": 3661 }, { "epoch": 0.1078915192316191, "grad_norm": 1.6020104100249715, "learning_rate": 9.998114935169294e-06, "loss": 0.4558, "step": 3662 }, { "epoch": 0.10792098168908269, "grad_norm": 2.106591063843007, "learning_rate": 9.998100789286564e-06, "loss": 0.769, "step": 3663 }, { "epoch": 0.10795044414654627, "grad_norm": 1.888562603489267, "learning_rate": 9.99808659053545e-06, "loss": 0.5403, "step": 3664 }, { "epoch": 0.10797990660400984, "grad_norm": 1.7363812234806095, "learning_rate": 9.998072338916097e-06, "loss": 0.5772, "step": 3665 }, { "epoch": 0.10800936906147342, "grad_norm": 1.627694738663635, "learning_rate": 9.998058034428659e-06, "loss": 0.5062, "step": 3666 }, { "epoch": 0.10803883151893699, "grad_norm": 1.7748685061583962, "learning_rate": 9.998043677073285e-06, "loss": 0.5787, "step": 3667 }, { "epoch": 0.10806829397640057, "grad_norm": 1.6384707259579299, "learning_rate": 9.99802926685013e-06, "loss": 0.504, "step": 3668 }, { "epoch": 0.10809775643386414, "grad_norm": 1.8346151246301388, "learning_rate": 9.998014803759343e-06, "loss": 0.4795, "step": 3669 }, { "epoch": 0.10812721889132773, "grad_norm": 2.292012607838486, "learning_rate": 9.99800028780108e-06, "loss": 0.6421, "step": 3670 }, { "epoch": 0.1081566813487913, "grad_norm": 1.7176622347725397, "learning_rate": 9.997985718975491e-06, "loss": 0.503, "step": 3671 }, { "epoch": 0.10818614380625488, "grad_norm": 1.9268990483008415, "learning_rate": 9.997971097282736e-06, "loss": 0.4778, "step": 3672 }, { "epoch": 0.10821560626371846, "grad_norm": 1.9241542318655838, "learning_rate": 9.997956422722962e-06, "loss": 0.6669, "step": 3673 }, { "epoch": 0.10824506872118203, "grad_norm": 1.9254110098445656, "learning_rate": 9.99794169529633e-06, "loss": 0.7037, "step": 3674 }, { "epoch": 0.1082745311786456, "grad_norm": 1.9504344935732563, "learning_rate": 9.997926915002993e-06, "loss": 0.3346, "step": 3675 }, { "epoch": 0.1083039936361092, "grad_norm": 1.9920747818429354, "learning_rate": 9.99791208184311e-06, "loss": 0.5283, "step": 3676 }, { "epoch": 0.10833345609357277, "grad_norm": 1.8744333864582117, "learning_rate": 9.997897195816834e-06, "loss": 0.6586, "step": 3677 }, { "epoch": 0.10836291855103634, "grad_norm": 1.701019114935059, "learning_rate": 9.997882256924326e-06, "loss": 0.6011, "step": 3678 }, { "epoch": 0.10839238100849992, "grad_norm": 1.9091242138984952, "learning_rate": 9.997867265165742e-06, "loss": 0.6518, "step": 3679 }, { "epoch": 0.1084218434659635, "grad_norm": 1.7171715273788404, "learning_rate": 9.997852220541242e-06, "loss": 0.5105, "step": 3680 }, { "epoch": 0.10845130592342707, "grad_norm": 1.675387102356234, "learning_rate": 9.997837123050984e-06, "loss": 0.5315, "step": 3681 }, { "epoch": 0.10848076838089064, "grad_norm": 1.7062688716466523, "learning_rate": 9.997821972695127e-06, "loss": 0.5212, "step": 3682 }, { "epoch": 0.10851023083835423, "grad_norm": 2.0315499777083015, "learning_rate": 9.997806769473834e-06, "loss": 0.545, "step": 3683 }, { "epoch": 0.10853969329581781, "grad_norm": 1.7464230023323855, "learning_rate": 9.997791513387264e-06, "loss": 0.595, "step": 3684 }, { "epoch": 0.10856915575328138, "grad_norm": 2.0011882474159655, "learning_rate": 9.997776204435578e-06, "loss": 0.7406, "step": 3685 }, { "epoch": 0.10859861821074496, "grad_norm": 1.9777658862820182, "learning_rate": 9.997760842618938e-06, "loss": 0.7307, "step": 3686 }, { "epoch": 0.10862808066820853, "grad_norm": 1.7347678950194327, "learning_rate": 9.997745427937507e-06, "loss": 0.5583, "step": 3687 }, { "epoch": 0.1086575431256721, "grad_norm": 1.6434308135129463, "learning_rate": 9.99772996039145e-06, "loss": 0.4495, "step": 3688 }, { "epoch": 0.1086870055831357, "grad_norm": 1.722601795773162, "learning_rate": 9.997714439980926e-06, "loss": 0.5185, "step": 3689 }, { "epoch": 0.10871646804059927, "grad_norm": 1.610326599677367, "learning_rate": 9.997698866706103e-06, "loss": 0.4848, "step": 3690 }, { "epoch": 0.10874593049806285, "grad_norm": 1.6922743810944991, "learning_rate": 9.997683240567143e-06, "loss": 0.6339, "step": 3691 }, { "epoch": 0.10877539295552642, "grad_norm": 1.781471679893332, "learning_rate": 9.997667561564213e-06, "loss": 0.6572, "step": 3692 }, { "epoch": 0.10880485541299, "grad_norm": 1.874664038064951, "learning_rate": 9.997651829697481e-06, "loss": 0.6047, "step": 3693 }, { "epoch": 0.10883431787045357, "grad_norm": 1.9178777266488036, "learning_rate": 9.99763604496711e-06, "loss": 0.665, "step": 3694 }, { "epoch": 0.10886378032791715, "grad_norm": 1.51764015431298, "learning_rate": 9.997620207373266e-06, "loss": 0.4711, "step": 3695 }, { "epoch": 0.10889324278538073, "grad_norm": 2.0075212942599623, "learning_rate": 9.99760431691612e-06, "loss": 0.5183, "step": 3696 }, { "epoch": 0.10892270524284431, "grad_norm": 1.7127283099444086, "learning_rate": 9.99758837359584e-06, "loss": 0.519, "step": 3697 }, { "epoch": 0.10895216770030788, "grad_norm": 2.0681014966874023, "learning_rate": 9.997572377412591e-06, "loss": 0.6326, "step": 3698 }, { "epoch": 0.10898163015777146, "grad_norm": 1.6340792642861675, "learning_rate": 9.997556328366546e-06, "loss": 0.505, "step": 3699 }, { "epoch": 0.10901109261523503, "grad_norm": 1.720970289649215, "learning_rate": 9.997540226457872e-06, "loss": 0.568, "step": 3700 }, { "epoch": 0.10904055507269861, "grad_norm": 1.9835459273351401, "learning_rate": 9.997524071686742e-06, "loss": 0.6584, "step": 3701 }, { "epoch": 0.1090700175301622, "grad_norm": 1.8069582318827182, "learning_rate": 9.997507864053323e-06, "loss": 0.6077, "step": 3702 }, { "epoch": 0.10909947998762577, "grad_norm": 1.7911186216540615, "learning_rate": 9.997491603557791e-06, "loss": 0.4608, "step": 3703 }, { "epoch": 0.10912894244508935, "grad_norm": 1.8067178143532752, "learning_rate": 9.997475290200315e-06, "loss": 0.4776, "step": 3704 }, { "epoch": 0.10915840490255292, "grad_norm": 1.7984524950545353, "learning_rate": 9.99745892398107e-06, "loss": 0.5397, "step": 3705 }, { "epoch": 0.1091878673600165, "grad_norm": 1.6847682017489882, "learning_rate": 9.997442504900226e-06, "loss": 0.5723, "step": 3706 }, { "epoch": 0.10921732981748007, "grad_norm": 1.8720903475438135, "learning_rate": 9.997426032957957e-06, "loss": 0.5264, "step": 3707 }, { "epoch": 0.10924679227494365, "grad_norm": 1.505478605264683, "learning_rate": 9.99740950815444e-06, "loss": 0.4666, "step": 3708 }, { "epoch": 0.10927625473240724, "grad_norm": 2.0712239650806263, "learning_rate": 9.997392930489848e-06, "loss": 0.6644, "step": 3709 }, { "epoch": 0.10930571718987081, "grad_norm": 1.8939105832086631, "learning_rate": 9.997376299964356e-06, "loss": 0.6903, "step": 3710 }, { "epoch": 0.10933517964733439, "grad_norm": 2.010680153175705, "learning_rate": 9.997359616578142e-06, "loss": 0.5305, "step": 3711 }, { "epoch": 0.10936464210479796, "grad_norm": 1.8647355080125827, "learning_rate": 9.99734288033138e-06, "loss": 0.5465, "step": 3712 }, { "epoch": 0.10939410456226153, "grad_norm": 1.850185898975866, "learning_rate": 9.997326091224247e-06, "loss": 0.5437, "step": 3713 }, { "epoch": 0.10942356701972511, "grad_norm": 1.8805414195335077, "learning_rate": 9.997309249256924e-06, "loss": 0.6172, "step": 3714 }, { "epoch": 0.1094530294771887, "grad_norm": 1.8913376272626141, "learning_rate": 9.997292354429585e-06, "loss": 0.5815, "step": 3715 }, { "epoch": 0.10948249193465227, "grad_norm": 1.6290645017570735, "learning_rate": 9.99727540674241e-06, "loss": 0.5035, "step": 3716 }, { "epoch": 0.10951195439211585, "grad_norm": 1.6652262202099122, "learning_rate": 9.99725840619558e-06, "loss": 0.5663, "step": 3717 }, { "epoch": 0.10954141684957942, "grad_norm": 1.651999414992046, "learning_rate": 9.997241352789274e-06, "loss": 0.4633, "step": 3718 }, { "epoch": 0.109570879307043, "grad_norm": 1.646277693685121, "learning_rate": 9.99722424652367e-06, "loss": 0.504, "step": 3719 }, { "epoch": 0.10960034176450657, "grad_norm": 1.9760616108578242, "learning_rate": 9.997207087398952e-06, "loss": 0.5221, "step": 3720 }, { "epoch": 0.10962980422197015, "grad_norm": 1.5311215303275385, "learning_rate": 9.997189875415302e-06, "loss": 0.4458, "step": 3721 }, { "epoch": 0.10965926667943374, "grad_norm": 1.6080251397876255, "learning_rate": 9.997172610572897e-06, "loss": 0.3733, "step": 3722 }, { "epoch": 0.10968872913689731, "grad_norm": 2.1879891697758436, "learning_rate": 9.997155292871924e-06, "loss": 0.8343, "step": 3723 }, { "epoch": 0.10971819159436089, "grad_norm": 1.8224540967814657, "learning_rate": 9.997137922312568e-06, "loss": 0.5524, "step": 3724 }, { "epoch": 0.10974765405182446, "grad_norm": 1.7407773354280671, "learning_rate": 9.997120498895007e-06, "loss": 0.6208, "step": 3725 }, { "epoch": 0.10977711650928804, "grad_norm": 1.696717494144625, "learning_rate": 9.99710302261943e-06, "loss": 0.548, "step": 3726 }, { "epoch": 0.10980657896675161, "grad_norm": 1.7713925193703663, "learning_rate": 9.997085493486018e-06, "loss": 0.6349, "step": 3727 }, { "epoch": 0.1098360414242152, "grad_norm": 1.580588362769396, "learning_rate": 9.997067911494961e-06, "loss": 0.4378, "step": 3728 }, { "epoch": 0.10986550388167877, "grad_norm": 1.815021782311553, "learning_rate": 9.997050276646442e-06, "loss": 0.6005, "step": 3729 }, { "epoch": 0.10989496633914235, "grad_norm": 1.751780587507635, "learning_rate": 9.997032588940646e-06, "loss": 0.6573, "step": 3730 }, { "epoch": 0.10992442879660592, "grad_norm": 1.7349588895687755, "learning_rate": 9.997014848377764e-06, "loss": 0.5025, "step": 3731 }, { "epoch": 0.1099538912540695, "grad_norm": 1.844170468636978, "learning_rate": 9.996997054957982e-06, "loss": 0.5008, "step": 3732 }, { "epoch": 0.10998335371153307, "grad_norm": 1.9070235221318736, "learning_rate": 9.996979208681488e-06, "loss": 0.7233, "step": 3733 }, { "epoch": 0.11001281616899665, "grad_norm": 1.7773573198707837, "learning_rate": 9.996961309548471e-06, "loss": 0.6837, "step": 3734 }, { "epoch": 0.11004227862646024, "grad_norm": 1.7953853570357978, "learning_rate": 9.99694335755912e-06, "loss": 0.3886, "step": 3735 }, { "epoch": 0.11007174108392381, "grad_norm": 1.767249892301876, "learning_rate": 9.996925352713624e-06, "loss": 0.5927, "step": 3736 }, { "epoch": 0.11010120354138739, "grad_norm": 1.5154821135426695, "learning_rate": 9.996907295012175e-06, "loss": 0.4885, "step": 3737 }, { "epoch": 0.11013066599885096, "grad_norm": 1.7956640326574147, "learning_rate": 9.996889184454964e-06, "loss": 0.5923, "step": 3738 }, { "epoch": 0.11016012845631454, "grad_norm": 1.7367713242885414, "learning_rate": 9.99687102104218e-06, "loss": 0.5281, "step": 3739 }, { "epoch": 0.11018959091377811, "grad_norm": 2.134216909737822, "learning_rate": 9.99685280477402e-06, "loss": 0.5814, "step": 3740 }, { "epoch": 0.1102190533712417, "grad_norm": 1.7079397457779757, "learning_rate": 9.996834535650673e-06, "loss": 0.6311, "step": 3741 }, { "epoch": 0.11024851582870528, "grad_norm": 1.4965135712681357, "learning_rate": 9.996816213672332e-06, "loss": 0.4675, "step": 3742 }, { "epoch": 0.11027797828616885, "grad_norm": 1.7874490488268393, "learning_rate": 9.996797838839193e-06, "loss": 0.4372, "step": 3743 }, { "epoch": 0.11030744074363243, "grad_norm": 1.9150267619751624, "learning_rate": 9.99677941115145e-06, "loss": 0.5937, "step": 3744 }, { "epoch": 0.110336903201096, "grad_norm": 1.7437855170073164, "learning_rate": 9.996760930609295e-06, "loss": 0.6925, "step": 3745 }, { "epoch": 0.11036636565855958, "grad_norm": 1.731621847420977, "learning_rate": 9.996742397212928e-06, "loss": 0.5511, "step": 3746 }, { "epoch": 0.11039582811602315, "grad_norm": 1.731132328064573, "learning_rate": 9.99672381096254e-06, "loss": 0.46, "step": 3747 }, { "epoch": 0.11042529057348674, "grad_norm": 1.6682992397886873, "learning_rate": 9.996705171858334e-06, "loss": 0.3798, "step": 3748 }, { "epoch": 0.11045475303095031, "grad_norm": 1.7836727819740819, "learning_rate": 9.996686479900501e-06, "loss": 0.4744, "step": 3749 }, { "epoch": 0.11048421548841389, "grad_norm": 1.6865916469316278, "learning_rate": 9.996667735089242e-06, "loss": 0.4773, "step": 3750 }, { "epoch": 0.11051367794587746, "grad_norm": 1.77199506977393, "learning_rate": 9.996648937424754e-06, "loss": 0.605, "step": 3751 }, { "epoch": 0.11054314040334104, "grad_norm": 2.0201589579613084, "learning_rate": 9.996630086907236e-06, "loss": 0.5987, "step": 3752 }, { "epoch": 0.11057260286080461, "grad_norm": 1.872065983709833, "learning_rate": 9.996611183536888e-06, "loss": 0.7021, "step": 3753 }, { "epoch": 0.1106020653182682, "grad_norm": 1.8562226557857937, "learning_rate": 9.99659222731391e-06, "loss": 0.4777, "step": 3754 }, { "epoch": 0.11063152777573178, "grad_norm": 2.209526888994709, "learning_rate": 9.996573218238503e-06, "loss": 0.6038, "step": 3755 }, { "epoch": 0.11066099023319535, "grad_norm": 1.9140042533066115, "learning_rate": 9.996554156310865e-06, "loss": 0.5878, "step": 3756 }, { "epoch": 0.11069045269065893, "grad_norm": 1.7169607342987103, "learning_rate": 9.996535041531202e-06, "loss": 0.5177, "step": 3757 }, { "epoch": 0.1107199151481225, "grad_norm": 1.8353504225858996, "learning_rate": 9.996515873899712e-06, "loss": 0.3862, "step": 3758 }, { "epoch": 0.11074937760558608, "grad_norm": 1.589125975716689, "learning_rate": 9.996496653416602e-06, "loss": 0.5662, "step": 3759 }, { "epoch": 0.11077884006304965, "grad_norm": 1.8502012874878517, "learning_rate": 9.996477380082073e-06, "loss": 0.6176, "step": 3760 }, { "epoch": 0.11080830252051324, "grad_norm": 1.8086646426948805, "learning_rate": 9.996458053896327e-06, "loss": 0.4057, "step": 3761 }, { "epoch": 0.11083776497797682, "grad_norm": 1.9915883938038867, "learning_rate": 9.996438674859572e-06, "loss": 0.5348, "step": 3762 }, { "epoch": 0.11086722743544039, "grad_norm": 2.0468609816070997, "learning_rate": 9.99641924297201e-06, "loss": 0.7016, "step": 3763 }, { "epoch": 0.11089668989290397, "grad_norm": 1.7363050161961335, "learning_rate": 9.99639975823385e-06, "loss": 0.6218, "step": 3764 }, { "epoch": 0.11092615235036754, "grad_norm": 1.5478081019118228, "learning_rate": 9.996380220645295e-06, "loss": 0.3651, "step": 3765 }, { "epoch": 0.11095561480783112, "grad_norm": 1.7274592783646545, "learning_rate": 9.996360630206553e-06, "loss": 0.5213, "step": 3766 }, { "epoch": 0.1109850772652947, "grad_norm": 1.8853453782772265, "learning_rate": 9.99634098691783e-06, "loss": 0.7609, "step": 3767 }, { "epoch": 0.11101453972275828, "grad_norm": 1.8074595165765233, "learning_rate": 9.996321290779337e-06, "loss": 0.549, "step": 3768 }, { "epoch": 0.11104400218022185, "grad_norm": 1.7982818885779737, "learning_rate": 9.996301541791278e-06, "loss": 0.4679, "step": 3769 }, { "epoch": 0.11107346463768543, "grad_norm": 1.6525926341850694, "learning_rate": 9.996281739953864e-06, "loss": 0.5156, "step": 3770 }, { "epoch": 0.111102927095149, "grad_norm": 1.6690247850265587, "learning_rate": 9.996261885267304e-06, "loss": 0.5868, "step": 3771 }, { "epoch": 0.11113238955261258, "grad_norm": 1.8276125211679195, "learning_rate": 9.996241977731811e-06, "loss": 0.567, "step": 3772 }, { "epoch": 0.11116185201007615, "grad_norm": 2.062319683084471, "learning_rate": 9.99622201734759e-06, "loss": 0.5927, "step": 3773 }, { "epoch": 0.11119131446753974, "grad_norm": 1.7742891083244723, "learning_rate": 9.996202004114857e-06, "loss": 0.5491, "step": 3774 }, { "epoch": 0.11122077692500332, "grad_norm": 1.6278253854109737, "learning_rate": 9.996181938033822e-06, "loss": 0.4862, "step": 3775 }, { "epoch": 0.11125023938246689, "grad_norm": 1.7905348699141501, "learning_rate": 9.996161819104696e-06, "loss": 0.6247, "step": 3776 }, { "epoch": 0.11127970183993047, "grad_norm": 1.648768883420256, "learning_rate": 9.996141647327692e-06, "loss": 0.5988, "step": 3777 }, { "epoch": 0.11130916429739404, "grad_norm": 1.8575954620426982, "learning_rate": 9.996121422703027e-06, "loss": 0.6454, "step": 3778 }, { "epoch": 0.11133862675485762, "grad_norm": 1.6797412878700413, "learning_rate": 9.99610114523091e-06, "loss": 0.4842, "step": 3779 }, { "epoch": 0.1113680892123212, "grad_norm": 1.7057671490401514, "learning_rate": 9.99608081491156e-06, "loss": 0.5727, "step": 3780 }, { "epoch": 0.11139755166978478, "grad_norm": 1.8841263154876884, "learning_rate": 9.996060431745187e-06, "loss": 0.6931, "step": 3781 }, { "epoch": 0.11142701412724836, "grad_norm": 1.844179302331904, "learning_rate": 9.996039995732011e-06, "loss": 0.5123, "step": 3782 }, { "epoch": 0.11145647658471193, "grad_norm": 1.7027609976411866, "learning_rate": 9.996019506872246e-06, "loss": 0.6028, "step": 3783 }, { "epoch": 0.1114859390421755, "grad_norm": 1.8009626450219482, "learning_rate": 9.99599896516611e-06, "loss": 0.6665, "step": 3784 }, { "epoch": 0.11151540149963908, "grad_norm": 1.8504381924063296, "learning_rate": 9.99597837061382e-06, "loss": 0.6947, "step": 3785 }, { "epoch": 0.11154486395710266, "grad_norm": 1.8630677151432578, "learning_rate": 9.99595772321559e-06, "loss": 0.5031, "step": 3786 }, { "epoch": 0.11157432641456624, "grad_norm": 1.6354500171263395, "learning_rate": 9.995937022971645e-06, "loss": 0.4419, "step": 3787 }, { "epoch": 0.11160378887202982, "grad_norm": 2.1327522582100307, "learning_rate": 9.995916269882199e-06, "loss": 0.6951, "step": 3788 }, { "epoch": 0.1116332513294934, "grad_norm": 1.817976292182587, "learning_rate": 9.995895463947475e-06, "loss": 0.5357, "step": 3789 }, { "epoch": 0.11166271378695697, "grad_norm": 1.9735878587526952, "learning_rate": 9.995874605167691e-06, "loss": 0.5379, "step": 3790 }, { "epoch": 0.11169217624442054, "grad_norm": 2.0489192624294783, "learning_rate": 9.995853693543067e-06, "loss": 0.6664, "step": 3791 }, { "epoch": 0.11172163870188412, "grad_norm": 1.507654084656246, "learning_rate": 9.995832729073824e-06, "loss": 0.4254, "step": 3792 }, { "epoch": 0.11175110115934771, "grad_norm": 1.8524549426429961, "learning_rate": 9.995811711760187e-06, "loss": 0.575, "step": 3793 }, { "epoch": 0.11178056361681128, "grad_norm": 1.8471355890782974, "learning_rate": 9.995790641602376e-06, "loss": 0.5531, "step": 3794 }, { "epoch": 0.11181002607427486, "grad_norm": 1.9395906250506076, "learning_rate": 9.995769518600613e-06, "loss": 0.4768, "step": 3795 }, { "epoch": 0.11183948853173843, "grad_norm": 1.9034209621631004, "learning_rate": 9.995748342755124e-06, "loss": 0.5353, "step": 3796 }, { "epoch": 0.111868950989202, "grad_norm": 2.3081202380444967, "learning_rate": 9.995727114066131e-06, "loss": 0.5888, "step": 3797 }, { "epoch": 0.11189841344666558, "grad_norm": 2.0368661800532486, "learning_rate": 9.99570583253386e-06, "loss": 0.4648, "step": 3798 }, { "epoch": 0.11192787590412916, "grad_norm": 1.6007327672219895, "learning_rate": 9.995684498158533e-06, "loss": 0.4347, "step": 3799 }, { "epoch": 0.11195733836159275, "grad_norm": 1.9106236494449003, "learning_rate": 9.99566311094038e-06, "loss": 0.6722, "step": 3800 }, { "epoch": 0.11198680081905632, "grad_norm": 1.6868066156053618, "learning_rate": 9.995641670879624e-06, "loss": 0.5609, "step": 3801 }, { "epoch": 0.1120162632765199, "grad_norm": 1.624845781560438, "learning_rate": 9.995620177976492e-06, "loss": 0.4577, "step": 3802 }, { "epoch": 0.11204572573398347, "grad_norm": 1.567734990244406, "learning_rate": 9.995598632231213e-06, "loss": 0.5784, "step": 3803 }, { "epoch": 0.11207518819144704, "grad_norm": 1.779768162259165, "learning_rate": 9.995577033644015e-06, "loss": 0.6325, "step": 3804 }, { "epoch": 0.11210465064891062, "grad_norm": 1.5566353348294517, "learning_rate": 9.995555382215123e-06, "loss": 0.5418, "step": 3805 }, { "epoch": 0.11213411310637421, "grad_norm": 1.6447247457296592, "learning_rate": 9.995533677944772e-06, "loss": 0.5926, "step": 3806 }, { "epoch": 0.11216357556383778, "grad_norm": 1.5433216916646582, "learning_rate": 9.995511920833186e-06, "loss": 0.5063, "step": 3807 }, { "epoch": 0.11219303802130136, "grad_norm": 1.6830294578825458, "learning_rate": 9.9954901108806e-06, "loss": 0.5456, "step": 3808 }, { "epoch": 0.11222250047876493, "grad_norm": 1.8466108514955162, "learning_rate": 9.99546824808724e-06, "loss": 0.5242, "step": 3809 }, { "epoch": 0.11225196293622851, "grad_norm": 1.7003802978554858, "learning_rate": 9.995446332453339e-06, "loss": 0.4981, "step": 3810 }, { "epoch": 0.11228142539369208, "grad_norm": 2.027693603664979, "learning_rate": 9.995424363979129e-06, "loss": 0.6632, "step": 3811 }, { "epoch": 0.11231088785115566, "grad_norm": 2.140026118231997, "learning_rate": 9.995402342664842e-06, "loss": 0.7406, "step": 3812 }, { "epoch": 0.11234035030861925, "grad_norm": 1.8070055048141986, "learning_rate": 9.995380268510713e-06, "loss": 0.5264, "step": 3813 }, { "epoch": 0.11236981276608282, "grad_norm": 2.128154935179715, "learning_rate": 9.995358141516974e-06, "loss": 0.5088, "step": 3814 }, { "epoch": 0.1123992752235464, "grad_norm": 1.8825814656876434, "learning_rate": 9.995335961683856e-06, "loss": 0.5798, "step": 3815 }, { "epoch": 0.11242873768100997, "grad_norm": 1.5392403788070104, "learning_rate": 9.9953137290116e-06, "loss": 0.3284, "step": 3816 }, { "epoch": 0.11245820013847355, "grad_norm": 1.8983724106280897, "learning_rate": 9.995291443500437e-06, "loss": 0.6255, "step": 3817 }, { "epoch": 0.11248766259593712, "grad_norm": 1.9203210567427786, "learning_rate": 9.995269105150601e-06, "loss": 0.6521, "step": 3818 }, { "epoch": 0.11251712505340071, "grad_norm": 1.7736385937132368, "learning_rate": 9.995246713962334e-06, "loss": 0.4017, "step": 3819 }, { "epoch": 0.11254658751086428, "grad_norm": 1.6436721556305336, "learning_rate": 9.995224269935868e-06, "loss": 0.5854, "step": 3820 }, { "epoch": 0.11257604996832786, "grad_norm": 1.8472444540707382, "learning_rate": 9.995201773071443e-06, "loss": 0.6748, "step": 3821 }, { "epoch": 0.11260551242579143, "grad_norm": 1.9742451623443904, "learning_rate": 9.995179223369296e-06, "loss": 0.7275, "step": 3822 }, { "epoch": 0.11263497488325501, "grad_norm": 1.7341740323091535, "learning_rate": 9.995156620829665e-06, "loss": 0.4998, "step": 3823 }, { "epoch": 0.11266443734071858, "grad_norm": 1.921529813059086, "learning_rate": 9.995133965452789e-06, "loss": 0.5267, "step": 3824 }, { "epoch": 0.11269389979818217, "grad_norm": 1.7257339021392661, "learning_rate": 9.995111257238909e-06, "loss": 0.7078, "step": 3825 }, { "epoch": 0.11272336225564575, "grad_norm": 1.748902645502187, "learning_rate": 9.995088496188264e-06, "loss": 0.4492, "step": 3826 }, { "epoch": 0.11275282471310932, "grad_norm": 1.727530650102361, "learning_rate": 9.995065682301095e-06, "loss": 0.4229, "step": 3827 }, { "epoch": 0.1127822871705729, "grad_norm": 1.7274303971059086, "learning_rate": 9.995042815577645e-06, "loss": 0.5636, "step": 3828 }, { "epoch": 0.11281174962803647, "grad_norm": 1.976579252294594, "learning_rate": 9.995019896018152e-06, "loss": 0.5113, "step": 3829 }, { "epoch": 0.11284121208550005, "grad_norm": 1.8333038253604836, "learning_rate": 9.994996923622861e-06, "loss": 0.6938, "step": 3830 }, { "epoch": 0.11287067454296362, "grad_norm": 2.1160927497183257, "learning_rate": 9.994973898392017e-06, "loss": 0.651, "step": 3831 }, { "epoch": 0.11290013700042721, "grad_norm": 2.008841464686776, "learning_rate": 9.994950820325861e-06, "loss": 0.7957, "step": 3832 }, { "epoch": 0.11292959945789079, "grad_norm": 1.681168200800567, "learning_rate": 9.994927689424636e-06, "loss": 0.6341, "step": 3833 }, { "epoch": 0.11295906191535436, "grad_norm": 1.8312216703622142, "learning_rate": 9.994904505688589e-06, "loss": 0.4887, "step": 3834 }, { "epoch": 0.11298852437281794, "grad_norm": 1.603051966839525, "learning_rate": 9.994881269117964e-06, "loss": 0.5464, "step": 3835 }, { "epoch": 0.11301798683028151, "grad_norm": 1.600777168229518, "learning_rate": 9.994857979713007e-06, "loss": 0.4776, "step": 3836 }, { "epoch": 0.11304744928774509, "grad_norm": 1.999166301583631, "learning_rate": 9.994834637473966e-06, "loss": 0.6676, "step": 3837 }, { "epoch": 0.11307691174520867, "grad_norm": 1.6510752815741523, "learning_rate": 9.994811242401087e-06, "loss": 0.3706, "step": 3838 }, { "epoch": 0.11310637420267225, "grad_norm": 1.9053573758585869, "learning_rate": 9.994787794494613e-06, "loss": 0.5918, "step": 3839 }, { "epoch": 0.11313583666013582, "grad_norm": 1.5809853775392066, "learning_rate": 9.994764293754798e-06, "loss": 0.554, "step": 3840 }, { "epoch": 0.1131652991175994, "grad_norm": 1.8484282994341998, "learning_rate": 9.99474074018189e-06, "loss": 0.4639, "step": 3841 }, { "epoch": 0.11319476157506297, "grad_norm": 1.9000656619671257, "learning_rate": 9.994717133776136e-06, "loss": 0.522, "step": 3842 }, { "epoch": 0.11322422403252655, "grad_norm": 1.654235744295977, "learning_rate": 9.994693474537785e-06, "loss": 0.4842, "step": 3843 }, { "epoch": 0.11325368648999012, "grad_norm": 1.7512395023407445, "learning_rate": 9.994669762467088e-06, "loss": 0.6319, "step": 3844 }, { "epoch": 0.11328314894745371, "grad_norm": 1.8752447787082434, "learning_rate": 9.994645997564299e-06, "loss": 0.6466, "step": 3845 }, { "epoch": 0.11331261140491729, "grad_norm": 1.7005718219865074, "learning_rate": 9.994622179829665e-06, "loss": 0.4698, "step": 3846 }, { "epoch": 0.11334207386238086, "grad_norm": 1.9635145063932005, "learning_rate": 9.99459830926344e-06, "loss": 0.5605, "step": 3847 }, { "epoch": 0.11337153631984444, "grad_norm": 1.7949429584356646, "learning_rate": 9.994574385865876e-06, "loss": 0.7028, "step": 3848 }, { "epoch": 0.11340099877730801, "grad_norm": 1.7087406728328873, "learning_rate": 9.994550409637226e-06, "loss": 0.6217, "step": 3849 }, { "epoch": 0.11343046123477159, "grad_norm": 1.7541053636208284, "learning_rate": 9.994526380577746e-06, "loss": 0.5354, "step": 3850 }, { "epoch": 0.11345992369223518, "grad_norm": 1.5845946814805143, "learning_rate": 9.994502298687688e-06, "loss": 0.5784, "step": 3851 }, { "epoch": 0.11348938614969875, "grad_norm": 2.0307854656994433, "learning_rate": 9.994478163967305e-06, "loss": 0.7671, "step": 3852 }, { "epoch": 0.11351884860716233, "grad_norm": 1.7176840391197639, "learning_rate": 9.994453976416854e-06, "loss": 0.5092, "step": 3853 }, { "epoch": 0.1135483110646259, "grad_norm": 1.7868278612031825, "learning_rate": 9.994429736036593e-06, "loss": 0.5422, "step": 3854 }, { "epoch": 0.11357777352208948, "grad_norm": 1.8417651188887605, "learning_rate": 9.994405442826774e-06, "loss": 0.4603, "step": 3855 }, { "epoch": 0.11360723597955305, "grad_norm": 1.7811252660356782, "learning_rate": 9.994381096787658e-06, "loss": 0.6988, "step": 3856 }, { "epoch": 0.11363669843701663, "grad_norm": 1.8614649401304182, "learning_rate": 9.994356697919499e-06, "loss": 0.4545, "step": 3857 }, { "epoch": 0.11366616089448021, "grad_norm": 1.6476601420249264, "learning_rate": 9.994332246222558e-06, "loss": 0.4706, "step": 3858 }, { "epoch": 0.11369562335194379, "grad_norm": 1.6102265294125127, "learning_rate": 9.994307741697093e-06, "loss": 0.4814, "step": 3859 }, { "epoch": 0.11372508580940736, "grad_norm": 1.7594910185995896, "learning_rate": 9.994283184343364e-06, "loss": 0.4979, "step": 3860 }, { "epoch": 0.11375454826687094, "grad_norm": 1.6758416536139749, "learning_rate": 9.994258574161626e-06, "loss": 0.5376, "step": 3861 }, { "epoch": 0.11378401072433451, "grad_norm": 1.8679711560574344, "learning_rate": 9.994233911152144e-06, "loss": 0.6255, "step": 3862 }, { "epoch": 0.11381347318179809, "grad_norm": 1.7992149596411704, "learning_rate": 9.99420919531518e-06, "loss": 0.5315, "step": 3863 }, { "epoch": 0.11384293563926168, "grad_norm": 1.8576363518970362, "learning_rate": 9.994184426650993e-06, "loss": 0.4863, "step": 3864 }, { "epoch": 0.11387239809672525, "grad_norm": 1.9267889999253756, "learning_rate": 9.994159605159843e-06, "loss": 0.5227, "step": 3865 }, { "epoch": 0.11390186055418883, "grad_norm": 1.7431341842089174, "learning_rate": 9.994134730841995e-06, "loss": 0.5739, "step": 3866 }, { "epoch": 0.1139313230116524, "grad_norm": 1.7183938138262456, "learning_rate": 9.994109803697713e-06, "loss": 0.575, "step": 3867 }, { "epoch": 0.11396078546911598, "grad_norm": 1.6959350511186713, "learning_rate": 9.994084823727259e-06, "loss": 0.5821, "step": 3868 }, { "epoch": 0.11399024792657955, "grad_norm": 1.7720230501863112, "learning_rate": 9.994059790930897e-06, "loss": 0.6021, "step": 3869 }, { "epoch": 0.11401971038404313, "grad_norm": 1.7101455434249413, "learning_rate": 9.994034705308893e-06, "loss": 0.3807, "step": 3870 }, { "epoch": 0.11404917284150672, "grad_norm": 1.7311046217086954, "learning_rate": 9.994009566861514e-06, "loss": 0.5999, "step": 3871 }, { "epoch": 0.11407863529897029, "grad_norm": 1.790379389859887, "learning_rate": 9.993984375589021e-06, "loss": 0.5166, "step": 3872 }, { "epoch": 0.11410809775643387, "grad_norm": 1.9683674186752396, "learning_rate": 9.993959131491684e-06, "loss": 0.4783, "step": 3873 }, { "epoch": 0.11413756021389744, "grad_norm": 1.7578900124286074, "learning_rate": 9.993933834569768e-06, "loss": 0.6679, "step": 3874 }, { "epoch": 0.11416702267136102, "grad_norm": 2.1181882058544916, "learning_rate": 9.993908484823544e-06, "loss": 0.6642, "step": 3875 }, { "epoch": 0.11419648512882459, "grad_norm": 1.7288569600622976, "learning_rate": 9.993883082253277e-06, "loss": 0.6046, "step": 3876 }, { "epoch": 0.11422594758628818, "grad_norm": 1.742184606500008, "learning_rate": 9.993857626859237e-06, "loss": 0.6049, "step": 3877 }, { "epoch": 0.11425541004375175, "grad_norm": 1.7722261337460232, "learning_rate": 9.993832118641692e-06, "loss": 0.6167, "step": 3878 }, { "epoch": 0.11428487250121533, "grad_norm": 1.921036528800878, "learning_rate": 9.993806557600914e-06, "loss": 0.6653, "step": 3879 }, { "epoch": 0.1143143349586789, "grad_norm": 1.7742374697516994, "learning_rate": 9.993780943737172e-06, "loss": 0.5807, "step": 3880 }, { "epoch": 0.11434379741614248, "grad_norm": 1.993813048508197, "learning_rate": 9.993755277050736e-06, "loss": 0.7058, "step": 3881 }, { "epoch": 0.11437325987360605, "grad_norm": 1.5664699695750628, "learning_rate": 9.99372955754188e-06, "loss": 0.5033, "step": 3882 }, { "epoch": 0.11440272233106963, "grad_norm": 2.042216186752327, "learning_rate": 9.993703785210874e-06, "loss": 0.5087, "step": 3883 }, { "epoch": 0.11443218478853322, "grad_norm": 1.9028841376396075, "learning_rate": 9.99367796005799e-06, "loss": 0.463, "step": 3884 }, { "epoch": 0.11446164724599679, "grad_norm": 1.8591330127422807, "learning_rate": 9.993652082083504e-06, "loss": 0.4909, "step": 3885 }, { "epoch": 0.11449110970346037, "grad_norm": 1.7996904282203525, "learning_rate": 9.99362615128769e-06, "loss": 0.5724, "step": 3886 }, { "epoch": 0.11452057216092394, "grad_norm": 1.4948580875675195, "learning_rate": 9.993600167670818e-06, "loss": 0.3665, "step": 3887 }, { "epoch": 0.11455003461838752, "grad_norm": 1.9092352196443714, "learning_rate": 9.993574131233165e-06, "loss": 0.5871, "step": 3888 }, { "epoch": 0.11457949707585109, "grad_norm": 1.6324699092195958, "learning_rate": 9.993548041975007e-06, "loss": 0.5007, "step": 3889 }, { "epoch": 0.11460895953331468, "grad_norm": 1.7758385087906499, "learning_rate": 9.99352189989662e-06, "loss": 0.5516, "step": 3890 }, { "epoch": 0.11463842199077826, "grad_norm": 1.5330856685785217, "learning_rate": 9.99349570499828e-06, "loss": 0.4139, "step": 3891 }, { "epoch": 0.11466788444824183, "grad_norm": 1.773318131091451, "learning_rate": 9.993469457280264e-06, "loss": 0.6003, "step": 3892 }, { "epoch": 0.1146973469057054, "grad_norm": 1.6754134217822807, "learning_rate": 9.99344315674285e-06, "loss": 0.4081, "step": 3893 }, { "epoch": 0.11472680936316898, "grad_norm": 1.8016006711193024, "learning_rate": 9.993416803386317e-06, "loss": 0.5266, "step": 3894 }, { "epoch": 0.11475627182063255, "grad_norm": 1.6988736904279358, "learning_rate": 9.993390397210941e-06, "loss": 0.5097, "step": 3895 }, { "epoch": 0.11478573427809613, "grad_norm": 1.8991733926758059, "learning_rate": 9.993363938217005e-06, "loss": 0.4986, "step": 3896 }, { "epoch": 0.11481519673555972, "grad_norm": 1.9156322935896841, "learning_rate": 9.993337426404786e-06, "loss": 0.5779, "step": 3897 }, { "epoch": 0.1148446591930233, "grad_norm": 1.809491298753648, "learning_rate": 9.993310861774565e-06, "loss": 0.619, "step": 3898 }, { "epoch": 0.11487412165048687, "grad_norm": 1.724877550803556, "learning_rate": 9.993284244326626e-06, "loss": 0.4913, "step": 3899 }, { "epoch": 0.11490358410795044, "grad_norm": 1.7888990090368944, "learning_rate": 9.993257574061246e-06, "loss": 0.476, "step": 3900 }, { "epoch": 0.11493304656541402, "grad_norm": 2.0055944441032016, "learning_rate": 9.993230850978708e-06, "loss": 0.6524, "step": 3901 }, { "epoch": 0.1149625090228776, "grad_norm": 2.0225616812522214, "learning_rate": 9.993204075079298e-06, "loss": 0.6165, "step": 3902 }, { "epoch": 0.11499197148034118, "grad_norm": 1.6557883949537608, "learning_rate": 9.993177246363295e-06, "loss": 0.4435, "step": 3903 }, { "epoch": 0.11502143393780476, "grad_norm": 1.6248444093509014, "learning_rate": 9.993150364830986e-06, "loss": 0.544, "step": 3904 }, { "epoch": 0.11505089639526833, "grad_norm": 1.5185274169467944, "learning_rate": 9.993123430482654e-06, "loss": 0.4009, "step": 3905 }, { "epoch": 0.1150803588527319, "grad_norm": 1.6921819211960056, "learning_rate": 9.993096443318586e-06, "loss": 0.5528, "step": 3906 }, { "epoch": 0.11510982131019548, "grad_norm": 1.8519080335887834, "learning_rate": 9.993069403339062e-06, "loss": 0.5208, "step": 3907 }, { "epoch": 0.11513928376765906, "grad_norm": 1.7628401273448198, "learning_rate": 9.993042310544375e-06, "loss": 0.5673, "step": 3908 }, { "epoch": 0.11516874622512263, "grad_norm": 2.034004287515713, "learning_rate": 9.993015164934804e-06, "loss": 0.5543, "step": 3909 }, { "epoch": 0.11519820868258622, "grad_norm": 1.775098708367284, "learning_rate": 9.992987966510644e-06, "loss": 0.521, "step": 3910 }, { "epoch": 0.1152276711400498, "grad_norm": 1.9681687097748302, "learning_rate": 9.992960715272178e-06, "loss": 0.501, "step": 3911 }, { "epoch": 0.11525713359751337, "grad_norm": 1.8651780820742763, "learning_rate": 9.992933411219694e-06, "loss": 0.6628, "step": 3912 }, { "epoch": 0.11528659605497694, "grad_norm": 1.9028633440636096, "learning_rate": 9.992906054353484e-06, "loss": 0.5283, "step": 3913 }, { "epoch": 0.11531605851244052, "grad_norm": 1.6580130999907337, "learning_rate": 9.992878644673834e-06, "loss": 0.4665, "step": 3914 }, { "epoch": 0.1153455209699041, "grad_norm": 1.8644898803008456, "learning_rate": 9.992851182181035e-06, "loss": 0.5993, "step": 3915 }, { "epoch": 0.11537498342736768, "grad_norm": 1.854425601213909, "learning_rate": 9.992823666875378e-06, "loss": 0.566, "step": 3916 }, { "epoch": 0.11540444588483126, "grad_norm": 1.7549252210519057, "learning_rate": 9.992796098757155e-06, "loss": 0.5192, "step": 3917 }, { "epoch": 0.11543390834229483, "grad_norm": 1.6951353513415681, "learning_rate": 9.992768477826655e-06, "loss": 0.515, "step": 3918 }, { "epoch": 0.11546337079975841, "grad_norm": 1.6084490482281404, "learning_rate": 9.992740804084173e-06, "loss": 0.4715, "step": 3919 }, { "epoch": 0.11549283325722198, "grad_norm": 1.7870728479470894, "learning_rate": 9.99271307753e-06, "loss": 0.535, "step": 3920 }, { "epoch": 0.11552229571468556, "grad_norm": 1.811363468831628, "learning_rate": 9.992685298164428e-06, "loss": 0.458, "step": 3921 }, { "epoch": 0.11555175817214913, "grad_norm": 1.8935528402350412, "learning_rate": 9.992657465987753e-06, "loss": 0.4476, "step": 3922 }, { "epoch": 0.11558122062961272, "grad_norm": 1.6841423187932731, "learning_rate": 9.99262958100027e-06, "loss": 0.4791, "step": 3923 }, { "epoch": 0.1156106830870763, "grad_norm": 1.7482739491408827, "learning_rate": 9.992601643202274e-06, "loss": 0.5877, "step": 3924 }, { "epoch": 0.11564014554453987, "grad_norm": 1.754701686673697, "learning_rate": 9.992573652594057e-06, "loss": 0.6466, "step": 3925 }, { "epoch": 0.11566960800200345, "grad_norm": 1.9228908888357052, "learning_rate": 9.992545609175919e-06, "loss": 0.6697, "step": 3926 }, { "epoch": 0.11569907045946702, "grad_norm": 1.6220738176043277, "learning_rate": 9.992517512948155e-06, "loss": 0.5624, "step": 3927 }, { "epoch": 0.1157285329169306, "grad_norm": 1.6717191888861176, "learning_rate": 9.992489363911061e-06, "loss": 0.4201, "step": 3928 }, { "epoch": 0.11575799537439418, "grad_norm": 1.6953629387694336, "learning_rate": 9.992461162064938e-06, "loss": 0.4897, "step": 3929 }, { "epoch": 0.11578745783185776, "grad_norm": 1.6048390144343772, "learning_rate": 9.99243290741008e-06, "loss": 0.5326, "step": 3930 }, { "epoch": 0.11581692028932133, "grad_norm": 1.6381733518463972, "learning_rate": 9.99240459994679e-06, "loss": 0.5208, "step": 3931 }, { "epoch": 0.11584638274678491, "grad_norm": 1.6695786263241081, "learning_rate": 9.992376239675362e-06, "loss": 0.4261, "step": 3932 }, { "epoch": 0.11587584520424848, "grad_norm": 1.6841613890031548, "learning_rate": 9.992347826596105e-06, "loss": 0.4609, "step": 3933 }, { "epoch": 0.11590530766171206, "grad_norm": 1.7729909156876777, "learning_rate": 9.99231936070931e-06, "loss": 0.6183, "step": 3934 }, { "epoch": 0.11593477011917563, "grad_norm": 1.782446357785654, "learning_rate": 9.992290842015283e-06, "loss": 0.5819, "step": 3935 }, { "epoch": 0.11596423257663922, "grad_norm": 1.5281449171152388, "learning_rate": 9.992262270514327e-06, "loss": 0.4224, "step": 3936 }, { "epoch": 0.1159936950341028, "grad_norm": 1.5920149142265843, "learning_rate": 9.992233646206742e-06, "loss": 0.4744, "step": 3937 }, { "epoch": 0.11602315749156637, "grad_norm": 1.9162146626913756, "learning_rate": 9.99220496909283e-06, "loss": 0.5704, "step": 3938 }, { "epoch": 0.11605261994902995, "grad_norm": 1.8741619784433436, "learning_rate": 9.992176239172895e-06, "loss": 0.5687, "step": 3939 }, { "epoch": 0.11608208240649352, "grad_norm": 1.69441420151471, "learning_rate": 9.992147456447242e-06, "loss": 0.4665, "step": 3940 }, { "epoch": 0.1161115448639571, "grad_norm": 1.7635812772552362, "learning_rate": 9.992118620916173e-06, "loss": 0.6034, "step": 3941 }, { "epoch": 0.11614100732142069, "grad_norm": 1.6558423052971403, "learning_rate": 9.992089732579994e-06, "loss": 0.5629, "step": 3942 }, { "epoch": 0.11617046977888426, "grad_norm": 1.4699217246031435, "learning_rate": 9.992060791439015e-06, "loss": 0.4751, "step": 3943 }, { "epoch": 0.11619993223634784, "grad_norm": 1.7621387229314356, "learning_rate": 9.992031797493536e-06, "loss": 0.5989, "step": 3944 }, { "epoch": 0.11622939469381141, "grad_norm": 1.5344910092135156, "learning_rate": 9.992002750743865e-06, "loss": 0.465, "step": 3945 }, { "epoch": 0.11625885715127499, "grad_norm": 1.712919149719561, "learning_rate": 9.991973651190312e-06, "loss": 0.6665, "step": 3946 }, { "epoch": 0.11628831960873856, "grad_norm": 1.7943008198750254, "learning_rate": 9.991944498833182e-06, "loss": 0.455, "step": 3947 }, { "epoch": 0.11631778206620214, "grad_norm": 1.7678021447525492, "learning_rate": 9.991915293672785e-06, "loss": 0.5928, "step": 3948 }, { "epoch": 0.11634724452366572, "grad_norm": 1.5415846695910316, "learning_rate": 9.991886035709428e-06, "loss": 0.5214, "step": 3949 }, { "epoch": 0.1163767069811293, "grad_norm": 1.7616275857256072, "learning_rate": 9.991856724943424e-06, "loss": 0.5854, "step": 3950 }, { "epoch": 0.11640616943859287, "grad_norm": 1.7045223778004994, "learning_rate": 9.991827361375079e-06, "loss": 0.5994, "step": 3951 }, { "epoch": 0.11643563189605645, "grad_norm": 1.7054897364783945, "learning_rate": 9.991797945004708e-06, "loss": 0.5665, "step": 3952 }, { "epoch": 0.11646509435352002, "grad_norm": 1.80836327829808, "learning_rate": 9.991768475832617e-06, "loss": 0.5937, "step": 3953 }, { "epoch": 0.1164945568109836, "grad_norm": 1.756131513929423, "learning_rate": 9.991738953859123e-06, "loss": 0.5568, "step": 3954 }, { "epoch": 0.11652401926844719, "grad_norm": 1.6873324031832964, "learning_rate": 9.991709379084533e-06, "loss": 0.6034, "step": 3955 }, { "epoch": 0.11655348172591076, "grad_norm": 1.5379661365431714, "learning_rate": 9.991679751509163e-06, "loss": 0.4529, "step": 3956 }, { "epoch": 0.11658294418337434, "grad_norm": 1.7533474201412083, "learning_rate": 9.991650071133328e-06, "loss": 0.6367, "step": 3957 }, { "epoch": 0.11661240664083791, "grad_norm": 1.8367900699746758, "learning_rate": 9.991620337957338e-06, "loss": 0.5712, "step": 3958 }, { "epoch": 0.11664186909830149, "grad_norm": 2.03289395706271, "learning_rate": 9.991590551981507e-06, "loss": 0.6083, "step": 3959 }, { "epoch": 0.11667133155576506, "grad_norm": 1.8506527406047457, "learning_rate": 9.991560713206156e-06, "loss": 0.6835, "step": 3960 }, { "epoch": 0.11670079401322864, "grad_norm": 1.6872239613296252, "learning_rate": 9.991530821631596e-06, "loss": 0.5013, "step": 3961 }, { "epoch": 0.11673025647069223, "grad_norm": 1.8479897819232207, "learning_rate": 9.991500877258145e-06, "loss": 0.6453, "step": 3962 }, { "epoch": 0.1167597189281558, "grad_norm": 2.1641162015839415, "learning_rate": 9.991470880086116e-06, "loss": 0.6308, "step": 3963 }, { "epoch": 0.11678918138561938, "grad_norm": 1.7501183839138477, "learning_rate": 9.991440830115832e-06, "loss": 0.497, "step": 3964 }, { "epoch": 0.11681864384308295, "grad_norm": 1.8117969516090384, "learning_rate": 9.991410727347608e-06, "loss": 0.624, "step": 3965 }, { "epoch": 0.11684810630054653, "grad_norm": 1.645601272828284, "learning_rate": 9.991380571781762e-06, "loss": 0.4744, "step": 3966 }, { "epoch": 0.1168775687580101, "grad_norm": 1.6819731839061312, "learning_rate": 9.991350363418612e-06, "loss": 0.499, "step": 3967 }, { "epoch": 0.11690703121547369, "grad_norm": 1.7779340263026893, "learning_rate": 9.991320102258481e-06, "loss": 0.5129, "step": 3968 }, { "epoch": 0.11693649367293726, "grad_norm": 1.8252486464015316, "learning_rate": 9.991289788301685e-06, "loss": 0.5962, "step": 3969 }, { "epoch": 0.11696595613040084, "grad_norm": 1.6778176811776682, "learning_rate": 9.99125942154855e-06, "loss": 0.4987, "step": 3970 }, { "epoch": 0.11699541858786441, "grad_norm": 1.7684985474012205, "learning_rate": 9.991229001999392e-06, "loss": 0.5496, "step": 3971 }, { "epoch": 0.11702488104532799, "grad_norm": 1.6934281492637342, "learning_rate": 9.991198529654535e-06, "loss": 0.4845, "step": 3972 }, { "epoch": 0.11705434350279156, "grad_norm": 2.0084448321431254, "learning_rate": 9.9911680045143e-06, "loss": 0.5205, "step": 3973 }, { "epoch": 0.11708380596025514, "grad_norm": 1.8311021091132835, "learning_rate": 9.991137426579012e-06, "loss": 0.5308, "step": 3974 }, { "epoch": 0.11711326841771873, "grad_norm": 1.831795968271944, "learning_rate": 9.991106795848992e-06, "loss": 0.5551, "step": 3975 }, { "epoch": 0.1171427308751823, "grad_norm": 1.550339194171478, "learning_rate": 9.991076112324567e-06, "loss": 0.4678, "step": 3976 }, { "epoch": 0.11717219333264588, "grad_norm": 1.6572943784906498, "learning_rate": 9.991045376006061e-06, "loss": 0.5042, "step": 3977 }, { "epoch": 0.11720165579010945, "grad_norm": 1.6873053353768628, "learning_rate": 9.991014586893798e-06, "loss": 0.4974, "step": 3978 }, { "epoch": 0.11723111824757303, "grad_norm": 2.0056350703642685, "learning_rate": 9.990983744988101e-06, "loss": 0.6123, "step": 3979 }, { "epoch": 0.1172605807050366, "grad_norm": 1.5140279742358815, "learning_rate": 9.990952850289301e-06, "loss": 0.3937, "step": 3980 }, { "epoch": 0.11729004316250019, "grad_norm": 1.7400658173981116, "learning_rate": 9.99092190279772e-06, "loss": 0.6203, "step": 3981 }, { "epoch": 0.11731950561996377, "grad_norm": 1.9268274229154918, "learning_rate": 9.990890902513692e-06, "loss": 0.5447, "step": 3982 }, { "epoch": 0.11734896807742734, "grad_norm": 1.8514262665109298, "learning_rate": 9.990859849437539e-06, "loss": 0.593, "step": 3983 }, { "epoch": 0.11737843053489092, "grad_norm": 2.2783898048157507, "learning_rate": 9.990828743569593e-06, "loss": 0.6292, "step": 3984 }, { "epoch": 0.11740789299235449, "grad_norm": 1.9880915876281902, "learning_rate": 9.99079758491018e-06, "loss": 0.5813, "step": 3985 }, { "epoch": 0.11743735544981806, "grad_norm": 1.796422240975368, "learning_rate": 9.990766373459633e-06, "loss": 0.5162, "step": 3986 }, { "epoch": 0.11746681790728164, "grad_norm": 1.7689755015047623, "learning_rate": 9.990735109218278e-06, "loss": 0.4995, "step": 3987 }, { "epoch": 0.11749628036474523, "grad_norm": 1.9553696249565844, "learning_rate": 9.99070379218645e-06, "loss": 0.5723, "step": 3988 }, { "epoch": 0.1175257428222088, "grad_norm": 1.733623906360646, "learning_rate": 9.990672422364476e-06, "loss": 0.5312, "step": 3989 }, { "epoch": 0.11755520527967238, "grad_norm": 1.692345620842327, "learning_rate": 9.990640999752693e-06, "loss": 0.4636, "step": 3990 }, { "epoch": 0.11758466773713595, "grad_norm": 1.701003268593849, "learning_rate": 9.990609524351426e-06, "loss": 0.5972, "step": 3991 }, { "epoch": 0.11761413019459953, "grad_norm": 1.9758801876870717, "learning_rate": 9.990577996161017e-06, "loss": 0.597, "step": 3992 }, { "epoch": 0.1176435926520631, "grad_norm": 1.6262901297916164, "learning_rate": 9.990546415181793e-06, "loss": 0.5909, "step": 3993 }, { "epoch": 0.11767305510952669, "grad_norm": 1.8449778738394758, "learning_rate": 9.99051478141409e-06, "loss": 0.5038, "step": 3994 }, { "epoch": 0.11770251756699027, "grad_norm": 1.9559815072622295, "learning_rate": 9.990483094858243e-06, "loss": 0.6757, "step": 3995 }, { "epoch": 0.11773198002445384, "grad_norm": 1.7760028017710308, "learning_rate": 9.990451355514586e-06, "loss": 0.5158, "step": 3996 }, { "epoch": 0.11776144248191742, "grad_norm": 1.8512427712850406, "learning_rate": 9.990419563383455e-06, "loss": 0.5492, "step": 3997 }, { "epoch": 0.11779090493938099, "grad_norm": 1.7633652015070975, "learning_rate": 9.990387718465187e-06, "loss": 0.5412, "step": 3998 }, { "epoch": 0.11782036739684457, "grad_norm": 1.8495509674407828, "learning_rate": 9.99035582076012e-06, "loss": 0.5128, "step": 3999 }, { "epoch": 0.11784982985430814, "grad_norm": 1.7052709066724834, "learning_rate": 9.990323870268588e-06, "loss": 0.5437, "step": 4000 }, { "epoch": 0.11787929231177173, "grad_norm": 1.6280903988366644, "learning_rate": 9.990291866990933e-06, "loss": 0.5381, "step": 4001 }, { "epoch": 0.1179087547692353, "grad_norm": 1.859813127571119, "learning_rate": 9.99025981092749e-06, "loss": 0.5698, "step": 4002 }, { "epoch": 0.11793821722669888, "grad_norm": 1.9945844774112491, "learning_rate": 9.9902277020786e-06, "loss": 0.6672, "step": 4003 }, { "epoch": 0.11796767968416245, "grad_norm": 1.7112576729387086, "learning_rate": 9.990195540444603e-06, "loss": 0.4702, "step": 4004 }, { "epoch": 0.11799714214162603, "grad_norm": 1.5475125010861717, "learning_rate": 9.990163326025836e-06, "loss": 0.3856, "step": 4005 }, { "epoch": 0.1180266045990896, "grad_norm": 1.570081769702699, "learning_rate": 9.990131058822644e-06, "loss": 0.4263, "step": 4006 }, { "epoch": 0.1180560670565532, "grad_norm": 2.0819350037266333, "learning_rate": 9.990098738835366e-06, "loss": 0.7132, "step": 4007 }, { "epoch": 0.11808552951401677, "grad_norm": 1.9653311891519247, "learning_rate": 9.990066366064344e-06, "loss": 0.6588, "step": 4008 }, { "epoch": 0.11811499197148034, "grad_norm": 1.6186288729268383, "learning_rate": 9.990033940509922e-06, "loss": 0.511, "step": 4009 }, { "epoch": 0.11814445442894392, "grad_norm": 1.7822775665138695, "learning_rate": 9.99000146217244e-06, "loss": 0.4807, "step": 4010 }, { "epoch": 0.11817391688640749, "grad_norm": 1.6927696633033529, "learning_rate": 9.989968931052246e-06, "loss": 0.6267, "step": 4011 }, { "epoch": 0.11820337934387107, "grad_norm": 1.9041971160583677, "learning_rate": 9.98993634714968e-06, "loss": 0.492, "step": 4012 }, { "epoch": 0.11823284180133464, "grad_norm": 1.7221067960452396, "learning_rate": 9.989903710465087e-06, "loss": 0.5905, "step": 4013 }, { "epoch": 0.11826230425879823, "grad_norm": 1.683220458412424, "learning_rate": 9.989871020998814e-06, "loss": 0.5403, "step": 4014 }, { "epoch": 0.1182917667162618, "grad_norm": 1.617431592199721, "learning_rate": 9.989838278751205e-06, "loss": 0.4797, "step": 4015 }, { "epoch": 0.11832122917372538, "grad_norm": 1.660126870416332, "learning_rate": 9.98980548372261e-06, "loss": 0.4909, "step": 4016 }, { "epoch": 0.11835069163118896, "grad_norm": 1.548556179083741, "learning_rate": 9.98977263591337e-06, "loss": 0.5355, "step": 4017 }, { "epoch": 0.11838015408865253, "grad_norm": 1.5709192180476788, "learning_rate": 9.989739735323839e-06, "loss": 0.4163, "step": 4018 }, { "epoch": 0.1184096165461161, "grad_norm": 1.5852804559824567, "learning_rate": 9.98970678195436e-06, "loss": 0.4693, "step": 4019 }, { "epoch": 0.1184390790035797, "grad_norm": 1.8001293592181915, "learning_rate": 9.989673775805282e-06, "loss": 0.6341, "step": 4020 }, { "epoch": 0.11846854146104327, "grad_norm": 1.8296175219894013, "learning_rate": 9.989640716876958e-06, "loss": 0.6269, "step": 4021 }, { "epoch": 0.11849800391850684, "grad_norm": 1.5726983238962813, "learning_rate": 9.989607605169734e-06, "loss": 0.4961, "step": 4022 }, { "epoch": 0.11852746637597042, "grad_norm": 1.5913609224946215, "learning_rate": 9.989574440683962e-06, "loss": 0.5559, "step": 4023 }, { "epoch": 0.118556928833434, "grad_norm": 1.675529975514, "learning_rate": 9.989541223419992e-06, "loss": 0.4588, "step": 4024 }, { "epoch": 0.11858639129089757, "grad_norm": 1.6504529386433837, "learning_rate": 9.989507953378175e-06, "loss": 0.5826, "step": 4025 }, { "epoch": 0.11861585374836114, "grad_norm": 1.6067566798955528, "learning_rate": 9.989474630558863e-06, "loss": 0.4603, "step": 4026 }, { "epoch": 0.11864531620582473, "grad_norm": 1.895650077527263, "learning_rate": 9.98944125496241e-06, "loss": 0.5513, "step": 4027 }, { "epoch": 0.11867477866328831, "grad_norm": 1.6147602003012724, "learning_rate": 9.989407826589169e-06, "loss": 0.4214, "step": 4028 }, { "epoch": 0.11870424112075188, "grad_norm": 1.8338457263089083, "learning_rate": 9.989374345439489e-06, "loss": 0.6259, "step": 4029 }, { "epoch": 0.11873370357821546, "grad_norm": 1.6983005728637741, "learning_rate": 9.98934081151373e-06, "loss": 0.5247, "step": 4030 }, { "epoch": 0.11876316603567903, "grad_norm": 1.8753726566235676, "learning_rate": 9.989307224812245e-06, "loss": 0.6001, "step": 4031 }, { "epoch": 0.11879262849314261, "grad_norm": 1.7810697883097513, "learning_rate": 9.989273585335388e-06, "loss": 0.5465, "step": 4032 }, { "epoch": 0.1188220909506062, "grad_norm": 1.8421201837302437, "learning_rate": 9.989239893083516e-06, "loss": 0.6206, "step": 4033 }, { "epoch": 0.11885155340806977, "grad_norm": 1.645537523222164, "learning_rate": 9.989206148056984e-06, "loss": 0.5456, "step": 4034 }, { "epoch": 0.11888101586553335, "grad_norm": 1.6422047090111382, "learning_rate": 9.989172350256151e-06, "loss": 0.5189, "step": 4035 }, { "epoch": 0.11891047832299692, "grad_norm": 1.7543885078356931, "learning_rate": 9.989138499681373e-06, "loss": 0.6583, "step": 4036 }, { "epoch": 0.1189399407804605, "grad_norm": 1.5699618193559781, "learning_rate": 9.989104596333008e-06, "loss": 0.4975, "step": 4037 }, { "epoch": 0.11896940323792407, "grad_norm": 1.6374091085574503, "learning_rate": 9.989070640211415e-06, "loss": 0.5119, "step": 4038 }, { "epoch": 0.11899886569538765, "grad_norm": 1.5636232216116897, "learning_rate": 9.989036631316955e-06, "loss": 0.4808, "step": 4039 }, { "epoch": 0.11902832815285123, "grad_norm": 2.007310452660753, "learning_rate": 9.989002569649983e-06, "loss": 0.615, "step": 4040 }, { "epoch": 0.11905779061031481, "grad_norm": 1.7318398633429075, "learning_rate": 9.988968455210864e-06, "loss": 0.6253, "step": 4041 }, { "epoch": 0.11908725306777838, "grad_norm": 1.8809966545550696, "learning_rate": 9.988934287999957e-06, "loss": 0.5496, "step": 4042 }, { "epoch": 0.11911671552524196, "grad_norm": 1.8948692047978268, "learning_rate": 9.988900068017623e-06, "loss": 0.66, "step": 4043 }, { "epoch": 0.11914617798270553, "grad_norm": 1.7810804290128979, "learning_rate": 9.988865795264226e-06, "loss": 0.5531, "step": 4044 }, { "epoch": 0.11917564044016911, "grad_norm": 1.7961300136628717, "learning_rate": 9.988831469740125e-06, "loss": 0.6634, "step": 4045 }, { "epoch": 0.1192051028976327, "grad_norm": 1.6770073079792789, "learning_rate": 9.988797091445687e-06, "loss": 0.6533, "step": 4046 }, { "epoch": 0.11923456535509627, "grad_norm": 1.7936209003604182, "learning_rate": 9.988762660381273e-06, "loss": 0.5017, "step": 4047 }, { "epoch": 0.11926402781255985, "grad_norm": 1.5933144342005907, "learning_rate": 9.988728176547246e-06, "loss": 0.5699, "step": 4048 }, { "epoch": 0.11929349027002342, "grad_norm": 1.7120271004591239, "learning_rate": 9.988693639943977e-06, "loss": 0.6043, "step": 4049 }, { "epoch": 0.119322952727487, "grad_norm": 1.8684983034123226, "learning_rate": 9.988659050571824e-06, "loss": 0.57, "step": 4050 }, { "epoch": 0.11935241518495057, "grad_norm": 1.873129961088638, "learning_rate": 9.988624408431156e-06, "loss": 0.6726, "step": 4051 }, { "epoch": 0.11938187764241415, "grad_norm": 1.8771078966722705, "learning_rate": 9.98858971352234e-06, "loss": 0.5002, "step": 4052 }, { "epoch": 0.11941134009987774, "grad_norm": 1.646933942895407, "learning_rate": 9.98855496584574e-06, "loss": 0.4628, "step": 4053 }, { "epoch": 0.11944080255734131, "grad_norm": 1.769119776062018, "learning_rate": 9.988520165401729e-06, "loss": 0.6524, "step": 4054 }, { "epoch": 0.11947026501480489, "grad_norm": 1.8488772535048559, "learning_rate": 9.988485312190672e-06, "loss": 0.5681, "step": 4055 }, { "epoch": 0.11949972747226846, "grad_norm": 1.943197148609157, "learning_rate": 9.988450406212937e-06, "loss": 0.6222, "step": 4056 }, { "epoch": 0.11952918992973204, "grad_norm": 1.8333573420267328, "learning_rate": 9.988415447468893e-06, "loss": 0.529, "step": 4057 }, { "epoch": 0.11955865238719561, "grad_norm": 1.8321380898975403, "learning_rate": 9.988380435958911e-06, "loss": 0.6006, "step": 4058 }, { "epoch": 0.1195881148446592, "grad_norm": 1.70258850730348, "learning_rate": 9.988345371683362e-06, "loss": 0.4777, "step": 4059 }, { "epoch": 0.11961757730212277, "grad_norm": 1.818066866846776, "learning_rate": 9.988310254642615e-06, "loss": 0.658, "step": 4060 }, { "epoch": 0.11964703975958635, "grad_norm": 1.6082832369254094, "learning_rate": 9.988275084837042e-06, "loss": 0.5183, "step": 4061 }, { "epoch": 0.11967650221704992, "grad_norm": 1.6104059759811322, "learning_rate": 9.988239862267015e-06, "loss": 0.4904, "step": 4062 }, { "epoch": 0.1197059646745135, "grad_norm": 1.7448922363633481, "learning_rate": 9.988204586932908e-06, "loss": 0.5923, "step": 4063 }, { "epoch": 0.11973542713197707, "grad_norm": 1.7193290226743099, "learning_rate": 9.988169258835094e-06, "loss": 0.5465, "step": 4064 }, { "epoch": 0.11976488958944065, "grad_norm": 1.7717642097615174, "learning_rate": 9.988133877973945e-06, "loss": 0.6054, "step": 4065 }, { "epoch": 0.11979435204690424, "grad_norm": 1.8475550795174807, "learning_rate": 9.988098444349835e-06, "loss": 0.6269, "step": 4066 }, { "epoch": 0.11982381450436781, "grad_norm": 1.8987502784127999, "learning_rate": 9.988062957963141e-06, "loss": 0.5475, "step": 4067 }, { "epoch": 0.11985327696183139, "grad_norm": 1.7670266785873971, "learning_rate": 9.988027418814235e-06, "loss": 0.5592, "step": 4068 }, { "epoch": 0.11988273941929496, "grad_norm": 1.6897590521086736, "learning_rate": 9.987991826903498e-06, "loss": 0.532, "step": 4069 }, { "epoch": 0.11991220187675854, "grad_norm": 1.6731356286482424, "learning_rate": 9.987956182231301e-06, "loss": 0.5276, "step": 4070 }, { "epoch": 0.11994166433422211, "grad_norm": 1.8109928465180456, "learning_rate": 9.987920484798026e-06, "loss": 0.5955, "step": 4071 }, { "epoch": 0.1199711267916857, "grad_norm": 1.9522604968421817, "learning_rate": 9.987884734604048e-06, "loss": 0.4932, "step": 4072 }, { "epoch": 0.12000058924914928, "grad_norm": 1.7439311131645738, "learning_rate": 9.987848931649744e-06, "loss": 0.444, "step": 4073 }, { "epoch": 0.12003005170661285, "grad_norm": 1.7651481611301267, "learning_rate": 9.987813075935496e-06, "loss": 0.5619, "step": 4074 }, { "epoch": 0.12005951416407643, "grad_norm": 1.7628695952196267, "learning_rate": 9.987777167461678e-06, "loss": 0.4039, "step": 4075 }, { "epoch": 0.12008897662154, "grad_norm": 1.6338993491223965, "learning_rate": 9.987741206228677e-06, "loss": 0.4553, "step": 4076 }, { "epoch": 0.12011843907900357, "grad_norm": 1.7988466124016775, "learning_rate": 9.987705192236868e-06, "loss": 0.5814, "step": 4077 }, { "epoch": 0.12014790153646715, "grad_norm": 1.8593715024097133, "learning_rate": 9.987669125486631e-06, "loss": 0.5552, "step": 4078 }, { "epoch": 0.12017736399393074, "grad_norm": 1.9493623866357443, "learning_rate": 9.987633005978354e-06, "loss": 0.5908, "step": 4079 }, { "epoch": 0.12020682645139431, "grad_norm": 1.7731403062611577, "learning_rate": 9.987596833712414e-06, "loss": 0.5358, "step": 4080 }, { "epoch": 0.12023628890885789, "grad_norm": 1.5892818945153881, "learning_rate": 9.987560608689194e-06, "loss": 0.4859, "step": 4081 }, { "epoch": 0.12026575136632146, "grad_norm": 1.6023394929272337, "learning_rate": 9.987524330909077e-06, "loss": 0.5168, "step": 4082 }, { "epoch": 0.12029521382378504, "grad_norm": 1.7879832406342802, "learning_rate": 9.98748800037245e-06, "loss": 0.5203, "step": 4083 }, { "epoch": 0.12032467628124861, "grad_norm": 1.6434244712126278, "learning_rate": 9.987451617079694e-06, "loss": 0.5104, "step": 4084 }, { "epoch": 0.1203541387387122, "grad_norm": 1.6841016231983457, "learning_rate": 9.987415181031193e-06, "loss": 0.5334, "step": 4085 }, { "epoch": 0.12038360119617578, "grad_norm": 1.8629449035147085, "learning_rate": 9.987378692227336e-06, "loss": 0.7269, "step": 4086 }, { "epoch": 0.12041306365363935, "grad_norm": 1.6793105175709497, "learning_rate": 9.987342150668508e-06, "loss": 0.5408, "step": 4087 }, { "epoch": 0.12044252611110293, "grad_norm": 1.6360898287874763, "learning_rate": 9.987305556355094e-06, "loss": 0.4468, "step": 4088 }, { "epoch": 0.1204719885685665, "grad_norm": 1.9814698573668912, "learning_rate": 9.98726890928748e-06, "loss": 0.5271, "step": 4089 }, { "epoch": 0.12050145102603008, "grad_norm": 1.6521703152070026, "learning_rate": 9.987232209466056e-06, "loss": 0.3975, "step": 4090 }, { "epoch": 0.12053091348349365, "grad_norm": 1.60615221270988, "learning_rate": 9.987195456891211e-06, "loss": 0.5227, "step": 4091 }, { "epoch": 0.12056037594095724, "grad_norm": 1.5921027945205564, "learning_rate": 9.987158651563332e-06, "loss": 0.4533, "step": 4092 }, { "epoch": 0.12058983839842081, "grad_norm": 1.6664751812767786, "learning_rate": 9.987121793482808e-06, "loss": 0.4561, "step": 4093 }, { "epoch": 0.12061930085588439, "grad_norm": 1.8744560624718076, "learning_rate": 9.98708488265003e-06, "loss": 0.4681, "step": 4094 }, { "epoch": 0.12064876331334796, "grad_norm": 1.8278616569829782, "learning_rate": 9.987047919065388e-06, "loss": 0.5318, "step": 4095 }, { "epoch": 0.12067822577081154, "grad_norm": 1.9381238313941205, "learning_rate": 9.987010902729274e-06, "loss": 0.4323, "step": 4096 }, { "epoch": 0.12070768822827511, "grad_norm": 1.83192018743171, "learning_rate": 9.986973833642075e-06, "loss": 0.4536, "step": 4097 }, { "epoch": 0.1207371506857387, "grad_norm": 1.6918493788450535, "learning_rate": 9.986936711804189e-06, "loss": 0.4735, "step": 4098 }, { "epoch": 0.12076661314320228, "grad_norm": 1.842505393894067, "learning_rate": 9.986899537216006e-06, "loss": 0.6891, "step": 4099 }, { "epoch": 0.12079607560066585, "grad_norm": 1.7849531089548698, "learning_rate": 9.98686230987792e-06, "loss": 0.6282, "step": 4100 }, { "epoch": 0.12082553805812943, "grad_norm": 1.8145859204150632, "learning_rate": 9.986825029790324e-06, "loss": 0.5701, "step": 4101 }, { "epoch": 0.120855000515593, "grad_norm": 1.6890122705276263, "learning_rate": 9.986787696953612e-06, "loss": 0.4702, "step": 4102 }, { "epoch": 0.12088446297305658, "grad_norm": 1.7458314296926043, "learning_rate": 9.98675031136818e-06, "loss": 0.6251, "step": 4103 }, { "epoch": 0.12091392543052017, "grad_norm": 1.763043734678041, "learning_rate": 9.986712873034423e-06, "loss": 0.472, "step": 4104 }, { "epoch": 0.12094338788798374, "grad_norm": 1.8461633120114842, "learning_rate": 9.986675381952737e-06, "loss": 0.4841, "step": 4105 }, { "epoch": 0.12097285034544732, "grad_norm": 1.6632383770903612, "learning_rate": 9.986637838123519e-06, "loss": 0.6122, "step": 4106 }, { "epoch": 0.12100231280291089, "grad_norm": 2.009321018139206, "learning_rate": 9.986600241547166e-06, "loss": 0.6077, "step": 4107 }, { "epoch": 0.12103177526037447, "grad_norm": 1.8094121861368881, "learning_rate": 9.986562592224074e-06, "loss": 0.6079, "step": 4108 }, { "epoch": 0.12106123771783804, "grad_norm": 1.6896799756321748, "learning_rate": 9.986524890154643e-06, "loss": 0.5329, "step": 4109 }, { "epoch": 0.12109070017530162, "grad_norm": 1.846514639266833, "learning_rate": 9.986487135339271e-06, "loss": 0.5902, "step": 4110 }, { "epoch": 0.1211201626327652, "grad_norm": 1.8282103956754883, "learning_rate": 9.986449327778358e-06, "loss": 0.5175, "step": 4111 }, { "epoch": 0.12114962509022878, "grad_norm": 1.659028138012949, "learning_rate": 9.986411467472305e-06, "loss": 0.4601, "step": 4112 }, { "epoch": 0.12117908754769235, "grad_norm": 1.855658501714073, "learning_rate": 9.98637355442151e-06, "loss": 0.5085, "step": 4113 }, { "epoch": 0.12120855000515593, "grad_norm": 1.7232011474518725, "learning_rate": 9.986335588626375e-06, "loss": 0.5426, "step": 4114 }, { "epoch": 0.1212380124626195, "grad_norm": 1.7998581283757842, "learning_rate": 9.986297570087302e-06, "loss": 0.5231, "step": 4115 }, { "epoch": 0.12126747492008308, "grad_norm": 1.799773494581496, "learning_rate": 9.986259498804693e-06, "loss": 0.6858, "step": 4116 }, { "epoch": 0.12129693737754667, "grad_norm": 1.6285648481169088, "learning_rate": 9.98622137477895e-06, "loss": 0.4624, "step": 4117 }, { "epoch": 0.12132639983501024, "grad_norm": 1.6927677122621156, "learning_rate": 9.98618319801048e-06, "loss": 0.546, "step": 4118 }, { "epoch": 0.12135586229247382, "grad_norm": 1.7569282870299556, "learning_rate": 9.98614496849968e-06, "loss": 0.5567, "step": 4119 }, { "epoch": 0.12138532474993739, "grad_norm": 1.7882357597763576, "learning_rate": 9.986106686246958e-06, "loss": 0.5945, "step": 4120 }, { "epoch": 0.12141478720740097, "grad_norm": 2.0054037070528965, "learning_rate": 9.986068351252722e-06, "loss": 0.6893, "step": 4121 }, { "epoch": 0.12144424966486454, "grad_norm": 1.8440860485866366, "learning_rate": 9.986029963517374e-06, "loss": 0.6278, "step": 4122 }, { "epoch": 0.12147371212232812, "grad_norm": 1.816813561478601, "learning_rate": 9.985991523041318e-06, "loss": 0.7321, "step": 4123 }, { "epoch": 0.1215031745797917, "grad_norm": 1.9011722032355853, "learning_rate": 9.985953029824966e-06, "loss": 0.4307, "step": 4124 }, { "epoch": 0.12153263703725528, "grad_norm": 1.8065060334458707, "learning_rate": 9.98591448386872e-06, "loss": 0.7036, "step": 4125 }, { "epoch": 0.12156209949471886, "grad_norm": 1.8820982448653405, "learning_rate": 9.985875885172991e-06, "loss": 0.6159, "step": 4126 }, { "epoch": 0.12159156195218243, "grad_norm": 1.8911470491819304, "learning_rate": 9.985837233738188e-06, "loss": 0.5857, "step": 4127 }, { "epoch": 0.121621024409646, "grad_norm": 1.6773770732341675, "learning_rate": 9.985798529564717e-06, "loss": 0.6193, "step": 4128 }, { "epoch": 0.12165048686710958, "grad_norm": 1.765737248504652, "learning_rate": 9.985759772652988e-06, "loss": 0.5308, "step": 4129 }, { "epoch": 0.12167994932457317, "grad_norm": 1.8143962964894034, "learning_rate": 9.985720963003411e-06, "loss": 0.6612, "step": 4130 }, { "epoch": 0.12170941178203674, "grad_norm": 1.9594097328012323, "learning_rate": 9.985682100616398e-06, "loss": 0.5831, "step": 4131 }, { "epoch": 0.12173887423950032, "grad_norm": 1.8636847231992446, "learning_rate": 9.985643185492359e-06, "loss": 0.6136, "step": 4132 }, { "epoch": 0.1217683366969639, "grad_norm": 1.5439096581302987, "learning_rate": 9.985604217631708e-06, "loss": 0.5155, "step": 4133 }, { "epoch": 0.12179779915442747, "grad_norm": 1.7895974319145451, "learning_rate": 9.985565197034852e-06, "loss": 0.5536, "step": 4134 }, { "epoch": 0.12182726161189104, "grad_norm": 1.9348885573792087, "learning_rate": 9.98552612370221e-06, "loss": 0.5926, "step": 4135 }, { "epoch": 0.12185672406935462, "grad_norm": 1.71407738830347, "learning_rate": 9.985486997634188e-06, "loss": 0.4378, "step": 4136 }, { "epoch": 0.12188618652681821, "grad_norm": 1.95067903165332, "learning_rate": 9.985447818831207e-06, "loss": 0.7751, "step": 4137 }, { "epoch": 0.12191564898428178, "grad_norm": 1.9216692340337467, "learning_rate": 9.985408587293677e-06, "loss": 0.5904, "step": 4138 }, { "epoch": 0.12194511144174536, "grad_norm": 1.7744896916112245, "learning_rate": 9.985369303022015e-06, "loss": 0.5122, "step": 4139 }, { "epoch": 0.12197457389920893, "grad_norm": 1.4978114394419313, "learning_rate": 9.985329966016634e-06, "loss": 0.4132, "step": 4140 }, { "epoch": 0.12200403635667251, "grad_norm": 1.838965238755262, "learning_rate": 9.985290576277954e-06, "loss": 0.5911, "step": 4141 }, { "epoch": 0.12203349881413608, "grad_norm": 1.5683863459662164, "learning_rate": 9.985251133806388e-06, "loss": 0.4295, "step": 4142 }, { "epoch": 0.12206296127159967, "grad_norm": 1.8194529156126185, "learning_rate": 9.985211638602356e-06, "loss": 0.4926, "step": 4143 }, { "epoch": 0.12209242372906325, "grad_norm": 2.1193737487504674, "learning_rate": 9.985172090666275e-06, "loss": 0.6819, "step": 4144 }, { "epoch": 0.12212188618652682, "grad_norm": 1.6553512867474722, "learning_rate": 9.985132489998561e-06, "loss": 0.4903, "step": 4145 }, { "epoch": 0.1221513486439904, "grad_norm": 1.708551503222352, "learning_rate": 9.985092836599638e-06, "loss": 0.5125, "step": 4146 }, { "epoch": 0.12218081110145397, "grad_norm": 1.6709354083408992, "learning_rate": 9.98505313046992e-06, "loss": 0.4162, "step": 4147 }, { "epoch": 0.12221027355891755, "grad_norm": 1.79805737843308, "learning_rate": 9.985013371609828e-06, "loss": 0.6283, "step": 4148 }, { "epoch": 0.12223973601638112, "grad_norm": 1.6209176369675387, "learning_rate": 9.984973560019787e-06, "loss": 0.5601, "step": 4149 }, { "epoch": 0.12226919847384471, "grad_norm": 1.8917592576420645, "learning_rate": 9.984933695700211e-06, "loss": 0.545, "step": 4150 }, { "epoch": 0.12229866093130828, "grad_norm": 1.6646549980666743, "learning_rate": 9.98489377865153e-06, "loss": 0.5458, "step": 4151 }, { "epoch": 0.12232812338877186, "grad_norm": 2.0179631929640633, "learning_rate": 9.984853808874158e-06, "loss": 0.4871, "step": 4152 }, { "epoch": 0.12235758584623543, "grad_norm": 1.5940796987371078, "learning_rate": 9.984813786368524e-06, "loss": 0.5772, "step": 4153 }, { "epoch": 0.12238704830369901, "grad_norm": 1.5548892598343953, "learning_rate": 9.984773711135048e-06, "loss": 0.3903, "step": 4154 }, { "epoch": 0.12241651076116258, "grad_norm": 1.51862032481815, "learning_rate": 9.984733583174154e-06, "loss": 0.4312, "step": 4155 }, { "epoch": 0.12244597321862617, "grad_norm": 1.6804666811025286, "learning_rate": 9.984693402486269e-06, "loss": 0.458, "step": 4156 }, { "epoch": 0.12247543567608975, "grad_norm": 1.7561576170093764, "learning_rate": 9.984653169071816e-06, "loss": 0.5506, "step": 4157 }, { "epoch": 0.12250489813355332, "grad_norm": 1.9504430451750385, "learning_rate": 9.984612882931219e-06, "loss": 0.6297, "step": 4158 }, { "epoch": 0.1225343605910169, "grad_norm": 1.8189267333161476, "learning_rate": 9.984572544064908e-06, "loss": 0.4962, "step": 4159 }, { "epoch": 0.12256382304848047, "grad_norm": 1.805999058982339, "learning_rate": 9.984532152473308e-06, "loss": 0.6192, "step": 4160 }, { "epoch": 0.12259328550594405, "grad_norm": 1.865932402846973, "learning_rate": 9.984491708156845e-06, "loss": 0.553, "step": 4161 }, { "epoch": 0.12262274796340762, "grad_norm": 1.5754351649997684, "learning_rate": 9.984451211115947e-06, "loss": 0.3916, "step": 4162 }, { "epoch": 0.12265221042087121, "grad_norm": 1.7742327621111975, "learning_rate": 9.984410661351044e-06, "loss": 0.4126, "step": 4163 }, { "epoch": 0.12268167287833479, "grad_norm": 1.732238229607611, "learning_rate": 9.984370058862564e-06, "loss": 0.5957, "step": 4164 }, { "epoch": 0.12271113533579836, "grad_norm": 1.7094939427056635, "learning_rate": 9.984329403650937e-06, "loss": 0.5994, "step": 4165 }, { "epoch": 0.12274059779326194, "grad_norm": 1.6149982078073173, "learning_rate": 9.984288695716594e-06, "loss": 0.4024, "step": 4166 }, { "epoch": 0.12277006025072551, "grad_norm": 1.6160933332783411, "learning_rate": 9.98424793505996e-06, "loss": 0.5033, "step": 4167 }, { "epoch": 0.12279952270818908, "grad_norm": 1.8760015263713594, "learning_rate": 9.984207121681474e-06, "loss": 0.6031, "step": 4168 }, { "epoch": 0.12282898516565267, "grad_norm": 1.677793170443996, "learning_rate": 9.984166255581564e-06, "loss": 0.4378, "step": 4169 }, { "epoch": 0.12285844762311625, "grad_norm": 1.648154408553719, "learning_rate": 9.98412533676066e-06, "loss": 0.6244, "step": 4170 }, { "epoch": 0.12288791008057982, "grad_norm": 1.6700370503844328, "learning_rate": 9.984084365219198e-06, "loss": 0.5127, "step": 4171 }, { "epoch": 0.1229173725380434, "grad_norm": 1.6684220469193018, "learning_rate": 9.984043340957613e-06, "loss": 0.6015, "step": 4172 }, { "epoch": 0.12294683499550697, "grad_norm": 1.766348274511248, "learning_rate": 9.984002263976334e-06, "loss": 0.6067, "step": 4173 }, { "epoch": 0.12297629745297055, "grad_norm": 1.5505802729858102, "learning_rate": 9.983961134275798e-06, "loss": 0.4577, "step": 4174 }, { "epoch": 0.12300575991043412, "grad_norm": 1.8432564703997465, "learning_rate": 9.98391995185644e-06, "loss": 0.5865, "step": 4175 }, { "epoch": 0.12303522236789771, "grad_norm": 1.764485790088046, "learning_rate": 9.983878716718696e-06, "loss": 0.6234, "step": 4176 }, { "epoch": 0.12306468482536129, "grad_norm": 1.6169061358768648, "learning_rate": 9.983837428863002e-06, "loss": 0.633, "step": 4177 }, { "epoch": 0.12309414728282486, "grad_norm": 1.5045071384794697, "learning_rate": 9.983796088289795e-06, "loss": 0.5639, "step": 4178 }, { "epoch": 0.12312360974028844, "grad_norm": 1.720735516310925, "learning_rate": 9.98375469499951e-06, "loss": 0.5266, "step": 4179 }, { "epoch": 0.12315307219775201, "grad_norm": 1.7894243468536504, "learning_rate": 9.983713248992588e-06, "loss": 0.4521, "step": 4180 }, { "epoch": 0.12318253465521559, "grad_norm": 1.5739830994543336, "learning_rate": 9.983671750269467e-06, "loss": 0.4414, "step": 4181 }, { "epoch": 0.12321199711267918, "grad_norm": 1.7276567727893464, "learning_rate": 9.983630198830584e-06, "loss": 0.4122, "step": 4182 }, { "epoch": 0.12324145957014275, "grad_norm": 1.6336872495011947, "learning_rate": 9.983588594676378e-06, "loss": 0.5185, "step": 4183 }, { "epoch": 0.12327092202760632, "grad_norm": 1.7692296702948083, "learning_rate": 9.983546937807294e-06, "loss": 0.6503, "step": 4184 }, { "epoch": 0.1233003844850699, "grad_norm": 1.9267433688984823, "learning_rate": 9.983505228223767e-06, "loss": 0.6163, "step": 4185 }, { "epoch": 0.12332984694253347, "grad_norm": 1.5850455354618986, "learning_rate": 9.98346346592624e-06, "loss": 0.4828, "step": 4186 }, { "epoch": 0.12335930939999705, "grad_norm": 1.6722243452186956, "learning_rate": 9.983421650915158e-06, "loss": 0.6012, "step": 4187 }, { "epoch": 0.12338877185746062, "grad_norm": 1.6686450557308876, "learning_rate": 9.983379783190959e-06, "loss": 0.5307, "step": 4188 }, { "epoch": 0.12341823431492421, "grad_norm": 1.8272002443891182, "learning_rate": 9.983337862754085e-06, "loss": 0.4394, "step": 4189 }, { "epoch": 0.12344769677238779, "grad_norm": 1.7589648920482428, "learning_rate": 9.983295889604985e-06, "loss": 0.5437, "step": 4190 }, { "epoch": 0.12347715922985136, "grad_norm": 1.717041350136578, "learning_rate": 9.983253863744099e-06, "loss": 0.3651, "step": 4191 }, { "epoch": 0.12350662168731494, "grad_norm": 1.8224741932490145, "learning_rate": 9.983211785171871e-06, "loss": 0.5642, "step": 4192 }, { "epoch": 0.12353608414477851, "grad_norm": 1.7526835691459193, "learning_rate": 9.983169653888749e-06, "loss": 0.6063, "step": 4193 }, { "epoch": 0.12356554660224209, "grad_norm": 1.7636554625057863, "learning_rate": 9.983127469895178e-06, "loss": 0.4877, "step": 4194 }, { "epoch": 0.12359500905970568, "grad_norm": 1.7346974571047684, "learning_rate": 9.9830852331916e-06, "loss": 0.7376, "step": 4195 }, { "epoch": 0.12362447151716925, "grad_norm": 1.742236942804492, "learning_rate": 9.983042943778468e-06, "loss": 0.5991, "step": 4196 }, { "epoch": 0.12365393397463283, "grad_norm": 1.8290665643524033, "learning_rate": 9.983000601656226e-06, "loss": 0.545, "step": 4197 }, { "epoch": 0.1236833964320964, "grad_norm": 1.452241488810025, "learning_rate": 9.982958206825321e-06, "loss": 0.3722, "step": 4198 }, { "epoch": 0.12371285888955998, "grad_norm": 1.7508167932162935, "learning_rate": 9.982915759286202e-06, "loss": 0.6163, "step": 4199 }, { "epoch": 0.12374232134702355, "grad_norm": 1.842380730514958, "learning_rate": 9.982873259039321e-06, "loss": 0.5834, "step": 4200 }, { "epoch": 0.12377178380448713, "grad_norm": 1.7134460761108299, "learning_rate": 9.982830706085124e-06, "loss": 0.616, "step": 4201 }, { "epoch": 0.12380124626195071, "grad_norm": 1.6561370243559488, "learning_rate": 9.982788100424063e-06, "loss": 0.5968, "step": 4202 }, { "epoch": 0.12383070871941429, "grad_norm": 1.5166397148178419, "learning_rate": 9.982745442056586e-06, "loss": 0.4613, "step": 4203 }, { "epoch": 0.12386017117687786, "grad_norm": 1.7434321634120484, "learning_rate": 9.982702730983149e-06, "loss": 0.5845, "step": 4204 }, { "epoch": 0.12388963363434144, "grad_norm": 1.6857944837948313, "learning_rate": 9.9826599672042e-06, "loss": 0.626, "step": 4205 }, { "epoch": 0.12391909609180501, "grad_norm": 1.772571373820681, "learning_rate": 9.982617150720192e-06, "loss": 0.5129, "step": 4206 }, { "epoch": 0.12394855854926859, "grad_norm": 1.9810189729151773, "learning_rate": 9.982574281531579e-06, "loss": 0.6586, "step": 4207 }, { "epoch": 0.12397802100673218, "grad_norm": 1.7387112491892247, "learning_rate": 9.982531359638812e-06, "loss": 0.5283, "step": 4208 }, { "epoch": 0.12400748346419575, "grad_norm": 1.705167072414877, "learning_rate": 9.982488385042349e-06, "loss": 0.6243, "step": 4209 }, { "epoch": 0.12403694592165933, "grad_norm": 1.828229089627098, "learning_rate": 9.98244535774264e-06, "loss": 0.6996, "step": 4210 }, { "epoch": 0.1240664083791229, "grad_norm": 1.8604250839783953, "learning_rate": 9.982402277740143e-06, "loss": 0.5193, "step": 4211 }, { "epoch": 0.12409587083658648, "grad_norm": 1.9055386731427684, "learning_rate": 9.982359145035316e-06, "loss": 0.6663, "step": 4212 }, { "epoch": 0.12412533329405005, "grad_norm": 1.635010931921762, "learning_rate": 9.98231595962861e-06, "loss": 0.4213, "step": 4213 }, { "epoch": 0.12415479575151363, "grad_norm": 1.9898400858967669, "learning_rate": 9.982272721520484e-06, "loss": 0.6247, "step": 4214 }, { "epoch": 0.12418425820897722, "grad_norm": 1.6838709094687396, "learning_rate": 9.982229430711397e-06, "loss": 0.5171, "step": 4215 }, { "epoch": 0.12421372066644079, "grad_norm": 1.7157894027651763, "learning_rate": 9.982186087201804e-06, "loss": 0.3514, "step": 4216 }, { "epoch": 0.12424318312390437, "grad_norm": 1.6772681421355078, "learning_rate": 9.982142690992165e-06, "loss": 0.6071, "step": 4217 }, { "epoch": 0.12427264558136794, "grad_norm": 1.8011903292589242, "learning_rate": 9.982099242082939e-06, "loss": 0.587, "step": 4218 }, { "epoch": 0.12430210803883152, "grad_norm": 1.7470390269492782, "learning_rate": 9.982055740474587e-06, "loss": 0.6542, "step": 4219 }, { "epoch": 0.12433157049629509, "grad_norm": 1.6209318841219855, "learning_rate": 9.982012186167567e-06, "loss": 0.5903, "step": 4220 }, { "epoch": 0.12436103295375868, "grad_norm": 1.66871488531197, "learning_rate": 9.98196857916234e-06, "loss": 0.585, "step": 4221 }, { "epoch": 0.12439049541122225, "grad_norm": 1.880811589139292, "learning_rate": 9.981924919459368e-06, "loss": 0.4886, "step": 4222 }, { "epoch": 0.12441995786868583, "grad_norm": 1.5809321739144242, "learning_rate": 9.981881207059111e-06, "loss": 0.4441, "step": 4223 }, { "epoch": 0.1244494203261494, "grad_norm": 1.7296727190496746, "learning_rate": 9.981837441962035e-06, "loss": 0.4959, "step": 4224 }, { "epoch": 0.12447888278361298, "grad_norm": 1.6876996945151548, "learning_rate": 9.981793624168598e-06, "loss": 0.6108, "step": 4225 }, { "epoch": 0.12450834524107655, "grad_norm": 1.736950928921161, "learning_rate": 9.98174975367927e-06, "loss": 0.5854, "step": 4226 }, { "epoch": 0.12453780769854013, "grad_norm": 1.4971792780053095, "learning_rate": 9.981705830494509e-06, "loss": 0.4743, "step": 4227 }, { "epoch": 0.12456727015600372, "grad_norm": 1.5750300075142203, "learning_rate": 9.981661854614783e-06, "loss": 0.5216, "step": 4228 }, { "epoch": 0.12459673261346729, "grad_norm": 1.6707054315342103, "learning_rate": 9.981617826040555e-06, "loss": 0.4877, "step": 4229 }, { "epoch": 0.12462619507093087, "grad_norm": 1.5998626102654676, "learning_rate": 9.981573744772293e-06, "loss": 0.6022, "step": 4230 }, { "epoch": 0.12465565752839444, "grad_norm": 1.6687653419864685, "learning_rate": 9.981529610810461e-06, "loss": 0.4592, "step": 4231 }, { "epoch": 0.12468511998585802, "grad_norm": 1.738072025210125, "learning_rate": 9.981485424155528e-06, "loss": 0.4718, "step": 4232 }, { "epoch": 0.12471458244332159, "grad_norm": 1.8209916992841035, "learning_rate": 9.98144118480796e-06, "loss": 0.5936, "step": 4233 }, { "epoch": 0.12474404490078518, "grad_norm": 1.8247009668658611, "learning_rate": 9.981396892768225e-06, "loss": 0.5975, "step": 4234 }, { "epoch": 0.12477350735824876, "grad_norm": 1.8729414364627082, "learning_rate": 9.981352548036791e-06, "loss": 0.5597, "step": 4235 }, { "epoch": 0.12480296981571233, "grad_norm": 1.7783509532385224, "learning_rate": 9.981308150614129e-06, "loss": 0.4695, "step": 4236 }, { "epoch": 0.1248324322731759, "grad_norm": 1.8967002391369987, "learning_rate": 9.981263700500707e-06, "loss": 0.4891, "step": 4237 }, { "epoch": 0.12486189473063948, "grad_norm": 1.994722194608309, "learning_rate": 9.981219197696996e-06, "loss": 0.614, "step": 4238 }, { "epoch": 0.12489135718810306, "grad_norm": 2.1751245008236677, "learning_rate": 9.981174642203468e-06, "loss": 0.68, "step": 4239 }, { "epoch": 0.12492081964556663, "grad_norm": 1.7417349829788455, "learning_rate": 9.98113003402059e-06, "loss": 0.4859, "step": 4240 }, { "epoch": 0.12495028210303022, "grad_norm": 1.7211203141190685, "learning_rate": 9.981085373148837e-06, "loss": 0.6013, "step": 4241 }, { "epoch": 0.1249797445604938, "grad_norm": 1.9601881503666119, "learning_rate": 9.981040659588684e-06, "loss": 0.7335, "step": 4242 }, { "epoch": 0.12500920701795737, "grad_norm": 1.877889014519046, "learning_rate": 9.980995893340597e-06, "loss": 0.6092, "step": 4243 }, { "epoch": 0.12503866947542094, "grad_norm": 1.8877107427842608, "learning_rate": 9.980951074405054e-06, "loss": 0.8004, "step": 4244 }, { "epoch": 0.12506813193288452, "grad_norm": 1.863460184824403, "learning_rate": 9.98090620278253e-06, "loss": 0.714, "step": 4245 }, { "epoch": 0.1250975943903481, "grad_norm": 1.5906164989226135, "learning_rate": 9.980861278473499e-06, "loss": 0.5044, "step": 4246 }, { "epoch": 0.12512705684781167, "grad_norm": 1.857588471041421, "learning_rate": 9.980816301478433e-06, "loss": 0.6424, "step": 4247 }, { "epoch": 0.12515651930527524, "grad_norm": 1.651087228797476, "learning_rate": 9.980771271797811e-06, "loss": 0.4547, "step": 4248 }, { "epoch": 0.12518598176273882, "grad_norm": 1.9152152519367718, "learning_rate": 9.980726189432109e-06, "loss": 0.5437, "step": 4249 }, { "epoch": 0.12521544422020242, "grad_norm": 1.6059042113973363, "learning_rate": 9.980681054381803e-06, "loss": 0.4205, "step": 4250 }, { "epoch": 0.125244906677666, "grad_norm": 1.67021998826722, "learning_rate": 9.98063586664737e-06, "loss": 0.4542, "step": 4251 }, { "epoch": 0.12527436913512957, "grad_norm": 1.8644992336570698, "learning_rate": 9.98059062622929e-06, "loss": 0.6726, "step": 4252 }, { "epoch": 0.12530383159259315, "grad_norm": 1.9668745262655203, "learning_rate": 9.98054533312804e-06, "loss": 0.5481, "step": 4253 }, { "epoch": 0.12533329405005672, "grad_norm": 1.9355889277743872, "learning_rate": 9.980499987344098e-06, "loss": 0.3629, "step": 4254 }, { "epoch": 0.1253627565075203, "grad_norm": 1.5592632871685832, "learning_rate": 9.980454588877945e-06, "loss": 0.5144, "step": 4255 }, { "epoch": 0.12539221896498387, "grad_norm": 1.8488678054593555, "learning_rate": 9.980409137730063e-06, "loss": 0.5925, "step": 4256 }, { "epoch": 0.12542168142244745, "grad_norm": 1.9292887620017263, "learning_rate": 9.980363633900929e-06, "loss": 0.6099, "step": 4257 }, { "epoch": 0.12545114387991102, "grad_norm": 1.5294453438087587, "learning_rate": 9.980318077391028e-06, "loss": 0.4733, "step": 4258 }, { "epoch": 0.1254806063373746, "grad_norm": 1.6044338856278313, "learning_rate": 9.98027246820084e-06, "loss": 0.4392, "step": 4259 }, { "epoch": 0.12551006879483817, "grad_norm": 1.7228158056250236, "learning_rate": 9.980226806330847e-06, "loss": 0.6354, "step": 4260 }, { "epoch": 0.12553953125230174, "grad_norm": 1.6288710940936715, "learning_rate": 9.980181091781532e-06, "loss": 0.5524, "step": 4261 }, { "epoch": 0.12556899370976532, "grad_norm": 1.55772435394615, "learning_rate": 9.98013532455338e-06, "loss": 0.514, "step": 4262 }, { "epoch": 0.12559845616722892, "grad_norm": 1.6782063562002327, "learning_rate": 9.980089504646874e-06, "loss": 0.4472, "step": 4263 }, { "epoch": 0.1256279186246925, "grad_norm": 1.7416204461258145, "learning_rate": 9.9800436320625e-06, "loss": 0.4691, "step": 4264 }, { "epoch": 0.12565738108215607, "grad_norm": 1.6048025076956272, "learning_rate": 9.97999770680074e-06, "loss": 0.4467, "step": 4265 }, { "epoch": 0.12568684353961965, "grad_norm": 1.4500011010699074, "learning_rate": 9.979951728862083e-06, "loss": 0.4063, "step": 4266 }, { "epoch": 0.12571630599708322, "grad_norm": 1.5895039474921933, "learning_rate": 9.979905698247015e-06, "loss": 0.4595, "step": 4267 }, { "epoch": 0.1257457684545468, "grad_norm": 1.5889708272425112, "learning_rate": 9.979859614956022e-06, "loss": 0.5463, "step": 4268 }, { "epoch": 0.12577523091201037, "grad_norm": 2.069507283995902, "learning_rate": 9.979813478989593e-06, "loss": 0.5598, "step": 4269 }, { "epoch": 0.12580469336947395, "grad_norm": 1.6654563733648602, "learning_rate": 9.979767290348212e-06, "loss": 0.3829, "step": 4270 }, { "epoch": 0.12583415582693752, "grad_norm": 1.5499750583422847, "learning_rate": 9.979721049032372e-06, "loss": 0.445, "step": 4271 }, { "epoch": 0.1258636182844011, "grad_norm": 1.5571559764779093, "learning_rate": 9.97967475504256e-06, "loss": 0.5408, "step": 4272 }, { "epoch": 0.12589308074186467, "grad_norm": 1.5227806611005792, "learning_rate": 9.979628408379265e-06, "loss": 0.3916, "step": 4273 }, { "epoch": 0.12592254319932825, "grad_norm": 1.6939453249514391, "learning_rate": 9.979582009042979e-06, "loss": 0.4144, "step": 4274 }, { "epoch": 0.12595200565679182, "grad_norm": 1.6402940511854345, "learning_rate": 9.979535557034191e-06, "loss": 0.5555, "step": 4275 }, { "epoch": 0.12598146811425542, "grad_norm": 1.7474073432173776, "learning_rate": 9.979489052353395e-06, "loss": 0.727, "step": 4276 }, { "epoch": 0.126010930571719, "grad_norm": 1.7347812643734495, "learning_rate": 9.979442495001082e-06, "loss": 0.4919, "step": 4277 }, { "epoch": 0.12604039302918257, "grad_norm": 2.024003888784948, "learning_rate": 9.979395884977742e-06, "loss": 0.6702, "step": 4278 }, { "epoch": 0.12606985548664615, "grad_norm": 1.9481069912396238, "learning_rate": 9.97934922228387e-06, "loss": 0.6459, "step": 4279 }, { "epoch": 0.12609931794410972, "grad_norm": 1.6324038929259705, "learning_rate": 9.97930250691996e-06, "loss": 0.5847, "step": 4280 }, { "epoch": 0.1261287804015733, "grad_norm": 1.7907142715517819, "learning_rate": 9.979255738886506e-06, "loss": 0.5699, "step": 4281 }, { "epoch": 0.12615824285903687, "grad_norm": 1.6207345142626206, "learning_rate": 9.979208918184e-06, "loss": 0.5792, "step": 4282 }, { "epoch": 0.12618770531650045, "grad_norm": 1.8630136283729322, "learning_rate": 9.979162044812942e-06, "loss": 0.4886, "step": 4283 }, { "epoch": 0.12621716777396402, "grad_norm": 1.6571396109727419, "learning_rate": 9.979115118773826e-06, "loss": 0.6034, "step": 4284 }, { "epoch": 0.1262466302314276, "grad_norm": 1.6594790621682813, "learning_rate": 9.979068140067145e-06, "loss": 0.5089, "step": 4285 }, { "epoch": 0.12627609268889117, "grad_norm": 1.7397195960218832, "learning_rate": 9.9790211086934e-06, "loss": 0.5472, "step": 4286 }, { "epoch": 0.12630555514635475, "grad_norm": 1.7857961419295851, "learning_rate": 9.978974024653088e-06, "loss": 0.5378, "step": 4287 }, { "epoch": 0.12633501760381832, "grad_norm": 1.709637151538604, "learning_rate": 9.978926887946705e-06, "loss": 0.6657, "step": 4288 }, { "epoch": 0.12636448006128193, "grad_norm": 1.7947064327230218, "learning_rate": 9.978879698574751e-06, "loss": 0.5048, "step": 4289 }, { "epoch": 0.1263939425187455, "grad_norm": 1.8597753057293624, "learning_rate": 9.978832456537725e-06, "loss": 0.6059, "step": 4290 }, { "epoch": 0.12642340497620908, "grad_norm": 1.6724764192476882, "learning_rate": 9.978785161836126e-06, "loss": 0.3977, "step": 4291 }, { "epoch": 0.12645286743367265, "grad_norm": 1.4863800019323883, "learning_rate": 9.978737814470456e-06, "loss": 0.5376, "step": 4292 }, { "epoch": 0.12648232989113622, "grad_norm": 1.758080561974698, "learning_rate": 9.978690414441214e-06, "loss": 0.6955, "step": 4293 }, { "epoch": 0.1265117923485998, "grad_norm": 1.4139415487456781, "learning_rate": 9.978642961748904e-06, "loss": 0.3315, "step": 4294 }, { "epoch": 0.12654125480606337, "grad_norm": 1.4431999642003195, "learning_rate": 9.978595456394023e-06, "loss": 0.3724, "step": 4295 }, { "epoch": 0.12657071726352695, "grad_norm": 1.6985992650013366, "learning_rate": 9.978547898377077e-06, "loss": 0.5677, "step": 4296 }, { "epoch": 0.12660017972099052, "grad_norm": 1.8889171271729404, "learning_rate": 9.978500287698572e-06, "loss": 0.548, "step": 4297 }, { "epoch": 0.1266296421784541, "grad_norm": 1.702150302360508, "learning_rate": 9.978452624359003e-06, "loss": 0.469, "step": 4298 }, { "epoch": 0.12665910463591767, "grad_norm": 1.9626642627135726, "learning_rate": 9.978404908358883e-06, "loss": 0.7997, "step": 4299 }, { "epoch": 0.12668856709338125, "grad_norm": 1.7279239385700615, "learning_rate": 9.978357139698713e-06, "loss": 0.6064, "step": 4300 }, { "epoch": 0.12671802955084482, "grad_norm": 1.6533723295843505, "learning_rate": 9.978309318378997e-06, "loss": 0.5286, "step": 4301 }, { "epoch": 0.12674749200830843, "grad_norm": 1.7008677967575612, "learning_rate": 9.978261444400243e-06, "loss": 0.5971, "step": 4302 }, { "epoch": 0.126776954465772, "grad_norm": 2.0845255172950745, "learning_rate": 9.978213517762956e-06, "loss": 0.6581, "step": 4303 }, { "epoch": 0.12680641692323558, "grad_norm": 1.647537303086881, "learning_rate": 9.978165538467643e-06, "loss": 0.6346, "step": 4304 }, { "epoch": 0.12683587938069915, "grad_norm": 2.035075265400199, "learning_rate": 9.978117506514815e-06, "loss": 0.5885, "step": 4305 }, { "epoch": 0.12686534183816273, "grad_norm": 1.5548411037563217, "learning_rate": 9.978069421904975e-06, "loss": 0.5515, "step": 4306 }, { "epoch": 0.1268948042956263, "grad_norm": 1.7995367804803362, "learning_rate": 9.978021284638633e-06, "loss": 0.6784, "step": 4307 }, { "epoch": 0.12692426675308988, "grad_norm": 1.8065866531729766, "learning_rate": 9.9779730947163e-06, "loss": 0.5907, "step": 4308 }, { "epoch": 0.12695372921055345, "grad_norm": 1.6490601317140083, "learning_rate": 9.977924852138485e-06, "loss": 0.5125, "step": 4309 }, { "epoch": 0.12698319166801703, "grad_norm": 1.6384495459019865, "learning_rate": 9.977876556905697e-06, "loss": 0.5369, "step": 4310 }, { "epoch": 0.1270126541254806, "grad_norm": 1.70245257693715, "learning_rate": 9.977828209018449e-06, "loss": 0.584, "step": 4311 }, { "epoch": 0.12704211658294418, "grad_norm": 1.7298834800206235, "learning_rate": 9.97777980847725e-06, "loss": 0.6812, "step": 4312 }, { "epoch": 0.12707157904040775, "grad_norm": 1.8410632198928643, "learning_rate": 9.977731355282614e-06, "loss": 0.6272, "step": 4313 }, { "epoch": 0.12710104149787133, "grad_norm": 1.8350542470277176, "learning_rate": 9.977682849435053e-06, "loss": 0.4715, "step": 4314 }, { "epoch": 0.12713050395533493, "grad_norm": 1.8856873643858423, "learning_rate": 9.977634290935079e-06, "loss": 0.6426, "step": 4315 }, { "epoch": 0.1271599664127985, "grad_norm": 1.9499892989989676, "learning_rate": 9.977585679783207e-06, "loss": 0.5029, "step": 4316 }, { "epoch": 0.12718942887026208, "grad_norm": 1.6942518704651635, "learning_rate": 9.97753701597995e-06, "loss": 0.4274, "step": 4317 }, { "epoch": 0.12721889132772565, "grad_norm": 1.6688489770803383, "learning_rate": 9.977488299525823e-06, "loss": 0.4641, "step": 4318 }, { "epoch": 0.12724835378518923, "grad_norm": 1.758661020330968, "learning_rate": 9.977439530421344e-06, "loss": 0.6335, "step": 4319 }, { "epoch": 0.1272778162426528, "grad_norm": 1.9187879253518552, "learning_rate": 9.977390708667025e-06, "loss": 0.5947, "step": 4320 }, { "epoch": 0.12730727870011638, "grad_norm": 1.7436403622283265, "learning_rate": 9.977341834263384e-06, "loss": 0.5165, "step": 4321 }, { "epoch": 0.12733674115757995, "grad_norm": 1.8277969678920385, "learning_rate": 9.977292907210938e-06, "loss": 0.5991, "step": 4322 }, { "epoch": 0.12736620361504353, "grad_norm": 1.8476456132013652, "learning_rate": 9.977243927510206e-06, "loss": 0.6955, "step": 4323 }, { "epoch": 0.1273956660725071, "grad_norm": 1.6636383206607046, "learning_rate": 9.977194895161703e-06, "loss": 0.5994, "step": 4324 }, { "epoch": 0.12742512852997068, "grad_norm": 1.7338478382732623, "learning_rate": 9.97714581016595e-06, "loss": 0.5917, "step": 4325 }, { "epoch": 0.12745459098743425, "grad_norm": 1.611933115576724, "learning_rate": 9.977096672523464e-06, "loss": 0.558, "step": 4326 }, { "epoch": 0.12748405344489783, "grad_norm": 1.8738368284309643, "learning_rate": 9.977047482234767e-06, "loss": 0.6401, "step": 4327 }, { "epoch": 0.12751351590236143, "grad_norm": 1.682281968011274, "learning_rate": 9.97699823930038e-06, "loss": 0.5636, "step": 4328 }, { "epoch": 0.127542978359825, "grad_norm": 1.5963536584724547, "learning_rate": 9.97694894372082e-06, "loss": 0.4792, "step": 4329 }, { "epoch": 0.12757244081728858, "grad_norm": 1.7554206511925472, "learning_rate": 9.976899595496612e-06, "loss": 0.538, "step": 4330 }, { "epoch": 0.12760190327475215, "grad_norm": 1.9410190648006096, "learning_rate": 9.976850194628277e-06, "loss": 0.6349, "step": 4331 }, { "epoch": 0.12763136573221573, "grad_norm": 1.8951107427560807, "learning_rate": 9.976800741116337e-06, "loss": 0.6664, "step": 4332 }, { "epoch": 0.1276608281896793, "grad_norm": 1.7424830992620262, "learning_rate": 9.976751234961315e-06, "loss": 0.5037, "step": 4333 }, { "epoch": 0.12769029064714288, "grad_norm": 2.062534997027782, "learning_rate": 9.976701676163735e-06, "loss": 0.4975, "step": 4334 }, { "epoch": 0.12771975310460645, "grad_norm": 1.7044754434875764, "learning_rate": 9.976652064724125e-06, "loss": 0.5937, "step": 4335 }, { "epoch": 0.12774921556207003, "grad_norm": 1.8463962294583782, "learning_rate": 9.976602400643004e-06, "loss": 0.5927, "step": 4336 }, { "epoch": 0.1277786780195336, "grad_norm": 1.7475581254991135, "learning_rate": 9.976552683920898e-06, "loss": 0.4887, "step": 4337 }, { "epoch": 0.12780814047699718, "grad_norm": 1.6340333266385343, "learning_rate": 9.976502914558335e-06, "loss": 0.4544, "step": 4338 }, { "epoch": 0.12783760293446075, "grad_norm": 1.8211130764677559, "learning_rate": 9.97645309255584e-06, "loss": 0.5737, "step": 4339 }, { "epoch": 0.12786706539192433, "grad_norm": 1.6284673169672716, "learning_rate": 9.976403217913942e-06, "loss": 0.4644, "step": 4340 }, { "epoch": 0.12789652784938793, "grad_norm": 1.6708268083696212, "learning_rate": 9.976353290633167e-06, "loss": 0.3974, "step": 4341 }, { "epoch": 0.1279259903068515, "grad_norm": 1.576365951984445, "learning_rate": 9.976303310714045e-06, "loss": 0.5165, "step": 4342 }, { "epoch": 0.12795545276431508, "grad_norm": 1.593219391803689, "learning_rate": 9.976253278157102e-06, "loss": 0.541, "step": 4343 }, { "epoch": 0.12798491522177866, "grad_norm": 1.5694313384603735, "learning_rate": 9.976203192962866e-06, "loss": 0.5029, "step": 4344 }, { "epoch": 0.12801437767924223, "grad_norm": 1.6004650770620337, "learning_rate": 9.976153055131871e-06, "loss": 0.4836, "step": 4345 }, { "epoch": 0.1280438401367058, "grad_norm": 1.8541097149556327, "learning_rate": 9.976102864664648e-06, "loss": 0.6469, "step": 4346 }, { "epoch": 0.12807330259416938, "grad_norm": 1.7561647119877266, "learning_rate": 9.976052621561723e-06, "loss": 0.674, "step": 4347 }, { "epoch": 0.12810276505163296, "grad_norm": 1.7405286340130863, "learning_rate": 9.97600232582363e-06, "loss": 0.5491, "step": 4348 }, { "epoch": 0.12813222750909653, "grad_norm": 1.9716847135345992, "learning_rate": 9.9759519774509e-06, "loss": 0.4741, "step": 4349 }, { "epoch": 0.1281616899665601, "grad_norm": 1.8253911622726289, "learning_rate": 9.975901576444067e-06, "loss": 0.5639, "step": 4350 }, { "epoch": 0.12819115242402368, "grad_norm": 2.031956784867657, "learning_rate": 9.975851122803667e-06, "loss": 0.5511, "step": 4351 }, { "epoch": 0.12822061488148725, "grad_norm": 1.679391694542525, "learning_rate": 9.975800616530226e-06, "loss": 0.5286, "step": 4352 }, { "epoch": 0.12825007733895083, "grad_norm": 1.571663967589132, "learning_rate": 9.975750057624283e-06, "loss": 0.4244, "step": 4353 }, { "epoch": 0.12827953979641443, "grad_norm": 1.6401650892319053, "learning_rate": 9.975699446086374e-06, "loss": 0.5411, "step": 4354 }, { "epoch": 0.128309002253878, "grad_norm": 1.6787418584088067, "learning_rate": 9.975648781917031e-06, "loss": 0.4028, "step": 4355 }, { "epoch": 0.12833846471134158, "grad_norm": 1.5673594215826219, "learning_rate": 9.975598065116792e-06, "loss": 0.5144, "step": 4356 }, { "epoch": 0.12836792716880516, "grad_norm": 1.9049219412360947, "learning_rate": 9.975547295686195e-06, "loss": 0.5088, "step": 4357 }, { "epoch": 0.12839738962626873, "grad_norm": 1.8657137325337771, "learning_rate": 9.975496473625773e-06, "loss": 0.5663, "step": 4358 }, { "epoch": 0.1284268520837323, "grad_norm": 1.8455191658017698, "learning_rate": 9.975445598936066e-06, "loss": 0.6555, "step": 4359 }, { "epoch": 0.12845631454119588, "grad_norm": 1.7756344288093808, "learning_rate": 9.97539467161761e-06, "loss": 0.485, "step": 4360 }, { "epoch": 0.12848577699865946, "grad_norm": 1.8025221124911337, "learning_rate": 9.975343691670949e-06, "loss": 0.5132, "step": 4361 }, { "epoch": 0.12851523945612303, "grad_norm": 1.727858020730479, "learning_rate": 9.975292659096617e-06, "loss": 0.4727, "step": 4362 }, { "epoch": 0.1285447019135866, "grad_norm": 1.8593655974692322, "learning_rate": 9.975241573895155e-06, "loss": 0.4962, "step": 4363 }, { "epoch": 0.12857416437105018, "grad_norm": 1.7482113601432727, "learning_rate": 9.975190436067105e-06, "loss": 0.5313, "step": 4364 }, { "epoch": 0.12860362682851376, "grad_norm": 1.9556252440611284, "learning_rate": 9.975139245613006e-06, "loss": 0.6365, "step": 4365 }, { "epoch": 0.12863308928597733, "grad_norm": 1.6749219109517421, "learning_rate": 9.9750880025334e-06, "loss": 0.6194, "step": 4366 }, { "epoch": 0.12866255174344093, "grad_norm": 1.9407806606040903, "learning_rate": 9.97503670682883e-06, "loss": 0.6537, "step": 4367 }, { "epoch": 0.1286920142009045, "grad_norm": 1.8719491536053234, "learning_rate": 9.974985358499838e-06, "loss": 0.5219, "step": 4368 }, { "epoch": 0.12872147665836808, "grad_norm": 1.7113373690116218, "learning_rate": 9.974933957546968e-06, "loss": 0.6221, "step": 4369 }, { "epoch": 0.12875093911583166, "grad_norm": 1.7059159667069317, "learning_rate": 9.97488250397076e-06, "loss": 0.5195, "step": 4370 }, { "epoch": 0.12878040157329523, "grad_norm": 1.7614907255350962, "learning_rate": 9.974830997771763e-06, "loss": 0.5837, "step": 4371 }, { "epoch": 0.1288098640307588, "grad_norm": 1.752197476307753, "learning_rate": 9.974779438950519e-06, "loss": 0.592, "step": 4372 }, { "epoch": 0.12883932648822238, "grad_norm": 2.0183263589650013, "learning_rate": 9.974727827507576e-06, "loss": 0.5773, "step": 4373 }, { "epoch": 0.12886878894568596, "grad_norm": 1.7413776192211121, "learning_rate": 9.974676163443476e-06, "loss": 0.4956, "step": 4374 }, { "epoch": 0.12889825140314953, "grad_norm": 1.6477827667824043, "learning_rate": 9.97462444675877e-06, "loss": 0.5495, "step": 4375 }, { "epoch": 0.1289277138606131, "grad_norm": 1.5946238759921103, "learning_rate": 9.974572677454e-06, "loss": 0.5158, "step": 4376 }, { "epoch": 0.12895717631807668, "grad_norm": 1.671424040722461, "learning_rate": 9.974520855529717e-06, "loss": 0.4575, "step": 4377 }, { "epoch": 0.12898663877554026, "grad_norm": 1.6612154232114988, "learning_rate": 9.97446898098647e-06, "loss": 0.3805, "step": 4378 }, { "epoch": 0.12901610123300383, "grad_norm": 1.7953278234807617, "learning_rate": 9.974417053824802e-06, "loss": 0.5161, "step": 4379 }, { "epoch": 0.12904556369046744, "grad_norm": 1.801711147330202, "learning_rate": 9.974365074045272e-06, "loss": 0.7841, "step": 4380 }, { "epoch": 0.129075026147931, "grad_norm": 1.7560082019317815, "learning_rate": 9.97431304164842e-06, "loss": 0.4893, "step": 4381 }, { "epoch": 0.12910448860539459, "grad_norm": 2.1404447659075707, "learning_rate": 9.974260956634802e-06, "loss": 0.5354, "step": 4382 }, { "epoch": 0.12913395106285816, "grad_norm": 1.622818601850698, "learning_rate": 9.97420881900497e-06, "loss": 0.6404, "step": 4383 }, { "epoch": 0.12916341352032173, "grad_norm": 1.6833345889383393, "learning_rate": 9.974156628759468e-06, "loss": 0.6012, "step": 4384 }, { "epoch": 0.1291928759777853, "grad_norm": 1.5681146590080528, "learning_rate": 9.974104385898857e-06, "loss": 0.5215, "step": 4385 }, { "epoch": 0.12922233843524888, "grad_norm": 1.6726165896307372, "learning_rate": 9.974052090423685e-06, "loss": 0.6387, "step": 4386 }, { "epoch": 0.12925180089271246, "grad_norm": 1.7243121066129914, "learning_rate": 9.973999742334505e-06, "loss": 0.4643, "step": 4387 }, { "epoch": 0.12928126335017603, "grad_norm": 1.557809498670521, "learning_rate": 9.973947341631872e-06, "loss": 0.4278, "step": 4388 }, { "epoch": 0.1293107258076396, "grad_norm": 1.4800917045988606, "learning_rate": 9.97389488831634e-06, "loss": 0.4094, "step": 4389 }, { "epoch": 0.12934018826510318, "grad_norm": 1.8168195222637085, "learning_rate": 9.973842382388463e-06, "loss": 0.4705, "step": 4390 }, { "epoch": 0.12936965072256676, "grad_norm": 1.7224632026499818, "learning_rate": 9.9737898238488e-06, "loss": 0.5712, "step": 4391 }, { "epoch": 0.12939911318003033, "grad_norm": 1.7756769635623286, "learning_rate": 9.9737372126979e-06, "loss": 0.465, "step": 4392 }, { "epoch": 0.12942857563749394, "grad_norm": 1.9299020975429508, "learning_rate": 9.973684548936326e-06, "loss": 0.5486, "step": 4393 }, { "epoch": 0.1294580380949575, "grad_norm": 1.8478856686274452, "learning_rate": 9.973631832564632e-06, "loss": 0.535, "step": 4394 }, { "epoch": 0.1294875005524211, "grad_norm": 1.7984548728244552, "learning_rate": 9.973579063583377e-06, "loss": 0.524, "step": 4395 }, { "epoch": 0.12951696300988466, "grad_norm": 1.6150913405062268, "learning_rate": 9.973526241993117e-06, "loss": 0.4622, "step": 4396 }, { "epoch": 0.12954642546734824, "grad_norm": 1.7078201889552047, "learning_rate": 9.973473367794413e-06, "loss": 0.3466, "step": 4397 }, { "epoch": 0.1295758879248118, "grad_norm": 1.8104279008991329, "learning_rate": 9.973420440987822e-06, "loss": 0.4578, "step": 4398 }, { "epoch": 0.12960535038227539, "grad_norm": 1.5939954686101976, "learning_rate": 9.973367461573906e-06, "loss": 0.6, "step": 4399 }, { "epoch": 0.12963481283973896, "grad_norm": 1.5388112876890667, "learning_rate": 9.973314429553226e-06, "loss": 0.4134, "step": 4400 }, { "epoch": 0.12966427529720254, "grad_norm": 1.497078383613504, "learning_rate": 9.973261344926341e-06, "loss": 0.3542, "step": 4401 }, { "epoch": 0.1296937377546661, "grad_norm": 2.018192258592279, "learning_rate": 9.973208207693812e-06, "loss": 0.6319, "step": 4402 }, { "epoch": 0.12972320021212969, "grad_norm": 1.8422320959563254, "learning_rate": 9.973155017856205e-06, "loss": 0.5753, "step": 4403 }, { "epoch": 0.12975266266959326, "grad_norm": 1.838289888677029, "learning_rate": 9.973101775414078e-06, "loss": 0.5883, "step": 4404 }, { "epoch": 0.12978212512705684, "grad_norm": 2.236044344280895, "learning_rate": 9.973048480367996e-06, "loss": 0.4333, "step": 4405 }, { "epoch": 0.12981158758452044, "grad_norm": 1.718235092492733, "learning_rate": 9.972995132718522e-06, "loss": 0.4077, "step": 4406 }, { "epoch": 0.129841050041984, "grad_norm": 1.7896866288254651, "learning_rate": 9.972941732466221e-06, "loss": 0.5884, "step": 4407 }, { "epoch": 0.1298705124994476, "grad_norm": 1.775242971472368, "learning_rate": 9.972888279611659e-06, "loss": 0.435, "step": 4408 }, { "epoch": 0.12989997495691116, "grad_norm": 1.8398998284970383, "learning_rate": 9.9728347741554e-06, "loss": 0.5428, "step": 4409 }, { "epoch": 0.12992943741437474, "grad_norm": 1.8036768035652702, "learning_rate": 9.97278121609801e-06, "loss": 0.4636, "step": 4410 }, { "epoch": 0.1299588998718383, "grad_norm": 1.649266206688584, "learning_rate": 9.972727605440056e-06, "loss": 0.3734, "step": 4411 }, { "epoch": 0.1299883623293019, "grad_norm": 1.6669678756381328, "learning_rate": 9.972673942182104e-06, "loss": 0.4481, "step": 4412 }, { "epoch": 0.13001782478676546, "grad_norm": 1.637001681068833, "learning_rate": 9.972620226324725e-06, "loss": 0.5354, "step": 4413 }, { "epoch": 0.13004728724422904, "grad_norm": 1.7875989196312927, "learning_rate": 9.972566457868483e-06, "loss": 0.5253, "step": 4414 }, { "epoch": 0.1300767497016926, "grad_norm": 1.996475789055375, "learning_rate": 9.97251263681395e-06, "loss": 0.5307, "step": 4415 }, { "epoch": 0.1301062121591562, "grad_norm": 1.6124062625117235, "learning_rate": 9.972458763161691e-06, "loss": 0.4098, "step": 4416 }, { "epoch": 0.13013567461661976, "grad_norm": 1.7442121538596556, "learning_rate": 9.972404836912281e-06, "loss": 0.476, "step": 4417 }, { "epoch": 0.13016513707408334, "grad_norm": 2.1423160384883695, "learning_rate": 9.972350858066286e-06, "loss": 0.6317, "step": 4418 }, { "epoch": 0.13019459953154694, "grad_norm": 1.7870669460031792, "learning_rate": 9.97229682662428e-06, "loss": 0.5672, "step": 4419 }, { "epoch": 0.13022406198901051, "grad_norm": 1.7037625851631983, "learning_rate": 9.972242742586835e-06, "loss": 0.4578, "step": 4420 }, { "epoch": 0.1302535244464741, "grad_norm": 1.654331942138837, "learning_rate": 9.972188605954519e-06, "loss": 0.4859, "step": 4421 }, { "epoch": 0.13028298690393766, "grad_norm": 1.690474146928825, "learning_rate": 9.972134416727908e-06, "loss": 0.554, "step": 4422 }, { "epoch": 0.13031244936140124, "grad_norm": 1.9035721888719446, "learning_rate": 9.972080174907576e-06, "loss": 0.5346, "step": 4423 }, { "epoch": 0.13034191181886481, "grad_norm": 1.7429922634042598, "learning_rate": 9.972025880494093e-06, "loss": 0.5517, "step": 4424 }, { "epoch": 0.1303713742763284, "grad_norm": 1.7064766579843589, "learning_rate": 9.971971533488038e-06, "loss": 0.4937, "step": 4425 }, { "epoch": 0.13040083673379196, "grad_norm": 1.7382766349155874, "learning_rate": 9.971917133889981e-06, "loss": 0.5655, "step": 4426 }, { "epoch": 0.13043029919125554, "grad_norm": 1.5480735900453026, "learning_rate": 9.971862681700501e-06, "loss": 0.4625, "step": 4427 }, { "epoch": 0.1304597616487191, "grad_norm": 1.6030754467826236, "learning_rate": 9.971808176920172e-06, "loss": 0.4112, "step": 4428 }, { "epoch": 0.1304892241061827, "grad_norm": 1.8195113484389607, "learning_rate": 9.971753619549572e-06, "loss": 0.6495, "step": 4429 }, { "epoch": 0.13051868656364626, "grad_norm": 1.744729098003766, "learning_rate": 9.971699009589277e-06, "loss": 0.6354, "step": 4430 }, { "epoch": 0.13054814902110984, "grad_norm": 1.7577165144447242, "learning_rate": 9.971644347039866e-06, "loss": 0.5554, "step": 4431 }, { "epoch": 0.13057761147857344, "grad_norm": 1.7847306542278627, "learning_rate": 9.971589631901916e-06, "loss": 0.5326, "step": 4432 }, { "epoch": 0.13060707393603702, "grad_norm": 1.561370433079285, "learning_rate": 9.971534864176005e-06, "loss": 0.5278, "step": 4433 }, { "epoch": 0.1306365363935006, "grad_norm": 1.6961180558357787, "learning_rate": 9.971480043862713e-06, "loss": 0.5845, "step": 4434 }, { "epoch": 0.13066599885096417, "grad_norm": 1.8275984141050436, "learning_rate": 9.971425170962622e-06, "loss": 0.6278, "step": 4435 }, { "epoch": 0.13069546130842774, "grad_norm": 1.6198590315142989, "learning_rate": 9.97137024547631e-06, "loss": 0.4762, "step": 4436 }, { "epoch": 0.13072492376589132, "grad_norm": 1.4693291671901654, "learning_rate": 9.971315267404358e-06, "loss": 0.4551, "step": 4437 }, { "epoch": 0.1307543862233549, "grad_norm": 1.5167444423686969, "learning_rate": 9.971260236747348e-06, "loss": 0.3968, "step": 4438 }, { "epoch": 0.13078384868081847, "grad_norm": 2.148171180307492, "learning_rate": 9.971205153505862e-06, "loss": 0.6579, "step": 4439 }, { "epoch": 0.13081331113828204, "grad_norm": 1.8122035846773017, "learning_rate": 9.971150017680485e-06, "loss": 0.3511, "step": 4440 }, { "epoch": 0.13084277359574562, "grad_norm": 1.9267648229062098, "learning_rate": 9.971094829271797e-06, "loss": 0.7098, "step": 4441 }, { "epoch": 0.1308722360532092, "grad_norm": 1.566530475881813, "learning_rate": 9.971039588280382e-06, "loss": 0.528, "step": 4442 }, { "epoch": 0.13090169851067276, "grad_norm": 1.577258167692862, "learning_rate": 9.970984294706828e-06, "loss": 0.4066, "step": 4443 }, { "epoch": 0.13093116096813634, "grad_norm": 1.656450941514512, "learning_rate": 9.970928948551714e-06, "loss": 0.5329, "step": 4444 }, { "epoch": 0.13096062342559994, "grad_norm": 1.4719964192659907, "learning_rate": 9.97087354981563e-06, "loss": 0.4148, "step": 4445 }, { "epoch": 0.13099008588306352, "grad_norm": 1.6026536975305807, "learning_rate": 9.970818098499158e-06, "loss": 0.5167, "step": 4446 }, { "epoch": 0.1310195483405271, "grad_norm": 2.196491134072005, "learning_rate": 9.97076259460289e-06, "loss": 0.7031, "step": 4447 }, { "epoch": 0.13104901079799067, "grad_norm": 1.738533994314695, "learning_rate": 9.97070703812741e-06, "loss": 0.4223, "step": 4448 }, { "epoch": 0.13107847325545424, "grad_norm": 1.905360729951111, "learning_rate": 9.970651429073303e-06, "loss": 0.6603, "step": 4449 }, { "epoch": 0.13110793571291782, "grad_norm": 1.8800579279447909, "learning_rate": 9.970595767441163e-06, "loss": 0.6701, "step": 4450 }, { "epoch": 0.1311373981703814, "grad_norm": 1.703295740995699, "learning_rate": 9.970540053231575e-06, "loss": 0.5445, "step": 4451 }, { "epoch": 0.13116686062784497, "grad_norm": 1.718499396103287, "learning_rate": 9.97048428644513e-06, "loss": 0.5082, "step": 4452 }, { "epoch": 0.13119632308530854, "grad_norm": 1.7837525517441626, "learning_rate": 9.970428467082416e-06, "loss": 0.5281, "step": 4453 }, { "epoch": 0.13122578554277212, "grad_norm": 1.7809514422610746, "learning_rate": 9.970372595144024e-06, "loss": 0.5991, "step": 4454 }, { "epoch": 0.1312552480002357, "grad_norm": 1.5945740333027396, "learning_rate": 9.970316670630547e-06, "loss": 0.5475, "step": 4455 }, { "epoch": 0.13128471045769927, "grad_norm": 1.746237486127934, "learning_rate": 9.970260693542573e-06, "loss": 0.6398, "step": 4456 }, { "epoch": 0.13131417291516284, "grad_norm": 1.8577621166997371, "learning_rate": 9.970204663880698e-06, "loss": 0.5115, "step": 4457 }, { "epoch": 0.13134363537262644, "grad_norm": 1.5335514514264894, "learning_rate": 9.970148581645512e-06, "loss": 0.5537, "step": 4458 }, { "epoch": 0.13137309783009002, "grad_norm": 1.733536626079386, "learning_rate": 9.970092446837608e-06, "loss": 0.4828, "step": 4459 }, { "epoch": 0.1314025602875536, "grad_norm": 1.5949838090707915, "learning_rate": 9.970036259457582e-06, "loss": 0.5287, "step": 4460 }, { "epoch": 0.13143202274501717, "grad_norm": 1.8136872212259867, "learning_rate": 9.969980019506027e-06, "loss": 0.6603, "step": 4461 }, { "epoch": 0.13146148520248074, "grad_norm": 1.5022077791944122, "learning_rate": 9.969923726983537e-06, "loss": 0.3649, "step": 4462 }, { "epoch": 0.13149094765994432, "grad_norm": 1.8449341486472648, "learning_rate": 9.96986738189071e-06, "loss": 0.4931, "step": 4463 }, { "epoch": 0.1315204101174079, "grad_norm": 1.957599158307666, "learning_rate": 9.969810984228139e-06, "loss": 0.6001, "step": 4464 }, { "epoch": 0.13154987257487147, "grad_norm": 1.6882560492714183, "learning_rate": 9.969754533996422e-06, "loss": 0.495, "step": 4465 }, { "epoch": 0.13157933503233504, "grad_norm": 1.6405594845398863, "learning_rate": 9.969698031196157e-06, "loss": 0.3916, "step": 4466 }, { "epoch": 0.13160879748979862, "grad_norm": 1.7578301456810694, "learning_rate": 9.969641475827942e-06, "loss": 0.5279, "step": 4467 }, { "epoch": 0.1316382599472622, "grad_norm": 1.8084236923377983, "learning_rate": 9.969584867892372e-06, "loss": 0.6727, "step": 4468 }, { "epoch": 0.13166772240472577, "grad_norm": 1.8207196326614263, "learning_rate": 9.969528207390049e-06, "loss": 0.5815, "step": 4469 }, { "epoch": 0.13169718486218934, "grad_norm": 1.9597041118754448, "learning_rate": 9.96947149432157e-06, "loss": 0.5867, "step": 4470 }, { "epoch": 0.13172664731965295, "grad_norm": 1.7233410320010167, "learning_rate": 9.969414728687536e-06, "loss": 0.4469, "step": 4471 }, { "epoch": 0.13175610977711652, "grad_norm": 1.7505189743077008, "learning_rate": 9.969357910488548e-06, "loss": 0.5015, "step": 4472 }, { "epoch": 0.1317855722345801, "grad_norm": 1.7766952953396373, "learning_rate": 9.969301039725207e-06, "loss": 0.6384, "step": 4473 }, { "epoch": 0.13181503469204367, "grad_norm": 1.6353733064422844, "learning_rate": 9.969244116398114e-06, "loss": 0.5565, "step": 4474 }, { "epoch": 0.13184449714950724, "grad_norm": 1.702566941235932, "learning_rate": 9.96918714050787e-06, "loss": 0.4608, "step": 4475 }, { "epoch": 0.13187395960697082, "grad_norm": 1.7514680020771833, "learning_rate": 9.96913011205508e-06, "loss": 0.5456, "step": 4476 }, { "epoch": 0.1319034220644344, "grad_norm": 2.168137243966413, "learning_rate": 9.969073031040346e-06, "loss": 0.6899, "step": 4477 }, { "epoch": 0.13193288452189797, "grad_norm": 1.8708038520059225, "learning_rate": 9.96901589746427e-06, "loss": 0.5588, "step": 4478 }, { "epoch": 0.13196234697936154, "grad_norm": 1.7133032615279125, "learning_rate": 9.968958711327462e-06, "loss": 0.5426, "step": 4479 }, { "epoch": 0.13199180943682512, "grad_norm": 1.7942070856655117, "learning_rate": 9.96890147263052e-06, "loss": 0.4832, "step": 4480 }, { "epoch": 0.1320212718942887, "grad_norm": 1.538292000751427, "learning_rate": 9.968844181374053e-06, "loss": 0.396, "step": 4481 }, { "epoch": 0.13205073435175227, "grad_norm": 1.5962022421980915, "learning_rate": 9.968786837558667e-06, "loss": 0.5508, "step": 4482 }, { "epoch": 0.13208019680921584, "grad_norm": 1.8692384783042533, "learning_rate": 9.968729441184969e-06, "loss": 0.699, "step": 4483 }, { "epoch": 0.13210965926667945, "grad_norm": 1.7031573806513287, "learning_rate": 9.968671992253564e-06, "loss": 0.4066, "step": 4484 }, { "epoch": 0.13213912172414302, "grad_norm": 1.776534396911266, "learning_rate": 9.96861449076506e-06, "loss": 0.5062, "step": 4485 }, { "epoch": 0.1321685841816066, "grad_norm": 1.7618340737276177, "learning_rate": 9.968556936720068e-06, "loss": 0.2984, "step": 4486 }, { "epoch": 0.13219804663907017, "grad_norm": 1.8233772451760353, "learning_rate": 9.968499330119194e-06, "loss": 0.6187, "step": 4487 }, { "epoch": 0.13222750909653375, "grad_norm": 1.651217577011838, "learning_rate": 9.96844167096305e-06, "loss": 0.4355, "step": 4488 }, { "epoch": 0.13225697155399732, "grad_norm": 1.7087836309840825, "learning_rate": 9.96838395925224e-06, "loss": 0.5929, "step": 4489 }, { "epoch": 0.1322864340114609, "grad_norm": 2.0455896932299193, "learning_rate": 9.968326194987383e-06, "loss": 0.5303, "step": 4490 }, { "epoch": 0.13231589646892447, "grad_norm": 1.9527245030530815, "learning_rate": 9.968268378169084e-06, "loss": 0.7046, "step": 4491 }, { "epoch": 0.13234535892638805, "grad_norm": 1.5206295815726816, "learning_rate": 9.968210508797956e-06, "loss": 0.3127, "step": 4492 }, { "epoch": 0.13237482138385162, "grad_norm": 1.5638043417495524, "learning_rate": 9.968152586874612e-06, "loss": 0.5182, "step": 4493 }, { "epoch": 0.1324042838413152, "grad_norm": 1.673548745474884, "learning_rate": 9.968094612399663e-06, "loss": 0.5606, "step": 4494 }, { "epoch": 0.13243374629877877, "grad_norm": 1.6644461779566215, "learning_rate": 9.968036585373725e-06, "loss": 0.4399, "step": 4495 }, { "epoch": 0.13246320875624235, "grad_norm": 1.9106170157346662, "learning_rate": 9.967978505797408e-06, "loss": 0.6263, "step": 4496 }, { "epoch": 0.13249267121370595, "grad_norm": 2.0434959437108358, "learning_rate": 9.967920373671329e-06, "loss": 0.7456, "step": 4497 }, { "epoch": 0.13252213367116952, "grad_norm": 2.147389824585629, "learning_rate": 9.967862188996104e-06, "loss": 0.3587, "step": 4498 }, { "epoch": 0.1325515961286331, "grad_norm": 1.647956919038454, "learning_rate": 9.967803951772345e-06, "loss": 0.5545, "step": 4499 }, { "epoch": 0.13258105858609667, "grad_norm": 1.7244709783225702, "learning_rate": 9.96774566200067e-06, "loss": 0.5612, "step": 4500 }, { "epoch": 0.13261052104356025, "grad_norm": 1.917406176407204, "learning_rate": 9.967687319681695e-06, "loss": 0.4811, "step": 4501 }, { "epoch": 0.13263998350102382, "grad_norm": 1.795797383445597, "learning_rate": 9.96762892481604e-06, "loss": 0.4301, "step": 4502 }, { "epoch": 0.1326694459584874, "grad_norm": 1.9223329836547824, "learning_rate": 9.967570477404317e-06, "loss": 0.4842, "step": 4503 }, { "epoch": 0.13269890841595097, "grad_norm": 1.6239109865272578, "learning_rate": 9.967511977447148e-06, "loss": 0.4039, "step": 4504 }, { "epoch": 0.13272837087341455, "grad_norm": 1.676276808794386, "learning_rate": 9.967453424945153e-06, "loss": 0.5445, "step": 4505 }, { "epoch": 0.13275783333087812, "grad_norm": 1.4767057426421644, "learning_rate": 9.967394819898947e-06, "loss": 0.4023, "step": 4506 }, { "epoch": 0.1327872957883417, "grad_norm": 1.7287962455717538, "learning_rate": 9.967336162309155e-06, "loss": 0.5493, "step": 4507 }, { "epoch": 0.13281675824580527, "grad_norm": 1.782780409421821, "learning_rate": 9.967277452176393e-06, "loss": 0.6435, "step": 4508 }, { "epoch": 0.13284622070326885, "grad_norm": 1.647121923765874, "learning_rate": 9.967218689501286e-06, "loss": 0.3363, "step": 4509 }, { "epoch": 0.13287568316073245, "grad_norm": 1.4137356653281568, "learning_rate": 9.967159874284451e-06, "loss": 0.3671, "step": 4510 }, { "epoch": 0.13290514561819602, "grad_norm": 1.5514625950798484, "learning_rate": 9.967101006526515e-06, "loss": 0.4839, "step": 4511 }, { "epoch": 0.1329346080756596, "grad_norm": 1.7706332896327113, "learning_rate": 9.967042086228098e-06, "loss": 0.601, "step": 4512 }, { "epoch": 0.13296407053312317, "grad_norm": 1.7640713003207322, "learning_rate": 9.966983113389822e-06, "loss": 0.6517, "step": 4513 }, { "epoch": 0.13299353299058675, "grad_norm": 1.4317317871929731, "learning_rate": 9.966924088012314e-06, "loss": 0.4846, "step": 4514 }, { "epoch": 0.13302299544805032, "grad_norm": 1.6080520687379982, "learning_rate": 9.966865010096194e-06, "loss": 0.5253, "step": 4515 }, { "epoch": 0.1330524579055139, "grad_norm": 1.9252654434579428, "learning_rate": 9.966805879642092e-06, "loss": 0.6122, "step": 4516 }, { "epoch": 0.13308192036297747, "grad_norm": 1.6585919052128741, "learning_rate": 9.966746696650631e-06, "loss": 0.6998, "step": 4517 }, { "epoch": 0.13311138282044105, "grad_norm": 1.7094040387130651, "learning_rate": 9.966687461122438e-06, "loss": 0.5593, "step": 4518 }, { "epoch": 0.13314084527790462, "grad_norm": 1.7241233029145284, "learning_rate": 9.966628173058135e-06, "loss": 0.6149, "step": 4519 }, { "epoch": 0.1331703077353682, "grad_norm": 1.5046817128675247, "learning_rate": 9.966568832458356e-06, "loss": 0.4292, "step": 4520 }, { "epoch": 0.13319977019283177, "grad_norm": 1.7634218504684505, "learning_rate": 9.966509439323724e-06, "loss": 0.5987, "step": 4521 }, { "epoch": 0.13322923265029535, "grad_norm": 1.7126706339691489, "learning_rate": 9.96644999365487e-06, "loss": 0.6382, "step": 4522 }, { "epoch": 0.13325869510775895, "grad_norm": 1.717081210929772, "learning_rate": 9.96639049545242e-06, "loss": 0.5449, "step": 4523 }, { "epoch": 0.13328815756522253, "grad_norm": 1.5218123936390786, "learning_rate": 9.966330944717007e-06, "loss": 0.462, "step": 4524 }, { "epoch": 0.1333176200226861, "grad_norm": 1.7555495542874862, "learning_rate": 9.966271341449258e-06, "loss": 0.5888, "step": 4525 }, { "epoch": 0.13334708248014968, "grad_norm": 1.4506379296774041, "learning_rate": 9.966211685649804e-06, "loss": 0.4264, "step": 4526 }, { "epoch": 0.13337654493761325, "grad_norm": 1.762945165931735, "learning_rate": 9.966151977319277e-06, "loss": 0.563, "step": 4527 }, { "epoch": 0.13340600739507683, "grad_norm": 1.5813969445193432, "learning_rate": 9.966092216458305e-06, "loss": 0.5385, "step": 4528 }, { "epoch": 0.1334354698525404, "grad_norm": 1.847147804017316, "learning_rate": 9.966032403067526e-06, "loss": 0.5303, "step": 4529 }, { "epoch": 0.13346493231000398, "grad_norm": 1.626366755168393, "learning_rate": 9.965972537147567e-06, "loss": 0.5078, "step": 4530 }, { "epoch": 0.13349439476746755, "grad_norm": 1.7271908792952988, "learning_rate": 9.965912618699067e-06, "loss": 0.5181, "step": 4531 }, { "epoch": 0.13352385722493113, "grad_norm": 1.5662627577751334, "learning_rate": 9.965852647722655e-06, "loss": 0.5293, "step": 4532 }, { "epoch": 0.1335533196823947, "grad_norm": 1.6034141868314213, "learning_rate": 9.965792624218969e-06, "loss": 0.5295, "step": 4533 }, { "epoch": 0.13358278213985827, "grad_norm": 1.7106460122101597, "learning_rate": 9.96573254818864e-06, "loss": 0.4306, "step": 4534 }, { "epoch": 0.13361224459732185, "grad_norm": 1.6439139483406793, "learning_rate": 9.965672419632306e-06, "loss": 0.3494, "step": 4535 }, { "epoch": 0.13364170705478545, "grad_norm": 1.7583616128482922, "learning_rate": 9.965612238550602e-06, "loss": 0.6692, "step": 4536 }, { "epoch": 0.13367116951224903, "grad_norm": 1.6967086874574233, "learning_rate": 9.965552004944165e-06, "loss": 0.6632, "step": 4537 }, { "epoch": 0.1337006319697126, "grad_norm": 1.5214554989357316, "learning_rate": 9.965491718813633e-06, "loss": 0.4633, "step": 4538 }, { "epoch": 0.13373009442717618, "grad_norm": 1.7198730710253083, "learning_rate": 9.965431380159643e-06, "loss": 0.5201, "step": 4539 }, { "epoch": 0.13375955688463975, "grad_norm": 1.3977068050646957, "learning_rate": 9.965370988982832e-06, "loss": 0.4508, "step": 4540 }, { "epoch": 0.13378901934210333, "grad_norm": 1.7898278286898073, "learning_rate": 9.965310545283841e-06, "loss": 0.7249, "step": 4541 }, { "epoch": 0.1338184817995669, "grad_norm": 1.6097911764641912, "learning_rate": 9.965250049063309e-06, "loss": 0.4219, "step": 4542 }, { "epoch": 0.13384794425703048, "grad_norm": 1.749891189153877, "learning_rate": 9.965189500321873e-06, "loss": 0.5791, "step": 4543 }, { "epoch": 0.13387740671449405, "grad_norm": 1.747872554761217, "learning_rate": 9.965128899060176e-06, "loss": 0.6565, "step": 4544 }, { "epoch": 0.13390686917195763, "grad_norm": 1.9297067868767113, "learning_rate": 9.965068245278862e-06, "loss": 0.424, "step": 4545 }, { "epoch": 0.1339363316294212, "grad_norm": 1.6298248730209914, "learning_rate": 9.965007538978566e-06, "loss": 0.552, "step": 4546 }, { "epoch": 0.13396579408688478, "grad_norm": 1.8933565489680662, "learning_rate": 9.964946780159934e-06, "loss": 0.5216, "step": 4547 }, { "epoch": 0.13399525654434835, "grad_norm": 1.6425359366330508, "learning_rate": 9.964885968823607e-06, "loss": 0.502, "step": 4548 }, { "epoch": 0.13402471900181195, "grad_norm": 1.6186921517426442, "learning_rate": 9.96482510497023e-06, "loss": 0.4428, "step": 4549 }, { "epoch": 0.13405418145927553, "grad_norm": 2.1685814156206136, "learning_rate": 9.964764188600449e-06, "loss": 0.6086, "step": 4550 }, { "epoch": 0.1340836439167391, "grad_norm": 1.4968044983208684, "learning_rate": 9.964703219714901e-06, "loss": 0.4411, "step": 4551 }, { "epoch": 0.13411310637420268, "grad_norm": 1.8598218881239732, "learning_rate": 9.964642198314238e-06, "loss": 0.5069, "step": 4552 }, { "epoch": 0.13414256883166625, "grad_norm": 1.8380115548834748, "learning_rate": 9.964581124399102e-06, "loss": 0.4373, "step": 4553 }, { "epoch": 0.13417203128912983, "grad_norm": 1.7588462562945941, "learning_rate": 9.964519997970142e-06, "loss": 0.5355, "step": 4554 }, { "epoch": 0.1342014937465934, "grad_norm": 1.6089044916044917, "learning_rate": 9.964458819028e-06, "loss": 0.6458, "step": 4555 }, { "epoch": 0.13423095620405698, "grad_norm": 1.6106208960711184, "learning_rate": 9.964397587573326e-06, "loss": 0.4202, "step": 4556 }, { "epoch": 0.13426041866152055, "grad_norm": 1.752164501984747, "learning_rate": 9.964336303606767e-06, "loss": 0.4063, "step": 4557 }, { "epoch": 0.13428988111898413, "grad_norm": 1.697738991036075, "learning_rate": 9.964274967128973e-06, "loss": 0.5155, "step": 4558 }, { "epoch": 0.1343193435764477, "grad_norm": 1.7649275365773884, "learning_rate": 9.964213578140589e-06, "loss": 0.4597, "step": 4559 }, { "epoch": 0.13434880603391128, "grad_norm": 1.7368003978746012, "learning_rate": 9.964152136642268e-06, "loss": 0.7138, "step": 4560 }, { "epoch": 0.13437826849137485, "grad_norm": 1.7510515890836467, "learning_rate": 9.964090642634659e-06, "loss": 0.622, "step": 4561 }, { "epoch": 0.13440773094883846, "grad_norm": 1.7521227075959163, "learning_rate": 9.964029096118411e-06, "loss": 0.4979, "step": 4562 }, { "epoch": 0.13443719340630203, "grad_norm": 1.6984212577023132, "learning_rate": 9.963967497094178e-06, "loss": 0.5454, "step": 4563 }, { "epoch": 0.1344666558637656, "grad_norm": 1.633845449250173, "learning_rate": 9.963905845562606e-06, "loss": 0.5275, "step": 4564 }, { "epoch": 0.13449611832122918, "grad_norm": 1.9726504093750783, "learning_rate": 9.963844141524355e-06, "loss": 0.6006, "step": 4565 }, { "epoch": 0.13452558077869275, "grad_norm": 1.7796765973805557, "learning_rate": 9.963782384980071e-06, "loss": 0.4945, "step": 4566 }, { "epoch": 0.13455504323615633, "grad_norm": 1.7316310371791719, "learning_rate": 9.96372057593041e-06, "loss": 0.6436, "step": 4567 }, { "epoch": 0.1345845056936199, "grad_norm": 1.671130131157503, "learning_rate": 9.963658714376025e-06, "loss": 0.4824, "step": 4568 }, { "epoch": 0.13461396815108348, "grad_norm": 1.7329109024617535, "learning_rate": 9.963596800317572e-06, "loss": 0.3904, "step": 4569 }, { "epoch": 0.13464343060854705, "grad_norm": 1.8253454807591676, "learning_rate": 9.963534833755704e-06, "loss": 0.5798, "step": 4570 }, { "epoch": 0.13467289306601063, "grad_norm": 1.7225769172442145, "learning_rate": 9.963472814691078e-06, "loss": 0.4129, "step": 4571 }, { "epoch": 0.1347023555234742, "grad_norm": 1.516910372228537, "learning_rate": 9.963410743124349e-06, "loss": 0.5197, "step": 4572 }, { "epoch": 0.13473181798093778, "grad_norm": 1.56373606280587, "learning_rate": 9.963348619056174e-06, "loss": 0.535, "step": 4573 }, { "epoch": 0.13476128043840135, "grad_norm": 1.7534523108433822, "learning_rate": 9.96328644248721e-06, "loss": 0.5801, "step": 4574 }, { "epoch": 0.13479074289586496, "grad_norm": 1.984312676971648, "learning_rate": 9.963224213418112e-06, "loss": 0.6723, "step": 4575 }, { "epoch": 0.13482020535332853, "grad_norm": 1.6713897981216161, "learning_rate": 9.963161931849543e-06, "loss": 0.5704, "step": 4576 }, { "epoch": 0.1348496678107921, "grad_norm": 1.803184861193753, "learning_rate": 9.96309959778216e-06, "loss": 0.4959, "step": 4577 }, { "epoch": 0.13487913026825568, "grad_norm": 2.0614777291393764, "learning_rate": 9.963037211216622e-06, "loss": 0.5701, "step": 4578 }, { "epoch": 0.13490859272571926, "grad_norm": 1.7311046987937269, "learning_rate": 9.962974772153588e-06, "loss": 0.5533, "step": 4579 }, { "epoch": 0.13493805518318283, "grad_norm": 1.4385831051517586, "learning_rate": 9.962912280593721e-06, "loss": 0.4357, "step": 4580 }, { "epoch": 0.1349675176406464, "grad_norm": 1.8542064523067026, "learning_rate": 9.962849736537678e-06, "loss": 0.6273, "step": 4581 }, { "epoch": 0.13499698009810998, "grad_norm": 1.629433849854751, "learning_rate": 9.962787139986125e-06, "loss": 0.5934, "step": 4582 }, { "epoch": 0.13502644255557356, "grad_norm": 1.6848947939871495, "learning_rate": 9.962724490939721e-06, "loss": 0.6119, "step": 4583 }, { "epoch": 0.13505590501303713, "grad_norm": 1.7246801145693127, "learning_rate": 9.96266178939913e-06, "loss": 0.5817, "step": 4584 }, { "epoch": 0.1350853674705007, "grad_norm": 1.8814259684526036, "learning_rate": 9.962599035365015e-06, "loss": 0.708, "step": 4585 }, { "epoch": 0.13511482992796428, "grad_norm": 1.811301394447103, "learning_rate": 9.96253622883804e-06, "loss": 0.6355, "step": 4586 }, { "epoch": 0.13514429238542786, "grad_norm": 1.813049200296496, "learning_rate": 9.96247336981887e-06, "loss": 0.6493, "step": 4587 }, { "epoch": 0.13517375484289146, "grad_norm": 1.6725091311979123, "learning_rate": 9.962410458308168e-06, "loss": 0.4901, "step": 4588 }, { "epoch": 0.13520321730035503, "grad_norm": 1.6751886164547674, "learning_rate": 9.9623474943066e-06, "loss": 0.5598, "step": 4589 }, { "epoch": 0.1352326797578186, "grad_norm": 1.6489937660741911, "learning_rate": 9.962284477814834e-06, "loss": 0.6231, "step": 4590 }, { "epoch": 0.13526214221528218, "grad_norm": 1.6550087923322874, "learning_rate": 9.962221408833535e-06, "loss": 0.5438, "step": 4591 }, { "epoch": 0.13529160467274576, "grad_norm": 1.6381202572218785, "learning_rate": 9.962158287363369e-06, "loss": 0.5389, "step": 4592 }, { "epoch": 0.13532106713020933, "grad_norm": 1.7012460325288257, "learning_rate": 9.962095113405005e-06, "loss": 0.506, "step": 4593 }, { "epoch": 0.1353505295876729, "grad_norm": 1.6997513586581983, "learning_rate": 9.962031886959111e-06, "loss": 0.5536, "step": 4594 }, { "epoch": 0.13537999204513648, "grad_norm": 1.6543838397440713, "learning_rate": 9.961968608026356e-06, "loss": 0.5234, "step": 4595 }, { "epoch": 0.13540945450260006, "grad_norm": 1.7594515403493483, "learning_rate": 9.96190527660741e-06, "loss": 0.5922, "step": 4596 }, { "epoch": 0.13543891696006363, "grad_norm": 1.774036003731452, "learning_rate": 9.961841892702943e-06, "loss": 0.5988, "step": 4597 }, { "epoch": 0.1354683794175272, "grad_norm": 1.7134295363704113, "learning_rate": 9.961778456313621e-06, "loss": 0.4302, "step": 4598 }, { "epoch": 0.13549784187499078, "grad_norm": 1.7167557654252785, "learning_rate": 9.961714967440124e-06, "loss": 0.5028, "step": 4599 }, { "epoch": 0.13552730433245436, "grad_norm": 1.8315022547040296, "learning_rate": 9.961651426083115e-06, "loss": 0.3782, "step": 4600 }, { "epoch": 0.13555676678991796, "grad_norm": 1.9410992567676204, "learning_rate": 9.961587832243268e-06, "loss": 0.6819, "step": 4601 }, { "epoch": 0.13558622924738153, "grad_norm": 2.165018136293476, "learning_rate": 9.96152418592126e-06, "loss": 0.4877, "step": 4602 }, { "epoch": 0.1356156917048451, "grad_norm": 1.697381489202374, "learning_rate": 9.96146048711776e-06, "loss": 0.3736, "step": 4603 }, { "epoch": 0.13564515416230868, "grad_norm": 1.6109841770658586, "learning_rate": 9.961396735833444e-06, "loss": 0.4659, "step": 4604 }, { "epoch": 0.13567461661977226, "grad_norm": 1.65657962982212, "learning_rate": 9.961332932068983e-06, "loss": 0.5375, "step": 4605 }, { "epoch": 0.13570407907723583, "grad_norm": 1.741443732310607, "learning_rate": 9.961269075825055e-06, "loss": 0.602, "step": 4606 }, { "epoch": 0.1357335415346994, "grad_norm": 1.6682258748593348, "learning_rate": 9.961205167102337e-06, "loss": 0.5649, "step": 4607 }, { "epoch": 0.13576300399216298, "grad_norm": 1.5857690621271878, "learning_rate": 9.9611412059015e-06, "loss": 0.5379, "step": 4608 }, { "epoch": 0.13579246644962656, "grad_norm": 2.034193507197463, "learning_rate": 9.961077192223225e-06, "loss": 0.5626, "step": 4609 }, { "epoch": 0.13582192890709013, "grad_norm": 1.6072377983757515, "learning_rate": 9.961013126068187e-06, "loss": 0.4962, "step": 4610 }, { "epoch": 0.1358513913645537, "grad_norm": 1.7217981409128889, "learning_rate": 9.960949007437063e-06, "loss": 0.4632, "step": 4611 }, { "epoch": 0.13588085382201728, "grad_norm": 1.527761309156237, "learning_rate": 9.960884836330533e-06, "loss": 0.4263, "step": 4612 }, { "epoch": 0.13591031627948086, "grad_norm": 1.6260084856393524, "learning_rate": 9.960820612749276e-06, "loss": 0.5093, "step": 4613 }, { "epoch": 0.13593977873694446, "grad_norm": 1.596800821811265, "learning_rate": 9.960756336693968e-06, "loss": 0.5035, "step": 4614 }, { "epoch": 0.13596924119440804, "grad_norm": 1.5903401528945118, "learning_rate": 9.960692008165293e-06, "loss": 0.4878, "step": 4615 }, { "epoch": 0.1359987036518716, "grad_norm": 1.520246556723161, "learning_rate": 9.960627627163931e-06, "loss": 0.4192, "step": 4616 }, { "epoch": 0.13602816610933519, "grad_norm": 1.528066888895201, "learning_rate": 9.96056319369056e-06, "loss": 0.4543, "step": 4617 }, { "epoch": 0.13605762856679876, "grad_norm": 1.7824601713485138, "learning_rate": 9.960498707745864e-06, "loss": 0.5944, "step": 4618 }, { "epoch": 0.13608709102426234, "grad_norm": 1.708819242289942, "learning_rate": 9.960434169330524e-06, "loss": 0.549, "step": 4619 }, { "epoch": 0.1361165534817259, "grad_norm": 1.498469859351138, "learning_rate": 9.960369578445224e-06, "loss": 0.5283, "step": 4620 }, { "epoch": 0.13614601593918949, "grad_norm": 1.98184616264698, "learning_rate": 9.960304935090644e-06, "loss": 0.6019, "step": 4621 }, { "epoch": 0.13617547839665306, "grad_norm": 1.8318639638889331, "learning_rate": 9.960240239267473e-06, "loss": 0.6495, "step": 4622 }, { "epoch": 0.13620494085411664, "grad_norm": 1.9101460162548847, "learning_rate": 9.96017549097639e-06, "loss": 0.4972, "step": 4623 }, { "epoch": 0.1362344033115802, "grad_norm": 1.7069090943025054, "learning_rate": 9.960110690218084e-06, "loss": 0.5885, "step": 4624 }, { "epoch": 0.13626386576904378, "grad_norm": 1.6118281861839914, "learning_rate": 9.960045836993238e-06, "loss": 0.5644, "step": 4625 }, { "epoch": 0.13629332822650736, "grad_norm": 1.6522498604174067, "learning_rate": 9.959980931302539e-06, "loss": 0.5989, "step": 4626 }, { "epoch": 0.13632279068397096, "grad_norm": 1.606266993726911, "learning_rate": 9.959915973146673e-06, "loss": 0.4965, "step": 4627 }, { "epoch": 0.13635225314143454, "grad_norm": 1.9195692979868035, "learning_rate": 9.959850962526328e-06, "loss": 0.6254, "step": 4628 }, { "epoch": 0.1363817155988981, "grad_norm": 1.7592729241028313, "learning_rate": 9.95978589944219e-06, "loss": 0.5696, "step": 4629 }, { "epoch": 0.1364111780563617, "grad_norm": 1.6790676564868559, "learning_rate": 9.959720783894949e-06, "loss": 0.6323, "step": 4630 }, { "epoch": 0.13644064051382526, "grad_norm": 1.6440846134365616, "learning_rate": 9.959655615885294e-06, "loss": 0.5007, "step": 4631 }, { "epoch": 0.13647010297128884, "grad_norm": 1.4302390274321368, "learning_rate": 9.959590395413912e-06, "loss": 0.3815, "step": 4632 }, { "epoch": 0.1364995654287524, "grad_norm": 1.6188397695963486, "learning_rate": 9.959525122481495e-06, "loss": 0.4725, "step": 4633 }, { "epoch": 0.136529027886216, "grad_norm": 1.9430620479862717, "learning_rate": 9.959459797088733e-06, "loss": 0.6847, "step": 4634 }, { "epoch": 0.13655849034367956, "grad_norm": 1.6757178860816946, "learning_rate": 9.959394419236316e-06, "loss": 0.4769, "step": 4635 }, { "epoch": 0.13658795280114314, "grad_norm": 1.897372549823347, "learning_rate": 9.959328988924936e-06, "loss": 0.4968, "step": 4636 }, { "epoch": 0.1366174152586067, "grad_norm": 1.6849180991232957, "learning_rate": 9.959263506155285e-06, "loss": 0.6598, "step": 4637 }, { "epoch": 0.1366468777160703, "grad_norm": 1.8349376308857996, "learning_rate": 9.959197970928058e-06, "loss": 0.7127, "step": 4638 }, { "epoch": 0.13667634017353386, "grad_norm": 1.6528900711469976, "learning_rate": 9.959132383243945e-06, "loss": 0.451, "step": 4639 }, { "epoch": 0.13670580263099746, "grad_norm": 1.5826198761456192, "learning_rate": 9.95906674310364e-06, "loss": 0.5109, "step": 4640 }, { "epoch": 0.13673526508846104, "grad_norm": 1.6424810169216808, "learning_rate": 9.959001050507841e-06, "loss": 0.5371, "step": 4641 }, { "epoch": 0.1367647275459246, "grad_norm": 1.820554101239478, "learning_rate": 9.958935305457238e-06, "loss": 0.5917, "step": 4642 }, { "epoch": 0.1367941900033882, "grad_norm": 1.8157337063339491, "learning_rate": 9.958869507952528e-06, "loss": 0.6544, "step": 4643 }, { "epoch": 0.13682365246085176, "grad_norm": 1.9093042764959853, "learning_rate": 9.958803657994411e-06, "loss": 0.6761, "step": 4644 }, { "epoch": 0.13685311491831534, "grad_norm": 1.6813439023886305, "learning_rate": 9.958737755583577e-06, "loss": 0.4418, "step": 4645 }, { "epoch": 0.1368825773757789, "grad_norm": 1.6903967253904182, "learning_rate": 9.958671800720727e-06, "loss": 0.5626, "step": 4646 }, { "epoch": 0.1369120398332425, "grad_norm": 1.602607139813918, "learning_rate": 9.95860579340656e-06, "loss": 0.4622, "step": 4647 }, { "epoch": 0.13694150229070606, "grad_norm": 1.613583833545746, "learning_rate": 9.95853973364177e-06, "loss": 0.4891, "step": 4648 }, { "epoch": 0.13697096474816964, "grad_norm": 1.5319247062713648, "learning_rate": 9.958473621427057e-06, "loss": 0.4771, "step": 4649 }, { "epoch": 0.1370004272056332, "grad_norm": 1.7553074356450138, "learning_rate": 9.958407456763122e-06, "loss": 0.6011, "step": 4650 }, { "epoch": 0.1370298896630968, "grad_norm": 1.5828938278057962, "learning_rate": 9.958341239650664e-06, "loss": 0.4774, "step": 4651 }, { "epoch": 0.13705935212056036, "grad_norm": 1.66455828039814, "learning_rate": 9.958274970090385e-06, "loss": 0.5097, "step": 4652 }, { "epoch": 0.13708881457802397, "grad_norm": 1.681229159678397, "learning_rate": 9.958208648082983e-06, "loss": 0.5203, "step": 4653 }, { "epoch": 0.13711827703548754, "grad_norm": 1.7373689271292427, "learning_rate": 9.95814227362916e-06, "loss": 0.4787, "step": 4654 }, { "epoch": 0.13714773949295112, "grad_norm": 2.1290327806150215, "learning_rate": 9.958075846729622e-06, "loss": 0.6729, "step": 4655 }, { "epoch": 0.1371772019504147, "grad_norm": 1.8309998280104682, "learning_rate": 9.958009367385067e-06, "loss": 0.6983, "step": 4656 }, { "epoch": 0.13720666440787826, "grad_norm": 1.7195820791395922, "learning_rate": 9.9579428355962e-06, "loss": 0.5542, "step": 4657 }, { "epoch": 0.13723612686534184, "grad_norm": 1.6475960260292943, "learning_rate": 9.957876251363723e-06, "loss": 0.6018, "step": 4658 }, { "epoch": 0.13726558932280541, "grad_norm": 1.700211940530597, "learning_rate": 9.957809614688345e-06, "loss": 0.5915, "step": 4659 }, { "epoch": 0.137295051780269, "grad_norm": 2.094540894942581, "learning_rate": 9.957742925570767e-06, "loss": 0.4902, "step": 4660 }, { "epoch": 0.13732451423773256, "grad_norm": 1.8386042233958138, "learning_rate": 9.957676184011693e-06, "loss": 0.5476, "step": 4661 }, { "epoch": 0.13735397669519614, "grad_norm": 1.6695807972154277, "learning_rate": 9.957609390011834e-06, "loss": 0.5898, "step": 4662 }, { "epoch": 0.13738343915265971, "grad_norm": 1.812398024044078, "learning_rate": 9.957542543571892e-06, "loss": 0.567, "step": 4663 }, { "epoch": 0.1374129016101233, "grad_norm": 1.760157682822354, "learning_rate": 9.957475644692576e-06, "loss": 0.533, "step": 4664 }, { "epoch": 0.13744236406758686, "grad_norm": 1.8990390895403948, "learning_rate": 9.957408693374593e-06, "loss": 0.6921, "step": 4665 }, { "epoch": 0.13747182652505047, "grad_norm": 1.7655341953992925, "learning_rate": 9.957341689618653e-06, "loss": 0.8106, "step": 4666 }, { "epoch": 0.13750128898251404, "grad_norm": 1.7629589510395327, "learning_rate": 9.957274633425464e-06, "loss": 0.6387, "step": 4667 }, { "epoch": 0.13753075143997762, "grad_norm": 1.8262934318960369, "learning_rate": 9.957207524795733e-06, "loss": 0.5746, "step": 4668 }, { "epoch": 0.1375602138974412, "grad_norm": 1.7524230916992212, "learning_rate": 9.957140363730174e-06, "loss": 0.6008, "step": 4669 }, { "epoch": 0.13758967635490477, "grad_norm": 1.8300853595851594, "learning_rate": 9.957073150229492e-06, "loss": 0.582, "step": 4670 }, { "epoch": 0.13761913881236834, "grad_norm": 1.7105617917018026, "learning_rate": 9.957005884294401e-06, "loss": 0.458, "step": 4671 }, { "epoch": 0.13764860126983192, "grad_norm": 1.8427208272113196, "learning_rate": 9.956938565925614e-06, "loss": 0.6204, "step": 4672 }, { "epoch": 0.1376780637272955, "grad_norm": 1.7353944373358465, "learning_rate": 9.956871195123841e-06, "loss": 0.5778, "step": 4673 }, { "epoch": 0.13770752618475907, "grad_norm": 1.881804631611964, "learning_rate": 9.956803771889796e-06, "loss": 0.5137, "step": 4674 }, { "epoch": 0.13773698864222264, "grad_norm": 1.6118370540664286, "learning_rate": 9.95673629622419e-06, "loss": 0.5231, "step": 4675 }, { "epoch": 0.13776645109968622, "grad_norm": 1.7932268868815104, "learning_rate": 9.95666876812774e-06, "loss": 0.4675, "step": 4676 }, { "epoch": 0.1377959135571498, "grad_norm": 1.940883729504556, "learning_rate": 9.956601187601155e-06, "loss": 0.5118, "step": 4677 }, { "epoch": 0.13782537601461337, "grad_norm": 1.5841225855488863, "learning_rate": 9.956533554645157e-06, "loss": 0.3779, "step": 4678 }, { "epoch": 0.13785483847207697, "grad_norm": 1.733336824783931, "learning_rate": 9.956465869260457e-06, "loss": 0.4739, "step": 4679 }, { "epoch": 0.13788430092954054, "grad_norm": 1.585777343266322, "learning_rate": 9.956398131447768e-06, "loss": 0.5824, "step": 4680 }, { "epoch": 0.13791376338700412, "grad_norm": 1.9358248650021248, "learning_rate": 9.956330341207814e-06, "loss": 0.6784, "step": 4681 }, { "epoch": 0.1379432258444677, "grad_norm": 1.8693920636911072, "learning_rate": 9.956262498541307e-06, "loss": 0.6109, "step": 4682 }, { "epoch": 0.13797268830193127, "grad_norm": 1.8807242763224994, "learning_rate": 9.956194603448965e-06, "loss": 0.5799, "step": 4683 }, { "epoch": 0.13800215075939484, "grad_norm": 1.5453906030918036, "learning_rate": 9.956126655931507e-06, "loss": 0.5642, "step": 4684 }, { "epoch": 0.13803161321685842, "grad_norm": 1.6409434068021773, "learning_rate": 9.956058655989653e-06, "loss": 0.3789, "step": 4685 }, { "epoch": 0.138061075674322, "grad_norm": 1.6577440054049555, "learning_rate": 9.95599060362412e-06, "loss": 0.5529, "step": 4686 }, { "epoch": 0.13809053813178557, "grad_norm": 1.9166006391192092, "learning_rate": 9.955922498835628e-06, "loss": 0.6306, "step": 4687 }, { "epoch": 0.13812000058924914, "grad_norm": 1.9828103026661772, "learning_rate": 9.9558543416249e-06, "loss": 0.5221, "step": 4688 }, { "epoch": 0.13814946304671272, "grad_norm": 2.022654111184564, "learning_rate": 9.955786131992653e-06, "loss": 0.576, "step": 4689 }, { "epoch": 0.1381789255041763, "grad_norm": 1.883215987909713, "learning_rate": 9.955717869939612e-06, "loss": 0.621, "step": 4690 }, { "epoch": 0.13820838796163987, "grad_norm": 1.6899792094471882, "learning_rate": 9.955649555466498e-06, "loss": 0.5203, "step": 4691 }, { "epoch": 0.13823785041910347, "grad_norm": 1.8542925497726612, "learning_rate": 9.955581188574032e-06, "loss": 0.6443, "step": 4692 }, { "epoch": 0.13826731287656704, "grad_norm": 1.4944234761429214, "learning_rate": 9.955512769262938e-06, "loss": 0.4413, "step": 4693 }, { "epoch": 0.13829677533403062, "grad_norm": 1.654204284199911, "learning_rate": 9.955444297533943e-06, "loss": 0.5328, "step": 4694 }, { "epoch": 0.1383262377914942, "grad_norm": 1.7559424267952486, "learning_rate": 9.955375773387765e-06, "loss": 0.5321, "step": 4695 }, { "epoch": 0.13835570024895777, "grad_norm": 1.6791960515984217, "learning_rate": 9.955307196825134e-06, "loss": 0.5239, "step": 4696 }, { "epoch": 0.13838516270642134, "grad_norm": 1.7786602355499581, "learning_rate": 9.955238567846774e-06, "loss": 0.6727, "step": 4697 }, { "epoch": 0.13841462516388492, "grad_norm": 1.8499933779530147, "learning_rate": 9.955169886453411e-06, "loss": 0.6721, "step": 4698 }, { "epoch": 0.1384440876213485, "grad_norm": 1.9093482940216808, "learning_rate": 9.95510115264577e-06, "loss": 0.5628, "step": 4699 }, { "epoch": 0.13847355007881207, "grad_norm": 1.7785891514770844, "learning_rate": 9.955032366424579e-06, "loss": 0.5756, "step": 4700 }, { "epoch": 0.13850301253627564, "grad_norm": 1.81113768449108, "learning_rate": 9.954963527790567e-06, "loss": 0.619, "step": 4701 }, { "epoch": 0.13853247499373922, "grad_norm": 1.6191070069718094, "learning_rate": 9.954894636744459e-06, "loss": 0.4637, "step": 4702 }, { "epoch": 0.1385619374512028, "grad_norm": 1.947756787411793, "learning_rate": 9.954825693286987e-06, "loss": 0.7005, "step": 4703 }, { "epoch": 0.13859139990866637, "grad_norm": 1.6970947555416602, "learning_rate": 9.95475669741888e-06, "loss": 0.5017, "step": 4704 }, { "epoch": 0.13862086236612997, "grad_norm": 1.6905293537411432, "learning_rate": 9.954687649140864e-06, "loss": 0.5187, "step": 4705 }, { "epoch": 0.13865032482359355, "grad_norm": 1.9616286707817174, "learning_rate": 9.954618548453673e-06, "loss": 0.5592, "step": 4706 }, { "epoch": 0.13867978728105712, "grad_norm": 1.9027097136052056, "learning_rate": 9.954549395358037e-06, "loss": 0.5428, "step": 4707 }, { "epoch": 0.1387092497385207, "grad_norm": 1.9582715980097596, "learning_rate": 9.954480189854688e-06, "loss": 0.4297, "step": 4708 }, { "epoch": 0.13873871219598427, "grad_norm": 1.5696275159477395, "learning_rate": 9.954410931944356e-06, "loss": 0.5309, "step": 4709 }, { "epoch": 0.13876817465344785, "grad_norm": 1.825201473683876, "learning_rate": 9.954341621627777e-06, "loss": 0.5597, "step": 4710 }, { "epoch": 0.13879763711091142, "grad_norm": 1.8774586380699765, "learning_rate": 9.95427225890568e-06, "loss": 0.5669, "step": 4711 }, { "epoch": 0.138827099568375, "grad_norm": 1.7484869156711635, "learning_rate": 9.954202843778803e-06, "loss": 0.5507, "step": 4712 }, { "epoch": 0.13885656202583857, "grad_norm": 2.0102047852232166, "learning_rate": 9.954133376247878e-06, "loss": 0.585, "step": 4713 }, { "epoch": 0.13888602448330215, "grad_norm": 1.7535383934521556, "learning_rate": 9.954063856313639e-06, "loss": 0.5501, "step": 4714 }, { "epoch": 0.13891548694076572, "grad_norm": 1.7362342446414714, "learning_rate": 9.953994283976822e-06, "loss": 0.556, "step": 4715 }, { "epoch": 0.1389449493982293, "grad_norm": 1.6372066080242262, "learning_rate": 9.953924659238164e-06, "loss": 0.5221, "step": 4716 }, { "epoch": 0.13897441185569287, "grad_norm": 1.9305054644207962, "learning_rate": 9.9538549820984e-06, "loss": 0.6373, "step": 4717 }, { "epoch": 0.13900387431315647, "grad_norm": 1.76716939787225, "learning_rate": 9.953785252558269e-06, "loss": 0.565, "step": 4718 }, { "epoch": 0.13903333677062005, "grad_norm": 2.244152164806645, "learning_rate": 9.953715470618507e-06, "loss": 0.7671, "step": 4719 }, { "epoch": 0.13906279922808362, "grad_norm": 1.840616491464542, "learning_rate": 9.953645636279851e-06, "loss": 0.4384, "step": 4720 }, { "epoch": 0.1390922616855472, "grad_norm": 1.6087593364488015, "learning_rate": 9.953575749543044e-06, "loss": 0.5062, "step": 4721 }, { "epoch": 0.13912172414301077, "grad_norm": 1.6461134982431465, "learning_rate": 9.95350581040882e-06, "loss": 0.5232, "step": 4722 }, { "epoch": 0.13915118660047435, "grad_norm": 1.5771464311878434, "learning_rate": 9.95343581887792e-06, "loss": 0.4748, "step": 4723 }, { "epoch": 0.13918064905793792, "grad_norm": 1.6314927253272822, "learning_rate": 9.953365774951088e-06, "loss": 0.5117, "step": 4724 }, { "epoch": 0.1392101115154015, "grad_norm": 1.6645559083899608, "learning_rate": 9.953295678629063e-06, "loss": 0.4399, "step": 4725 }, { "epoch": 0.13923957397286507, "grad_norm": 1.7176733037743601, "learning_rate": 9.953225529912585e-06, "loss": 0.5791, "step": 4726 }, { "epoch": 0.13926903643032865, "grad_norm": 1.6211348340917668, "learning_rate": 9.953155328802395e-06, "loss": 0.5865, "step": 4727 }, { "epoch": 0.13929849888779222, "grad_norm": 1.921301051784905, "learning_rate": 9.95308507529924e-06, "loss": 0.5617, "step": 4728 }, { "epoch": 0.1393279613452558, "grad_norm": 1.9340472338939272, "learning_rate": 9.95301476940386e-06, "loss": 0.658, "step": 4729 }, { "epoch": 0.13935742380271937, "grad_norm": 1.6548331001296765, "learning_rate": 9.952944411116998e-06, "loss": 0.4625, "step": 4730 }, { "epoch": 0.13938688626018297, "grad_norm": 1.6023346849267537, "learning_rate": 9.952874000439401e-06, "loss": 0.466, "step": 4731 }, { "epoch": 0.13941634871764655, "grad_norm": 1.5799924998807828, "learning_rate": 9.95280353737181e-06, "loss": 0.5723, "step": 4732 }, { "epoch": 0.13944581117511012, "grad_norm": 1.6142462783930838, "learning_rate": 9.952733021914976e-06, "loss": 0.5262, "step": 4733 }, { "epoch": 0.1394752736325737, "grad_norm": 1.8412318029748402, "learning_rate": 9.95266245406964e-06, "loss": 0.7905, "step": 4734 }, { "epoch": 0.13950473609003727, "grad_norm": 1.8838321703947523, "learning_rate": 9.952591833836547e-06, "loss": 0.5988, "step": 4735 }, { "epoch": 0.13953419854750085, "grad_norm": 1.5685061134590812, "learning_rate": 9.95252116121645e-06, "loss": 0.4427, "step": 4736 }, { "epoch": 0.13956366100496442, "grad_norm": 1.9601181270661183, "learning_rate": 9.952450436210092e-06, "loss": 0.6922, "step": 4737 }, { "epoch": 0.139593123462428, "grad_norm": 1.69207022851335, "learning_rate": 9.952379658818226e-06, "loss": 0.5783, "step": 4738 }, { "epoch": 0.13962258591989157, "grad_norm": 1.6934594885013452, "learning_rate": 9.952308829041593e-06, "loss": 0.5801, "step": 4739 }, { "epoch": 0.13965204837735515, "grad_norm": 1.636259186083265, "learning_rate": 9.95223794688095e-06, "loss": 0.5604, "step": 4740 }, { "epoch": 0.13968151083481872, "grad_norm": 1.9579090651152717, "learning_rate": 9.95216701233704e-06, "loss": 0.4533, "step": 4741 }, { "epoch": 0.1397109732922823, "grad_norm": 1.6867087050671767, "learning_rate": 9.952096025410617e-06, "loss": 0.603, "step": 4742 }, { "epoch": 0.13974043574974587, "grad_norm": 1.520211828037916, "learning_rate": 9.952024986102435e-06, "loss": 0.459, "step": 4743 }, { "epoch": 0.13976989820720948, "grad_norm": 1.9126491132920158, "learning_rate": 9.951953894413238e-06, "loss": 0.4851, "step": 4744 }, { "epoch": 0.13979936066467305, "grad_norm": 1.6044776832905892, "learning_rate": 9.951882750343783e-06, "loss": 0.5189, "step": 4745 }, { "epoch": 0.13982882312213663, "grad_norm": 1.53492250983646, "learning_rate": 9.951811553894823e-06, "loss": 0.4576, "step": 4746 }, { "epoch": 0.1398582855796002, "grad_norm": 2.097347946778846, "learning_rate": 9.951740305067108e-06, "loss": 0.342, "step": 4747 }, { "epoch": 0.13988774803706377, "grad_norm": 1.7766979930248037, "learning_rate": 9.951669003861393e-06, "loss": 0.7011, "step": 4748 }, { "epoch": 0.13991721049452735, "grad_norm": 1.5719822708719169, "learning_rate": 9.951597650278434e-06, "loss": 0.543, "step": 4749 }, { "epoch": 0.13994667295199092, "grad_norm": 1.7015309750015797, "learning_rate": 9.95152624431898e-06, "loss": 0.5051, "step": 4750 }, { "epoch": 0.1399761354094545, "grad_norm": 1.4996015163722876, "learning_rate": 9.951454785983793e-06, "loss": 0.4122, "step": 4751 }, { "epoch": 0.14000559786691807, "grad_norm": 1.6544946456152783, "learning_rate": 9.951383275273627e-06, "loss": 0.4917, "step": 4752 }, { "epoch": 0.14003506032438165, "grad_norm": 1.6633539188770812, "learning_rate": 9.951311712189238e-06, "loss": 0.6159, "step": 4753 }, { "epoch": 0.14006452278184522, "grad_norm": 1.8865003991185867, "learning_rate": 9.951240096731383e-06, "loss": 0.6187, "step": 4754 }, { "epoch": 0.1400939852393088, "grad_norm": 1.7974791302757867, "learning_rate": 9.951168428900818e-06, "loss": 0.5308, "step": 4755 }, { "epoch": 0.14012344769677237, "grad_norm": 1.690627701853079, "learning_rate": 9.9510967086983e-06, "loss": 0.3695, "step": 4756 }, { "epoch": 0.14015291015423598, "grad_norm": 1.612614475208075, "learning_rate": 9.951024936124593e-06, "loss": 0.5235, "step": 4757 }, { "epoch": 0.14018237261169955, "grad_norm": 1.6372256483789, "learning_rate": 9.950953111180452e-06, "loss": 0.5219, "step": 4758 }, { "epoch": 0.14021183506916313, "grad_norm": 2.0215622920786194, "learning_rate": 9.950881233866638e-06, "loss": 0.5969, "step": 4759 }, { "epoch": 0.1402412975266267, "grad_norm": 1.5420232488390682, "learning_rate": 9.950809304183913e-06, "loss": 0.4475, "step": 4760 }, { "epoch": 0.14027075998409028, "grad_norm": 1.8292941185528642, "learning_rate": 9.950737322133032e-06, "loss": 0.5771, "step": 4761 }, { "epoch": 0.14030022244155385, "grad_norm": 1.4591483998542263, "learning_rate": 9.950665287714765e-06, "loss": 0.4405, "step": 4762 }, { "epoch": 0.14032968489901743, "grad_norm": 1.5550988347150554, "learning_rate": 9.950593200929866e-06, "loss": 0.5429, "step": 4763 }, { "epoch": 0.140359147356481, "grad_norm": 1.623966807672056, "learning_rate": 9.950521061779102e-06, "loss": 0.4895, "step": 4764 }, { "epoch": 0.14038860981394458, "grad_norm": 1.5836422757613364, "learning_rate": 9.950448870263234e-06, "loss": 0.4206, "step": 4765 }, { "epoch": 0.14041807227140815, "grad_norm": 1.8020839113644491, "learning_rate": 9.950376626383028e-06, "loss": 0.5078, "step": 4766 }, { "epoch": 0.14044753472887173, "grad_norm": 1.6932118880014604, "learning_rate": 9.950304330139246e-06, "loss": 0.5343, "step": 4767 }, { "epoch": 0.1404769971863353, "grad_norm": 1.7626439734289048, "learning_rate": 9.950231981532655e-06, "loss": 0.5316, "step": 4768 }, { "epoch": 0.14050645964379888, "grad_norm": 1.8016678867329217, "learning_rate": 9.950159580564017e-06, "loss": 0.5491, "step": 4769 }, { "epoch": 0.14053592210126248, "grad_norm": 1.6384496620133409, "learning_rate": 9.950087127234099e-06, "loss": 0.5372, "step": 4770 }, { "epoch": 0.14056538455872605, "grad_norm": 1.8191688471985408, "learning_rate": 9.950014621543669e-06, "loss": 0.6275, "step": 4771 }, { "epoch": 0.14059484701618963, "grad_norm": 1.9067810598213786, "learning_rate": 9.949942063493492e-06, "loss": 0.553, "step": 4772 }, { "epoch": 0.1406243094736532, "grad_norm": 1.9032508988927446, "learning_rate": 9.949869453084337e-06, "loss": 0.6175, "step": 4773 }, { "epoch": 0.14065377193111678, "grad_norm": 1.6634669060570972, "learning_rate": 9.949796790316973e-06, "loss": 0.5189, "step": 4774 }, { "epoch": 0.14068323438858035, "grad_norm": 1.6787950465623878, "learning_rate": 9.949724075192165e-06, "loss": 0.4624, "step": 4775 }, { "epoch": 0.14071269684604393, "grad_norm": 1.5579268456391087, "learning_rate": 9.949651307710683e-06, "loss": 0.4299, "step": 4776 }, { "epoch": 0.1407421593035075, "grad_norm": 1.6348467189389277, "learning_rate": 9.9495784878733e-06, "loss": 0.5138, "step": 4777 }, { "epoch": 0.14077162176097108, "grad_norm": 2.041120038583171, "learning_rate": 9.949505615680784e-06, "loss": 0.5029, "step": 4778 }, { "epoch": 0.14080108421843465, "grad_norm": 1.8765406510156426, "learning_rate": 9.949432691133907e-06, "loss": 0.451, "step": 4779 }, { "epoch": 0.14083054667589823, "grad_norm": 1.7077876247572128, "learning_rate": 9.949359714233437e-06, "loss": 0.5886, "step": 4780 }, { "epoch": 0.1408600091333618, "grad_norm": 1.7641143951652705, "learning_rate": 9.94928668498015e-06, "loss": 0.566, "step": 4781 }, { "epoch": 0.14088947159082538, "grad_norm": 1.6885285910767336, "learning_rate": 9.949213603374816e-06, "loss": 0.4159, "step": 4782 }, { "epoch": 0.14091893404828898, "grad_norm": 1.6368213264422666, "learning_rate": 9.949140469418211e-06, "loss": 0.4864, "step": 4783 }, { "epoch": 0.14094839650575255, "grad_norm": 1.5935681547056197, "learning_rate": 9.949067283111104e-06, "loss": 0.5897, "step": 4784 }, { "epoch": 0.14097785896321613, "grad_norm": 1.8098333070209935, "learning_rate": 9.948994044454272e-06, "loss": 0.6496, "step": 4785 }, { "epoch": 0.1410073214206797, "grad_norm": 1.5026464798180874, "learning_rate": 9.948920753448489e-06, "loss": 0.4467, "step": 4786 }, { "epoch": 0.14103678387814328, "grad_norm": 1.6682410858568575, "learning_rate": 9.94884741009453e-06, "loss": 0.5956, "step": 4787 }, { "epoch": 0.14106624633560685, "grad_norm": 1.6407752609453958, "learning_rate": 9.948774014393173e-06, "loss": 0.514, "step": 4788 }, { "epoch": 0.14109570879307043, "grad_norm": 1.6447272862136333, "learning_rate": 9.948700566345192e-06, "loss": 0.4642, "step": 4789 }, { "epoch": 0.141125171250534, "grad_norm": 2.0042583835278838, "learning_rate": 9.948627065951365e-06, "loss": 0.5387, "step": 4790 }, { "epoch": 0.14115463370799758, "grad_norm": 1.8443377165738755, "learning_rate": 9.948553513212468e-06, "loss": 0.5554, "step": 4791 }, { "epoch": 0.14118409616546115, "grad_norm": 1.7727913623878242, "learning_rate": 9.94847990812928e-06, "loss": 0.6213, "step": 4792 }, { "epoch": 0.14121355862292473, "grad_norm": 1.6424724290550148, "learning_rate": 9.94840625070258e-06, "loss": 0.5015, "step": 4793 }, { "epoch": 0.1412430210803883, "grad_norm": 1.8540371538052176, "learning_rate": 9.948332540933148e-06, "loss": 0.5047, "step": 4794 }, { "epoch": 0.14127248353785188, "grad_norm": 1.8538162664122102, "learning_rate": 9.948258778821761e-06, "loss": 0.6173, "step": 4795 }, { "epoch": 0.14130194599531548, "grad_norm": 1.7363669191857694, "learning_rate": 9.948184964369201e-06, "loss": 0.4577, "step": 4796 }, { "epoch": 0.14133140845277906, "grad_norm": 1.73469731731363, "learning_rate": 9.948111097576248e-06, "loss": 0.5975, "step": 4797 }, { "epoch": 0.14136087091024263, "grad_norm": 1.5742241299759754, "learning_rate": 9.948037178443685e-06, "loss": 0.5006, "step": 4798 }, { "epoch": 0.1413903333677062, "grad_norm": 1.5715160091105629, "learning_rate": 9.947963206972291e-06, "loss": 0.4955, "step": 4799 }, { "epoch": 0.14141979582516978, "grad_norm": 1.8125042502559683, "learning_rate": 9.947889183162852e-06, "loss": 0.592, "step": 4800 }, { "epoch": 0.14144925828263336, "grad_norm": 1.9932314464180638, "learning_rate": 9.947815107016148e-06, "loss": 0.532, "step": 4801 }, { "epoch": 0.14147872074009693, "grad_norm": 1.8297368074815277, "learning_rate": 9.947740978532965e-06, "loss": 0.6576, "step": 4802 }, { "epoch": 0.1415081831975605, "grad_norm": 1.6758985514444413, "learning_rate": 9.947666797714085e-06, "loss": 0.4566, "step": 4803 }, { "epoch": 0.14153764565502408, "grad_norm": 1.917377266529733, "learning_rate": 9.947592564560293e-06, "loss": 0.5915, "step": 4804 }, { "epoch": 0.14156710811248766, "grad_norm": 1.7237446364554658, "learning_rate": 9.947518279072374e-06, "loss": 0.5488, "step": 4805 }, { "epoch": 0.14159657056995123, "grad_norm": 1.778415517401362, "learning_rate": 9.947443941251116e-06, "loss": 0.5574, "step": 4806 }, { "epoch": 0.1416260330274148, "grad_norm": 1.6335945065610697, "learning_rate": 9.947369551097304e-06, "loss": 0.5645, "step": 4807 }, { "epoch": 0.14165549548487838, "grad_norm": 1.8194192931803406, "learning_rate": 9.947295108611723e-06, "loss": 0.5647, "step": 4808 }, { "epoch": 0.14168495794234198, "grad_norm": 1.702346652734173, "learning_rate": 9.947220613795163e-06, "loss": 0.611, "step": 4809 }, { "epoch": 0.14171442039980556, "grad_norm": 1.8571953118848534, "learning_rate": 9.94714606664841e-06, "loss": 0.5355, "step": 4810 }, { "epoch": 0.14174388285726913, "grad_norm": 2.135790598424169, "learning_rate": 9.947071467172254e-06, "loss": 0.711, "step": 4811 }, { "epoch": 0.1417733453147327, "grad_norm": 1.788857890072243, "learning_rate": 9.946996815367485e-06, "loss": 0.5285, "step": 4812 }, { "epoch": 0.14180280777219628, "grad_norm": 1.7421815117110842, "learning_rate": 9.94692211123489e-06, "loss": 0.5478, "step": 4813 }, { "epoch": 0.14183227022965986, "grad_norm": 1.6558307833171437, "learning_rate": 9.946847354775262e-06, "loss": 0.5148, "step": 4814 }, { "epoch": 0.14186173268712343, "grad_norm": 1.803110379757397, "learning_rate": 9.946772545989389e-06, "loss": 0.4946, "step": 4815 }, { "epoch": 0.141891195144587, "grad_norm": 1.9049016765302418, "learning_rate": 9.946697684878064e-06, "loss": 0.5433, "step": 4816 }, { "epoch": 0.14192065760205058, "grad_norm": 1.592088005063122, "learning_rate": 9.946622771442078e-06, "loss": 0.4982, "step": 4817 }, { "epoch": 0.14195012005951416, "grad_norm": 1.8226901369120287, "learning_rate": 9.946547805682226e-06, "loss": 0.4418, "step": 4818 }, { "epoch": 0.14197958251697773, "grad_norm": 1.5966121626848742, "learning_rate": 9.946472787599295e-06, "loss": 0.4969, "step": 4819 }, { "epoch": 0.1420090449744413, "grad_norm": 1.6332092548862363, "learning_rate": 9.946397717194084e-06, "loss": 0.6152, "step": 4820 }, { "epoch": 0.1420385074319049, "grad_norm": 1.5833043226571775, "learning_rate": 9.946322594467387e-06, "loss": 0.5783, "step": 4821 }, { "epoch": 0.14206796988936848, "grad_norm": 1.658119644760873, "learning_rate": 9.946247419419995e-06, "loss": 0.4998, "step": 4822 }, { "epoch": 0.14209743234683206, "grad_norm": 1.628924630565989, "learning_rate": 9.946172192052706e-06, "loss": 0.512, "step": 4823 }, { "epoch": 0.14212689480429563, "grad_norm": 1.5703853412490245, "learning_rate": 9.946096912366315e-06, "loss": 0.5539, "step": 4824 }, { "epoch": 0.1421563572617592, "grad_norm": 1.6034752894269195, "learning_rate": 9.946021580361619e-06, "loss": 0.4393, "step": 4825 }, { "epoch": 0.14218581971922278, "grad_norm": 1.896430227209399, "learning_rate": 9.945946196039413e-06, "loss": 0.6108, "step": 4826 }, { "epoch": 0.14221528217668636, "grad_norm": 1.58530278778818, "learning_rate": 9.945870759400496e-06, "loss": 0.43, "step": 4827 }, { "epoch": 0.14224474463414993, "grad_norm": 1.619844438480201, "learning_rate": 9.945795270445665e-06, "loss": 0.3443, "step": 4828 }, { "epoch": 0.1422742070916135, "grad_norm": 1.7342000488498086, "learning_rate": 9.94571972917572e-06, "loss": 0.4978, "step": 4829 }, { "epoch": 0.14230366954907708, "grad_norm": 1.6641165727447167, "learning_rate": 9.945644135591458e-06, "loss": 0.4611, "step": 4830 }, { "epoch": 0.14233313200654066, "grad_norm": 1.5184888926211133, "learning_rate": 9.945568489693678e-06, "loss": 0.5667, "step": 4831 }, { "epoch": 0.14236259446400423, "grad_norm": 1.649258072688183, "learning_rate": 9.945492791483185e-06, "loss": 0.5904, "step": 4832 }, { "epoch": 0.1423920569214678, "grad_norm": 1.6991299012193293, "learning_rate": 9.945417040960775e-06, "loss": 0.5615, "step": 4833 }, { "epoch": 0.1424215193789314, "grad_norm": 1.7585790484323585, "learning_rate": 9.94534123812725e-06, "loss": 0.5615, "step": 4834 }, { "epoch": 0.14245098183639499, "grad_norm": 1.7427717036726271, "learning_rate": 9.945265382983411e-06, "loss": 0.5741, "step": 4835 }, { "epoch": 0.14248044429385856, "grad_norm": 1.8135394331925783, "learning_rate": 9.945189475530066e-06, "loss": 0.5614, "step": 4836 }, { "epoch": 0.14250990675132214, "grad_norm": 1.7339141191931067, "learning_rate": 9.94511351576801e-06, "loss": 0.3821, "step": 4837 }, { "epoch": 0.1425393692087857, "grad_norm": 1.7596755693815627, "learning_rate": 9.945037503698052e-06, "loss": 0.6062, "step": 4838 }, { "epoch": 0.14256883166624928, "grad_norm": 1.8394234352389152, "learning_rate": 9.944961439320994e-06, "loss": 0.6329, "step": 4839 }, { "epoch": 0.14259829412371286, "grad_norm": 1.640689417939809, "learning_rate": 9.944885322637642e-06, "loss": 0.4933, "step": 4840 }, { "epoch": 0.14262775658117643, "grad_norm": 1.7547280279148996, "learning_rate": 9.9448091536488e-06, "loss": 0.5034, "step": 4841 }, { "epoch": 0.14265721903864, "grad_norm": 1.6212998511770629, "learning_rate": 9.944732932355273e-06, "loss": 0.5145, "step": 4842 }, { "epoch": 0.14268668149610358, "grad_norm": 1.656658762339579, "learning_rate": 9.944656658757868e-06, "loss": 0.5397, "step": 4843 }, { "epoch": 0.14271614395356716, "grad_norm": 1.5835064883074024, "learning_rate": 9.944580332857392e-06, "loss": 0.5298, "step": 4844 }, { "epoch": 0.14274560641103073, "grad_norm": 1.7470076928076221, "learning_rate": 9.944503954654652e-06, "loss": 0.621, "step": 4845 }, { "epoch": 0.1427750688684943, "grad_norm": 1.7509268381300458, "learning_rate": 9.944427524150455e-06, "loss": 0.578, "step": 4846 }, { "epoch": 0.1428045313259579, "grad_norm": 1.706126025564426, "learning_rate": 9.944351041345613e-06, "loss": 0.5562, "step": 4847 }, { "epoch": 0.1428339937834215, "grad_norm": 1.623831328302413, "learning_rate": 9.944274506240931e-06, "loss": 0.4046, "step": 4848 }, { "epoch": 0.14286345624088506, "grad_norm": 1.6291409586286898, "learning_rate": 9.94419791883722e-06, "loss": 0.5598, "step": 4849 }, { "epoch": 0.14289291869834864, "grad_norm": 1.6477623060795745, "learning_rate": 9.94412127913529e-06, "loss": 0.4814, "step": 4850 }, { "epoch": 0.1429223811558122, "grad_norm": 1.7704555040739374, "learning_rate": 9.944044587135953e-06, "loss": 0.584, "step": 4851 }, { "epoch": 0.1429518436132758, "grad_norm": 1.5839333833881963, "learning_rate": 9.943967842840019e-06, "loss": 0.5273, "step": 4852 }, { "epoch": 0.14298130607073936, "grad_norm": 1.5996865575506025, "learning_rate": 9.943891046248299e-06, "loss": 0.4488, "step": 4853 }, { "epoch": 0.14301076852820294, "grad_norm": 1.8474987819826234, "learning_rate": 9.943814197361607e-06, "loss": 0.596, "step": 4854 }, { "epoch": 0.1430402309856665, "grad_norm": 1.7477132773717732, "learning_rate": 9.943737296180753e-06, "loss": 0.492, "step": 4855 }, { "epoch": 0.14306969344313009, "grad_norm": 1.5917876572390488, "learning_rate": 9.943660342706556e-06, "loss": 0.6978, "step": 4856 }, { "epoch": 0.14309915590059366, "grad_norm": 1.576491105165362, "learning_rate": 9.943583336939824e-06, "loss": 0.5082, "step": 4857 }, { "epoch": 0.14312861835805724, "grad_norm": 1.534515413139826, "learning_rate": 9.943506278881375e-06, "loss": 0.4451, "step": 4858 }, { "epoch": 0.1431580808155208, "grad_norm": 1.684479877876914, "learning_rate": 9.943429168532021e-06, "loss": 0.5822, "step": 4859 }, { "epoch": 0.1431875432729844, "grad_norm": 1.9995949116531635, "learning_rate": 9.943352005892584e-06, "loss": 0.7781, "step": 4860 }, { "epoch": 0.143217005730448, "grad_norm": 1.6903805747314502, "learning_rate": 9.943274790963873e-06, "loss": 0.4566, "step": 4861 }, { "epoch": 0.14324646818791156, "grad_norm": 1.6854620306326222, "learning_rate": 9.943197523746709e-06, "loss": 0.473, "step": 4862 }, { "epoch": 0.14327593064537514, "grad_norm": 1.927818730841035, "learning_rate": 9.943120204241908e-06, "loss": 0.6715, "step": 4863 }, { "epoch": 0.1433053931028387, "grad_norm": 1.7001657750087276, "learning_rate": 9.943042832450287e-06, "loss": 0.5889, "step": 4864 }, { "epoch": 0.1433348555603023, "grad_norm": 1.636575067371134, "learning_rate": 9.942965408372665e-06, "loss": 0.5445, "step": 4865 }, { "epoch": 0.14336431801776586, "grad_norm": 1.7280103673438676, "learning_rate": 9.942887932009863e-06, "loss": 0.4546, "step": 4866 }, { "epoch": 0.14339378047522944, "grad_norm": 1.75277934343977, "learning_rate": 9.942810403362697e-06, "loss": 0.5363, "step": 4867 }, { "epoch": 0.143423242932693, "grad_norm": 1.8596065072506909, "learning_rate": 9.942732822431991e-06, "loss": 0.5922, "step": 4868 }, { "epoch": 0.1434527053901566, "grad_norm": 1.7557091645743659, "learning_rate": 9.942655189218562e-06, "loss": 0.579, "step": 4869 }, { "epoch": 0.14348216784762016, "grad_norm": 1.6403876175292709, "learning_rate": 9.942577503723234e-06, "loss": 0.6329, "step": 4870 }, { "epoch": 0.14351163030508374, "grad_norm": 1.6435033213309962, "learning_rate": 9.942499765946826e-06, "loss": 0.5596, "step": 4871 }, { "epoch": 0.1435410927625473, "grad_norm": 1.882574017415962, "learning_rate": 9.942421975890163e-06, "loss": 0.6797, "step": 4872 }, { "epoch": 0.14357055522001091, "grad_norm": 2.026099102147135, "learning_rate": 9.942344133554065e-06, "loss": 0.5788, "step": 4873 }, { "epoch": 0.1436000176774745, "grad_norm": 1.5422485577957838, "learning_rate": 9.942266238939358e-06, "loss": 0.5478, "step": 4874 }, { "epoch": 0.14362948013493806, "grad_norm": 1.9130037282023122, "learning_rate": 9.942188292046865e-06, "loss": 0.5439, "step": 4875 }, { "epoch": 0.14365894259240164, "grad_norm": 1.7207183497832343, "learning_rate": 9.942110292877411e-06, "loss": 0.6676, "step": 4876 }, { "epoch": 0.14368840504986521, "grad_norm": 1.556890537577289, "learning_rate": 9.94203224143182e-06, "loss": 0.4183, "step": 4877 }, { "epoch": 0.1437178675073288, "grad_norm": 1.7471371424437148, "learning_rate": 9.941954137710918e-06, "loss": 0.5333, "step": 4878 }, { "epoch": 0.14374732996479236, "grad_norm": 1.6517772123229388, "learning_rate": 9.941875981715532e-06, "loss": 0.6017, "step": 4879 }, { "epoch": 0.14377679242225594, "grad_norm": 1.8208027325796086, "learning_rate": 9.941797773446486e-06, "loss": 0.5494, "step": 4880 }, { "epoch": 0.14380625487971951, "grad_norm": 1.5695385976282534, "learning_rate": 9.941719512904611e-06, "loss": 0.5199, "step": 4881 }, { "epoch": 0.1438357173371831, "grad_norm": 1.7645723383305454, "learning_rate": 9.941641200090733e-06, "loss": 0.6908, "step": 4882 }, { "epoch": 0.14386517979464666, "grad_norm": 1.886791512940247, "learning_rate": 9.94156283500568e-06, "loss": 0.4683, "step": 4883 }, { "epoch": 0.14389464225211024, "grad_norm": 1.7265362898900487, "learning_rate": 9.94148441765028e-06, "loss": 0.5009, "step": 4884 }, { "epoch": 0.1439241047095738, "grad_norm": 1.548372602010598, "learning_rate": 9.941405948025365e-06, "loss": 0.4596, "step": 4885 }, { "epoch": 0.14395356716703742, "grad_norm": 1.6927365022868124, "learning_rate": 9.941327426131764e-06, "loss": 0.5781, "step": 4886 }, { "epoch": 0.143983029624501, "grad_norm": 1.7928684428275201, "learning_rate": 9.941248851970306e-06, "loss": 0.5674, "step": 4887 }, { "epoch": 0.14401249208196457, "grad_norm": 1.6774480117755644, "learning_rate": 9.941170225541825e-06, "loss": 0.4271, "step": 4888 }, { "epoch": 0.14404195453942814, "grad_norm": 1.7675021724105702, "learning_rate": 9.941091546847152e-06, "loss": 0.4689, "step": 4889 }, { "epoch": 0.14407141699689172, "grad_norm": 1.8521849807740218, "learning_rate": 9.941012815887117e-06, "loss": 0.549, "step": 4890 }, { "epoch": 0.1441008794543553, "grad_norm": 1.7829643816168674, "learning_rate": 9.940934032662554e-06, "loss": 0.5889, "step": 4891 }, { "epoch": 0.14413034191181887, "grad_norm": 1.680002436251056, "learning_rate": 9.940855197174296e-06, "loss": 0.5976, "step": 4892 }, { "epoch": 0.14415980436928244, "grad_norm": 1.9453276615478114, "learning_rate": 9.940776309423181e-06, "loss": 0.6156, "step": 4893 }, { "epoch": 0.14418926682674602, "grad_norm": 1.9469817523793131, "learning_rate": 9.940697369410036e-06, "loss": 0.5937, "step": 4894 }, { "epoch": 0.1442187292842096, "grad_norm": 1.6518915608597808, "learning_rate": 9.940618377135703e-06, "loss": 0.4767, "step": 4895 }, { "epoch": 0.14424819174167317, "grad_norm": 1.8561294494328207, "learning_rate": 9.940539332601013e-06, "loss": 0.4171, "step": 4896 }, { "epoch": 0.14427765419913674, "grad_norm": 1.7105384764003586, "learning_rate": 9.940460235806803e-06, "loss": 0.5579, "step": 4897 }, { "epoch": 0.14430711665660031, "grad_norm": 1.796427055974045, "learning_rate": 9.940381086753913e-06, "loss": 0.6113, "step": 4898 }, { "epoch": 0.14433657911406392, "grad_norm": 1.89478279331998, "learning_rate": 9.940301885443174e-06, "loss": 0.5363, "step": 4899 }, { "epoch": 0.1443660415715275, "grad_norm": 1.8315810741046237, "learning_rate": 9.940222631875429e-06, "loss": 0.595, "step": 4900 }, { "epoch": 0.14439550402899107, "grad_norm": 1.7216496564233954, "learning_rate": 9.940143326051514e-06, "loss": 0.5339, "step": 4901 }, { "epoch": 0.14442496648645464, "grad_norm": 1.7953751186535925, "learning_rate": 9.94006396797227e-06, "loss": 0.647, "step": 4902 }, { "epoch": 0.14445442894391822, "grad_norm": 1.6159616248852018, "learning_rate": 9.939984557638533e-06, "loss": 0.4847, "step": 4903 }, { "epoch": 0.1444838914013818, "grad_norm": 1.9393740004497049, "learning_rate": 9.939905095051145e-06, "loss": 0.4146, "step": 4904 }, { "epoch": 0.14451335385884537, "grad_norm": 1.671830089504506, "learning_rate": 9.939825580210948e-06, "loss": 0.5427, "step": 4905 }, { "epoch": 0.14454281631630894, "grad_norm": 1.5765006485785809, "learning_rate": 9.93974601311878e-06, "loss": 0.5025, "step": 4906 }, { "epoch": 0.14457227877377252, "grad_norm": 1.609979577463458, "learning_rate": 9.939666393775484e-06, "loss": 0.4998, "step": 4907 }, { "epoch": 0.1446017412312361, "grad_norm": 1.5436487881184564, "learning_rate": 9.939586722181904e-06, "loss": 0.572, "step": 4908 }, { "epoch": 0.14463120368869967, "grad_norm": 1.7083262350477293, "learning_rate": 9.93950699833888e-06, "loss": 0.6408, "step": 4909 }, { "epoch": 0.14466066614616324, "grad_norm": 1.6422567849869454, "learning_rate": 9.939427222247255e-06, "loss": 0.4312, "step": 4910 }, { "epoch": 0.14469012860362682, "grad_norm": 1.7265095754470365, "learning_rate": 9.939347393907876e-06, "loss": 0.5256, "step": 4911 }, { "epoch": 0.14471959106109042, "grad_norm": 1.5330171041565326, "learning_rate": 9.939267513321585e-06, "loss": 0.5423, "step": 4912 }, { "epoch": 0.144749053518554, "grad_norm": 1.6853586196789987, "learning_rate": 9.939187580489227e-06, "loss": 0.5206, "step": 4913 }, { "epoch": 0.14477851597601757, "grad_norm": 1.5426657891471611, "learning_rate": 9.93910759541165e-06, "loss": 0.4479, "step": 4914 }, { "epoch": 0.14480797843348114, "grad_norm": 1.6567093680455727, "learning_rate": 9.939027558089695e-06, "loss": 0.5026, "step": 4915 }, { "epoch": 0.14483744089094472, "grad_norm": 1.6217446767549002, "learning_rate": 9.938947468524214e-06, "loss": 0.3905, "step": 4916 }, { "epoch": 0.1448669033484083, "grad_norm": 1.8459707998803416, "learning_rate": 9.938867326716051e-06, "loss": 0.7178, "step": 4917 }, { "epoch": 0.14489636580587187, "grad_norm": 1.663378225718936, "learning_rate": 9.938787132666054e-06, "loss": 0.6133, "step": 4918 }, { "epoch": 0.14492582826333544, "grad_norm": 1.7400152008304832, "learning_rate": 9.938706886375073e-06, "loss": 0.6829, "step": 4919 }, { "epoch": 0.14495529072079902, "grad_norm": 1.6265179863986152, "learning_rate": 9.938626587843957e-06, "loss": 0.6036, "step": 4920 }, { "epoch": 0.1449847531782626, "grad_norm": 1.702216572146001, "learning_rate": 9.938546237073552e-06, "loss": 0.4817, "step": 4921 }, { "epoch": 0.14501421563572617, "grad_norm": 1.8699788905984032, "learning_rate": 9.93846583406471e-06, "loss": 0.7514, "step": 4922 }, { "epoch": 0.14504367809318974, "grad_norm": 1.6602830437322056, "learning_rate": 9.938385378818281e-06, "loss": 0.591, "step": 4923 }, { "epoch": 0.14507314055065332, "grad_norm": 1.6812563318101592, "learning_rate": 9.938304871335119e-06, "loss": 0.5475, "step": 4924 }, { "epoch": 0.14510260300811692, "grad_norm": 1.7990962068536502, "learning_rate": 9.93822431161607e-06, "loss": 0.5767, "step": 4925 }, { "epoch": 0.1451320654655805, "grad_norm": 1.8384552765271973, "learning_rate": 9.938143699661991e-06, "loss": 0.4621, "step": 4926 }, { "epoch": 0.14516152792304407, "grad_norm": 1.6588553484673425, "learning_rate": 9.938063035473732e-06, "loss": 0.4665, "step": 4927 }, { "epoch": 0.14519099038050765, "grad_norm": 1.6594923132956074, "learning_rate": 9.937982319052149e-06, "loss": 0.5858, "step": 4928 }, { "epoch": 0.14522045283797122, "grad_norm": 1.8632552888125178, "learning_rate": 9.937901550398092e-06, "loss": 0.5955, "step": 4929 }, { "epoch": 0.1452499152954348, "grad_norm": 1.7952797593953722, "learning_rate": 9.937820729512418e-06, "loss": 0.4865, "step": 4930 }, { "epoch": 0.14527937775289837, "grad_norm": 1.5730190475962753, "learning_rate": 9.93773985639598e-06, "loss": 0.4566, "step": 4931 }, { "epoch": 0.14530884021036194, "grad_norm": 1.5523214517656672, "learning_rate": 9.937658931049637e-06, "loss": 0.6157, "step": 4932 }, { "epoch": 0.14533830266782552, "grad_norm": 1.6609416886377422, "learning_rate": 9.93757795347424e-06, "loss": 0.5018, "step": 4933 }, { "epoch": 0.1453677651252891, "grad_norm": 1.6471619675810036, "learning_rate": 9.93749692367065e-06, "loss": 0.5529, "step": 4934 }, { "epoch": 0.14539722758275267, "grad_norm": 1.719827887569577, "learning_rate": 9.937415841639721e-06, "loss": 0.5046, "step": 4935 }, { "epoch": 0.14542669004021624, "grad_norm": 1.8913956512105752, "learning_rate": 9.937334707382314e-06, "loss": 0.6014, "step": 4936 }, { "epoch": 0.14545615249767982, "grad_norm": 1.6516168355218839, "learning_rate": 9.937253520899284e-06, "loss": 0.4784, "step": 4937 }, { "epoch": 0.14548561495514342, "grad_norm": 1.701920241377983, "learning_rate": 9.93717228219149e-06, "loss": 0.579, "step": 4938 }, { "epoch": 0.145515077412607, "grad_norm": 1.9125128462835026, "learning_rate": 9.937090991259793e-06, "loss": 0.5693, "step": 4939 }, { "epoch": 0.14554453987007057, "grad_norm": 1.686152386573071, "learning_rate": 9.937009648105052e-06, "loss": 0.4408, "step": 4940 }, { "epoch": 0.14557400232753415, "grad_norm": 1.641186302977719, "learning_rate": 9.936928252728129e-06, "loss": 0.5855, "step": 4941 }, { "epoch": 0.14560346478499772, "grad_norm": 1.53752507289314, "learning_rate": 9.93684680512988e-06, "loss": 0.4058, "step": 4942 }, { "epoch": 0.1456329272424613, "grad_norm": 1.5904855439203769, "learning_rate": 9.936765305311173e-06, "loss": 0.354, "step": 4943 }, { "epoch": 0.14566238969992487, "grad_norm": 1.6969682420861418, "learning_rate": 9.936683753272867e-06, "loss": 0.5067, "step": 4944 }, { "epoch": 0.14569185215738845, "grad_norm": 1.49369510620977, "learning_rate": 9.936602149015824e-06, "loss": 0.4177, "step": 4945 }, { "epoch": 0.14572131461485202, "grad_norm": 1.6916186165855325, "learning_rate": 9.936520492540907e-06, "loss": 0.5039, "step": 4946 }, { "epoch": 0.1457507770723156, "grad_norm": 1.8266620585752793, "learning_rate": 9.936438783848983e-06, "loss": 0.5329, "step": 4947 }, { "epoch": 0.14578023952977917, "grad_norm": 1.5803945970856346, "learning_rate": 9.936357022940913e-06, "loss": 0.4165, "step": 4948 }, { "epoch": 0.14580970198724275, "grad_norm": 1.720959015996193, "learning_rate": 9.936275209817562e-06, "loss": 0.5581, "step": 4949 }, { "epoch": 0.14583916444470632, "grad_norm": 1.6693223197416793, "learning_rate": 9.936193344479796e-06, "loss": 0.5615, "step": 4950 }, { "epoch": 0.14586862690216992, "grad_norm": 1.8768344260127308, "learning_rate": 9.936111426928483e-06, "loss": 0.6736, "step": 4951 }, { "epoch": 0.1458980893596335, "grad_norm": 1.6018088540558266, "learning_rate": 9.936029457164487e-06, "loss": 0.4701, "step": 4952 }, { "epoch": 0.14592755181709707, "grad_norm": 1.5665471637928217, "learning_rate": 9.935947435188673e-06, "loss": 0.4379, "step": 4953 }, { "epoch": 0.14595701427456065, "grad_norm": 1.5981642778035894, "learning_rate": 9.935865361001915e-06, "loss": 0.5074, "step": 4954 }, { "epoch": 0.14598647673202422, "grad_norm": 1.6162532813562602, "learning_rate": 9.935783234605076e-06, "loss": 0.5967, "step": 4955 }, { "epoch": 0.1460159391894878, "grad_norm": 1.7364978660969559, "learning_rate": 9.935701055999027e-06, "loss": 0.5136, "step": 4956 }, { "epoch": 0.14604540164695137, "grad_norm": 1.6568829054989007, "learning_rate": 9.935618825184635e-06, "loss": 0.6979, "step": 4957 }, { "epoch": 0.14607486410441495, "grad_norm": 1.5686494858021671, "learning_rate": 9.93553654216277e-06, "loss": 0.5315, "step": 4958 }, { "epoch": 0.14610432656187852, "grad_norm": 1.6778543811601632, "learning_rate": 9.935454206934307e-06, "loss": 0.5682, "step": 4959 }, { "epoch": 0.1461337890193421, "grad_norm": 1.7778702126485717, "learning_rate": 9.935371819500113e-06, "loss": 0.6667, "step": 4960 }, { "epoch": 0.14616325147680567, "grad_norm": 1.6025359669782009, "learning_rate": 9.935289379861059e-06, "loss": 0.5369, "step": 4961 }, { "epoch": 0.14619271393426925, "grad_norm": 1.6265870902040809, "learning_rate": 9.935206888018018e-06, "loss": 0.3845, "step": 4962 }, { "epoch": 0.14622217639173282, "grad_norm": 1.6509213995165666, "learning_rate": 9.935124343971862e-06, "loss": 0.5239, "step": 4963 }, { "epoch": 0.14625163884919642, "grad_norm": 1.7684393749234772, "learning_rate": 9.935041747723467e-06, "loss": 0.6849, "step": 4964 }, { "epoch": 0.14628110130666, "grad_norm": 1.6028108646381813, "learning_rate": 9.934959099273701e-06, "loss": 0.5415, "step": 4965 }, { "epoch": 0.14631056376412357, "grad_norm": 1.7193890770152895, "learning_rate": 9.934876398623444e-06, "loss": 0.649, "step": 4966 }, { "epoch": 0.14634002622158715, "grad_norm": 1.5685665927230625, "learning_rate": 9.934793645773568e-06, "loss": 0.6506, "step": 4967 }, { "epoch": 0.14636948867905072, "grad_norm": 1.6637749323413689, "learning_rate": 9.934710840724948e-06, "loss": 0.3736, "step": 4968 }, { "epoch": 0.1463989511365143, "grad_norm": 1.7269778458781633, "learning_rate": 9.934627983478463e-06, "loss": 0.574, "step": 4969 }, { "epoch": 0.14642841359397787, "grad_norm": 1.798271720367284, "learning_rate": 9.934545074034983e-06, "loss": 0.6105, "step": 4970 }, { "epoch": 0.14645787605144145, "grad_norm": 1.3958428983141142, "learning_rate": 9.934462112395393e-06, "loss": 0.4643, "step": 4971 }, { "epoch": 0.14648733850890502, "grad_norm": 1.63287271145784, "learning_rate": 9.934379098560564e-06, "loss": 0.5395, "step": 4972 }, { "epoch": 0.1465168009663686, "grad_norm": 1.5236500503817365, "learning_rate": 9.934296032531378e-06, "loss": 0.4529, "step": 4973 }, { "epoch": 0.14654626342383217, "grad_norm": 1.6128058492127344, "learning_rate": 9.934212914308712e-06, "loss": 0.4824, "step": 4974 }, { "epoch": 0.14657572588129575, "grad_norm": 1.5073541498623568, "learning_rate": 9.934129743893445e-06, "loss": 0.5261, "step": 4975 }, { "epoch": 0.14660518833875932, "grad_norm": 1.956300076434741, "learning_rate": 9.934046521286459e-06, "loss": 0.7123, "step": 4976 }, { "epoch": 0.14663465079622293, "grad_norm": 1.6841565367523508, "learning_rate": 9.93396324648863e-06, "loss": 0.5149, "step": 4977 }, { "epoch": 0.1466641132536865, "grad_norm": 1.937355947251175, "learning_rate": 9.933879919500843e-06, "loss": 0.6191, "step": 4978 }, { "epoch": 0.14669357571115008, "grad_norm": 1.4513490028188667, "learning_rate": 9.933796540323976e-06, "loss": 0.4333, "step": 4979 }, { "epoch": 0.14672303816861365, "grad_norm": 1.7051792004256863, "learning_rate": 9.933713108958915e-06, "loss": 0.5113, "step": 4980 }, { "epoch": 0.14675250062607723, "grad_norm": 1.7589940704361036, "learning_rate": 9.93362962540654e-06, "loss": 0.5794, "step": 4981 }, { "epoch": 0.1467819630835408, "grad_norm": 1.6859863829280541, "learning_rate": 9.933546089667734e-06, "loss": 0.6859, "step": 4982 }, { "epoch": 0.14681142554100438, "grad_norm": 1.888386051611759, "learning_rate": 9.933462501743383e-06, "loss": 0.5565, "step": 4983 }, { "epoch": 0.14684088799846795, "grad_norm": 1.6108700948019563, "learning_rate": 9.933378861634366e-06, "loss": 0.5986, "step": 4984 }, { "epoch": 0.14687035045593153, "grad_norm": 1.9930629935111641, "learning_rate": 9.933295169341573e-06, "loss": 0.5786, "step": 4985 }, { "epoch": 0.1468998129133951, "grad_norm": 1.786673872122024, "learning_rate": 9.933211424865887e-06, "loss": 0.6541, "step": 4986 }, { "epoch": 0.14692927537085868, "grad_norm": 1.4812214055929434, "learning_rate": 9.933127628208193e-06, "loss": 0.3378, "step": 4987 }, { "epoch": 0.14695873782832225, "grad_norm": 1.6040399253699036, "learning_rate": 9.93304377936938e-06, "loss": 0.5503, "step": 4988 }, { "epoch": 0.14698820028578582, "grad_norm": 1.960530918874231, "learning_rate": 9.932959878350331e-06, "loss": 0.5804, "step": 4989 }, { "epoch": 0.14701766274324943, "grad_norm": 1.5582728721618653, "learning_rate": 9.932875925151937e-06, "loss": 0.4833, "step": 4990 }, { "epoch": 0.147047125200713, "grad_norm": 1.809872770729068, "learning_rate": 9.932791919775084e-06, "loss": 0.5488, "step": 4991 }, { "epoch": 0.14707658765817658, "grad_norm": 1.4022885494413275, "learning_rate": 9.932707862220663e-06, "loss": 0.4685, "step": 4992 }, { "epoch": 0.14710605011564015, "grad_norm": 1.7301904331140956, "learning_rate": 9.93262375248956e-06, "loss": 0.4147, "step": 4993 }, { "epoch": 0.14713551257310373, "grad_norm": 1.7130668649946859, "learning_rate": 9.932539590582667e-06, "loss": 0.4305, "step": 4994 }, { "epoch": 0.1471649750305673, "grad_norm": 1.6644364891348806, "learning_rate": 9.932455376500874e-06, "loss": 0.4719, "step": 4995 }, { "epoch": 0.14719443748803088, "grad_norm": 1.9299894786148128, "learning_rate": 9.932371110245069e-06, "loss": 0.6776, "step": 4996 }, { "epoch": 0.14722389994549445, "grad_norm": 1.842994096765976, "learning_rate": 9.932286791816148e-06, "loss": 0.5694, "step": 4997 }, { "epoch": 0.14725336240295803, "grad_norm": 1.7733447018410866, "learning_rate": 9.932202421215e-06, "loss": 0.6008, "step": 4998 }, { "epoch": 0.1472828248604216, "grad_norm": 2.0359878817128854, "learning_rate": 9.932117998442518e-06, "loss": 0.5214, "step": 4999 }, { "epoch": 0.14731228731788518, "grad_norm": 1.8202089692091026, "learning_rate": 9.932033523499593e-06, "loss": 0.5207, "step": 5000 }, { "epoch": 0.14734174977534875, "grad_norm": 1.6815084052876468, "learning_rate": 9.931948996387123e-06, "loss": 0.4601, "step": 5001 }, { "epoch": 0.14737121223281233, "grad_norm": 1.541599347737566, "learning_rate": 9.931864417105998e-06, "loss": 0.4655, "step": 5002 }, { "epoch": 0.14740067469027593, "grad_norm": 1.5403105902020355, "learning_rate": 9.931779785657116e-06, "loss": 0.4535, "step": 5003 }, { "epoch": 0.1474301371477395, "grad_norm": 1.5834263325685909, "learning_rate": 9.931695102041368e-06, "loss": 0.4505, "step": 5004 }, { "epoch": 0.14745959960520308, "grad_norm": 1.5125818952893684, "learning_rate": 9.931610366259654e-06, "loss": 0.3934, "step": 5005 }, { "epoch": 0.14748906206266665, "grad_norm": 1.661041699878168, "learning_rate": 9.931525578312868e-06, "loss": 0.5244, "step": 5006 }, { "epoch": 0.14751852452013023, "grad_norm": 2.0557587275415803, "learning_rate": 9.931440738201908e-06, "loss": 0.5419, "step": 5007 }, { "epoch": 0.1475479869775938, "grad_norm": 1.6998059879011054, "learning_rate": 9.931355845927669e-06, "loss": 0.4957, "step": 5008 }, { "epoch": 0.14757744943505738, "grad_norm": 1.6081711010335633, "learning_rate": 9.93127090149105e-06, "loss": 0.5075, "step": 5009 }, { "epoch": 0.14760691189252095, "grad_norm": 1.5957286303325675, "learning_rate": 9.931185904892953e-06, "loss": 0.5339, "step": 5010 }, { "epoch": 0.14763637434998453, "grad_norm": 1.6274441770735801, "learning_rate": 9.931100856134274e-06, "loss": 0.5371, "step": 5011 }, { "epoch": 0.1476658368074481, "grad_norm": 1.8222336686211014, "learning_rate": 9.931015755215912e-06, "loss": 0.441, "step": 5012 }, { "epoch": 0.14769529926491168, "grad_norm": 1.8271614786734445, "learning_rate": 9.930930602138768e-06, "loss": 0.5288, "step": 5013 }, { "epoch": 0.14772476172237525, "grad_norm": 1.8075109963295304, "learning_rate": 9.930845396903743e-06, "loss": 0.5212, "step": 5014 }, { "epoch": 0.14775422417983883, "grad_norm": 1.6611281442177785, "learning_rate": 9.93076013951174e-06, "loss": 0.5542, "step": 5015 }, { "epoch": 0.14778368663730243, "grad_norm": 1.8505228902475181, "learning_rate": 9.930674829963656e-06, "loss": 0.6219, "step": 5016 }, { "epoch": 0.147813149094766, "grad_norm": 1.8801300114865007, "learning_rate": 9.930589468260397e-06, "loss": 0.5205, "step": 5017 }, { "epoch": 0.14784261155222958, "grad_norm": 2.0544387444546346, "learning_rate": 9.930504054402868e-06, "loss": 0.612, "step": 5018 }, { "epoch": 0.14787207400969316, "grad_norm": 1.7509181052786635, "learning_rate": 9.930418588391965e-06, "loss": 0.5249, "step": 5019 }, { "epoch": 0.14790153646715673, "grad_norm": 1.682517883603074, "learning_rate": 9.9303330702286e-06, "loss": 0.5229, "step": 5020 }, { "epoch": 0.1479309989246203, "grad_norm": 1.687780935249869, "learning_rate": 9.930247499913674e-06, "loss": 0.5789, "step": 5021 }, { "epoch": 0.14796046138208388, "grad_norm": 1.7469966097390337, "learning_rate": 9.930161877448093e-06, "loss": 0.4998, "step": 5022 }, { "epoch": 0.14798992383954745, "grad_norm": 1.518193579957537, "learning_rate": 9.930076202832762e-06, "loss": 0.4475, "step": 5023 }, { "epoch": 0.14801938629701103, "grad_norm": 2.133905992875073, "learning_rate": 9.929990476068586e-06, "loss": 0.5389, "step": 5024 }, { "epoch": 0.1480488487544746, "grad_norm": 1.7309465381703906, "learning_rate": 9.929904697156473e-06, "loss": 0.5822, "step": 5025 }, { "epoch": 0.14807831121193818, "grad_norm": 1.6055328701161669, "learning_rate": 9.929818866097332e-06, "loss": 0.4798, "step": 5026 }, { "epoch": 0.14810777366940175, "grad_norm": 1.68048827988606, "learning_rate": 9.929732982892068e-06, "loss": 0.5736, "step": 5027 }, { "epoch": 0.14813723612686533, "grad_norm": 1.549187662883403, "learning_rate": 9.929647047541594e-06, "loss": 0.4413, "step": 5028 }, { "epoch": 0.14816669858432893, "grad_norm": 1.6395881367208598, "learning_rate": 9.929561060046813e-06, "loss": 0.5304, "step": 5029 }, { "epoch": 0.1481961610417925, "grad_norm": 1.741158176327643, "learning_rate": 9.92947502040864e-06, "loss": 0.67, "step": 5030 }, { "epoch": 0.14822562349925608, "grad_norm": 1.593516050266534, "learning_rate": 9.929388928627979e-06, "loss": 0.5013, "step": 5031 }, { "epoch": 0.14825508595671966, "grad_norm": 1.7014024087211217, "learning_rate": 9.929302784705746e-06, "loss": 0.5294, "step": 5032 }, { "epoch": 0.14828454841418323, "grad_norm": 1.8786460164894265, "learning_rate": 9.92921658864285e-06, "loss": 0.5186, "step": 5033 }, { "epoch": 0.1483140108716468, "grad_norm": 1.623961553648936, "learning_rate": 9.929130340440202e-06, "loss": 0.4207, "step": 5034 }, { "epoch": 0.14834347332911038, "grad_norm": 1.5581152458406515, "learning_rate": 9.929044040098719e-06, "loss": 0.4949, "step": 5035 }, { "epoch": 0.14837293578657396, "grad_norm": 1.614368519801693, "learning_rate": 9.928957687619307e-06, "loss": 0.5185, "step": 5036 }, { "epoch": 0.14840239824403753, "grad_norm": 1.51716321495312, "learning_rate": 9.928871283002886e-06, "loss": 0.6205, "step": 5037 }, { "epoch": 0.1484318607015011, "grad_norm": 1.8713949390937477, "learning_rate": 9.928784826250364e-06, "loss": 0.6423, "step": 5038 }, { "epoch": 0.14846132315896468, "grad_norm": 1.6769625245265436, "learning_rate": 9.928698317362659e-06, "loss": 0.488, "step": 5039 }, { "epoch": 0.14849078561642826, "grad_norm": 1.5370858882269105, "learning_rate": 9.928611756340684e-06, "loss": 0.5385, "step": 5040 }, { "epoch": 0.14852024807389183, "grad_norm": 1.5112586591476396, "learning_rate": 9.928525143185358e-06, "loss": 0.3932, "step": 5041 }, { "epoch": 0.14854971053135543, "grad_norm": 1.8127319796787262, "learning_rate": 9.928438477897593e-06, "loss": 0.6353, "step": 5042 }, { "epoch": 0.148579172988819, "grad_norm": 1.8811635577807406, "learning_rate": 9.92835176047831e-06, "loss": 0.6398, "step": 5043 }, { "epoch": 0.14860863544628258, "grad_norm": 1.6445906656311249, "learning_rate": 9.928264990928422e-06, "loss": 0.5489, "step": 5044 }, { "epoch": 0.14863809790374616, "grad_norm": 1.8646152460709213, "learning_rate": 9.92817816924885e-06, "loss": 0.5749, "step": 5045 }, { "epoch": 0.14866756036120973, "grad_norm": 1.5494746781718916, "learning_rate": 9.928091295440512e-06, "loss": 0.4906, "step": 5046 }, { "epoch": 0.1486970228186733, "grad_norm": 1.971300935479221, "learning_rate": 9.928004369504325e-06, "loss": 0.5629, "step": 5047 }, { "epoch": 0.14872648527613688, "grad_norm": 1.863003799151428, "learning_rate": 9.927917391441208e-06, "loss": 0.652, "step": 5048 }, { "epoch": 0.14875594773360046, "grad_norm": 1.516755959273401, "learning_rate": 9.927830361252087e-06, "loss": 0.5293, "step": 5049 }, { "epoch": 0.14878541019106403, "grad_norm": 1.7479666923508865, "learning_rate": 9.927743278937874e-06, "loss": 0.644, "step": 5050 }, { "epoch": 0.1488148726485276, "grad_norm": 1.4902201340381886, "learning_rate": 9.927656144499497e-06, "loss": 0.4945, "step": 5051 }, { "epoch": 0.14884433510599118, "grad_norm": 1.6358310117501471, "learning_rate": 9.927568957937873e-06, "loss": 0.5389, "step": 5052 }, { "epoch": 0.14887379756345476, "grad_norm": 1.720847487334314, "learning_rate": 9.927481719253928e-06, "loss": 0.6664, "step": 5053 }, { "epoch": 0.14890326002091833, "grad_norm": 1.6048658077720774, "learning_rate": 9.927394428448582e-06, "loss": 0.4445, "step": 5054 }, { "epoch": 0.14893272247838193, "grad_norm": 1.7316007846309818, "learning_rate": 9.92730708552276e-06, "loss": 0.5564, "step": 5055 }, { "epoch": 0.1489621849358455, "grad_norm": 1.6734111206745033, "learning_rate": 9.927219690477385e-06, "loss": 0.602, "step": 5056 }, { "epoch": 0.14899164739330908, "grad_norm": 1.6715464194134384, "learning_rate": 9.927132243313381e-06, "loss": 0.5196, "step": 5057 }, { "epoch": 0.14902110985077266, "grad_norm": 1.6001496988951953, "learning_rate": 9.927044744031676e-06, "loss": 0.4138, "step": 5058 }, { "epoch": 0.14905057230823623, "grad_norm": 1.6710230490512412, "learning_rate": 9.926957192633192e-06, "loss": 0.4614, "step": 5059 }, { "epoch": 0.1490800347656998, "grad_norm": 2.100515323531562, "learning_rate": 9.926869589118857e-06, "loss": 0.4381, "step": 5060 }, { "epoch": 0.14910949722316338, "grad_norm": 1.3903048117632029, "learning_rate": 9.926781933489596e-06, "loss": 0.4176, "step": 5061 }, { "epoch": 0.14913895968062696, "grad_norm": 1.7489614779947196, "learning_rate": 9.926694225746337e-06, "loss": 0.5474, "step": 5062 }, { "epoch": 0.14916842213809053, "grad_norm": 1.9717348405832449, "learning_rate": 9.926606465890009e-06, "loss": 0.4838, "step": 5063 }, { "epoch": 0.1491978845955541, "grad_norm": 1.6795967013733948, "learning_rate": 9.92651865392154e-06, "loss": 0.543, "step": 5064 }, { "epoch": 0.14922734705301768, "grad_norm": 1.8925202075783716, "learning_rate": 9.926430789841856e-06, "loss": 0.6591, "step": 5065 }, { "epoch": 0.14925680951048126, "grad_norm": 1.7028802118263628, "learning_rate": 9.926342873651888e-06, "loss": 0.5609, "step": 5066 }, { "epoch": 0.14928627196794483, "grad_norm": 1.713420360875193, "learning_rate": 9.926254905352568e-06, "loss": 0.4812, "step": 5067 }, { "epoch": 0.14931573442540844, "grad_norm": 1.6050312358496095, "learning_rate": 9.926166884944826e-06, "loss": 0.4666, "step": 5068 }, { "epoch": 0.149345196882872, "grad_norm": 1.6245656172803427, "learning_rate": 9.92607881242959e-06, "loss": 0.5655, "step": 5069 }, { "epoch": 0.14937465934033559, "grad_norm": 1.6610330494099879, "learning_rate": 9.925990687807794e-06, "loss": 0.5286, "step": 5070 }, { "epoch": 0.14940412179779916, "grad_norm": 1.6030788234634616, "learning_rate": 9.92590251108037e-06, "loss": 0.5105, "step": 5071 }, { "epoch": 0.14943358425526274, "grad_norm": 1.7468448765325708, "learning_rate": 9.92581428224825e-06, "loss": 0.6588, "step": 5072 }, { "epoch": 0.1494630467127263, "grad_norm": 1.5947815409260941, "learning_rate": 9.925726001312367e-06, "loss": 0.5075, "step": 5073 }, { "epoch": 0.14949250917018989, "grad_norm": 1.783611010048961, "learning_rate": 9.925637668273657e-06, "loss": 0.5414, "step": 5074 }, { "epoch": 0.14952197162765346, "grad_norm": 1.6872964865692097, "learning_rate": 9.925549283133054e-06, "loss": 0.5757, "step": 5075 }, { "epoch": 0.14955143408511704, "grad_norm": 1.7956940894581106, "learning_rate": 9.92546084589149e-06, "loss": 0.509, "step": 5076 }, { "epoch": 0.1495808965425806, "grad_norm": 1.7054957649057458, "learning_rate": 9.925372356549901e-06, "loss": 0.5231, "step": 5077 }, { "epoch": 0.14961035900004419, "grad_norm": 1.6645029778021996, "learning_rate": 9.925283815109227e-06, "loss": 0.4655, "step": 5078 }, { "epoch": 0.14963982145750776, "grad_norm": 1.8498262804632684, "learning_rate": 9.925195221570401e-06, "loss": 0.5666, "step": 5079 }, { "epoch": 0.14966928391497133, "grad_norm": 1.591743930798042, "learning_rate": 9.92510657593436e-06, "loss": 0.588, "step": 5080 }, { "epoch": 0.14969874637243494, "grad_norm": 1.6098152802966021, "learning_rate": 9.925017878202044e-06, "loss": 0.4926, "step": 5081 }, { "epoch": 0.1497282088298985, "grad_norm": 1.6735131130308631, "learning_rate": 9.924929128374387e-06, "loss": 0.5705, "step": 5082 }, { "epoch": 0.1497576712873621, "grad_norm": 1.6431398610667227, "learning_rate": 9.924840326452334e-06, "loss": 0.4473, "step": 5083 }, { "epoch": 0.14978713374482566, "grad_norm": 1.661622032074698, "learning_rate": 9.92475147243682e-06, "loss": 0.4832, "step": 5084 }, { "epoch": 0.14981659620228924, "grad_norm": 1.758787462618204, "learning_rate": 9.924662566328784e-06, "loss": 0.5999, "step": 5085 }, { "epoch": 0.1498460586597528, "grad_norm": 1.5173597789850795, "learning_rate": 9.92457360812917e-06, "loss": 0.4714, "step": 5086 }, { "epoch": 0.1498755211172164, "grad_norm": 1.8948632367271827, "learning_rate": 9.924484597838918e-06, "loss": 0.4763, "step": 5087 }, { "epoch": 0.14990498357467996, "grad_norm": 1.555471444164508, "learning_rate": 9.924395535458966e-06, "loss": 0.5475, "step": 5088 }, { "epoch": 0.14993444603214354, "grad_norm": 1.652139056875826, "learning_rate": 9.924306420990261e-06, "loss": 0.5259, "step": 5089 }, { "epoch": 0.1499639084896071, "grad_norm": 1.5799649071597628, "learning_rate": 9.924217254433743e-06, "loss": 0.3476, "step": 5090 }, { "epoch": 0.1499933709470707, "grad_norm": 1.7145978624479554, "learning_rate": 9.924128035790354e-06, "loss": 0.6138, "step": 5091 }, { "epoch": 0.15002283340453426, "grad_norm": 1.61813611323324, "learning_rate": 9.924038765061042e-06, "loss": 0.5396, "step": 5092 }, { "epoch": 0.15005229586199784, "grad_norm": 1.9419639133767541, "learning_rate": 9.923949442246746e-06, "loss": 0.6515, "step": 5093 }, { "epoch": 0.15008175831946144, "grad_norm": 1.586099697921583, "learning_rate": 9.923860067348414e-06, "loss": 0.4687, "step": 5094 }, { "epoch": 0.15011122077692501, "grad_norm": 1.698123316804502, "learning_rate": 9.923770640366992e-06, "loss": 0.5665, "step": 5095 }, { "epoch": 0.1501406832343886, "grad_norm": 1.7563854396451892, "learning_rate": 9.923681161303424e-06, "loss": 0.5201, "step": 5096 }, { "epoch": 0.15017014569185216, "grad_norm": 1.6798381660324633, "learning_rate": 9.923591630158658e-06, "loss": 0.6026, "step": 5097 }, { "epoch": 0.15019960814931574, "grad_norm": 1.4404349333497255, "learning_rate": 9.92350204693364e-06, "loss": 0.4457, "step": 5098 }, { "epoch": 0.1502290706067793, "grad_norm": 1.7321077720467306, "learning_rate": 9.923412411629317e-06, "loss": 0.4146, "step": 5099 }, { "epoch": 0.1502585330642429, "grad_norm": 1.4577542389807638, "learning_rate": 9.923322724246638e-06, "loss": 0.4663, "step": 5100 }, { "epoch": 0.15028799552170646, "grad_norm": 1.8429214112228527, "learning_rate": 9.923232984786553e-06, "loss": 0.496, "step": 5101 }, { "epoch": 0.15031745797917004, "grad_norm": 1.7195823254765934, "learning_rate": 9.923143193250007e-06, "loss": 0.6208, "step": 5102 }, { "epoch": 0.1503469204366336, "grad_norm": 1.5856138899385133, "learning_rate": 9.923053349637955e-06, "loss": 0.4948, "step": 5103 }, { "epoch": 0.1503763828940972, "grad_norm": 1.6855993809411196, "learning_rate": 9.922963453951345e-06, "loss": 0.4752, "step": 5104 }, { "epoch": 0.15040584535156076, "grad_norm": 1.7144347800446862, "learning_rate": 9.922873506191127e-06, "loss": 0.5195, "step": 5105 }, { "epoch": 0.15043530780902434, "grad_norm": 1.7827824513093389, "learning_rate": 9.922783506358255e-06, "loss": 0.5246, "step": 5106 }, { "epoch": 0.15046477026648794, "grad_norm": 1.4770400102322059, "learning_rate": 9.922693454453678e-06, "loss": 0.5164, "step": 5107 }, { "epoch": 0.15049423272395152, "grad_norm": 1.6863950508645895, "learning_rate": 9.922603350478351e-06, "loss": 0.3952, "step": 5108 }, { "epoch": 0.1505236951814151, "grad_norm": 1.832717777852898, "learning_rate": 9.922513194433224e-06, "loss": 0.5184, "step": 5109 }, { "epoch": 0.15055315763887867, "grad_norm": 1.5757563501974712, "learning_rate": 9.922422986319254e-06, "loss": 0.524, "step": 5110 }, { "epoch": 0.15058262009634224, "grad_norm": 1.5886153909919434, "learning_rate": 9.922332726137394e-06, "loss": 0.5722, "step": 5111 }, { "epoch": 0.15061208255380581, "grad_norm": 1.5211091127093617, "learning_rate": 9.922242413888597e-06, "loss": 0.5364, "step": 5112 }, { "epoch": 0.1506415450112694, "grad_norm": 1.509777120984384, "learning_rate": 9.922152049573821e-06, "loss": 0.4509, "step": 5113 }, { "epoch": 0.15067100746873296, "grad_norm": 1.6946059958091138, "learning_rate": 9.92206163319402e-06, "loss": 0.5086, "step": 5114 }, { "epoch": 0.15070046992619654, "grad_norm": 1.8326545164765164, "learning_rate": 9.921971164750153e-06, "loss": 0.7109, "step": 5115 }, { "epoch": 0.15072993238366011, "grad_norm": 1.541621200258978, "learning_rate": 9.921880644243172e-06, "loss": 0.4604, "step": 5116 }, { "epoch": 0.1507593948411237, "grad_norm": 2.267511007064842, "learning_rate": 9.92179007167404e-06, "loss": 0.6557, "step": 5117 }, { "epoch": 0.15078885729858726, "grad_norm": 1.8498466211767663, "learning_rate": 9.92169944704371e-06, "loss": 0.6764, "step": 5118 }, { "epoch": 0.15081831975605084, "grad_norm": 1.746572186002077, "learning_rate": 9.921608770353145e-06, "loss": 0.494, "step": 5119 }, { "epoch": 0.15084778221351444, "grad_norm": 1.5745699247698088, "learning_rate": 9.921518041603305e-06, "loss": 0.4508, "step": 5120 }, { "epoch": 0.15087724467097802, "grad_norm": 1.6045241328067512, "learning_rate": 9.921427260795143e-06, "loss": 0.4809, "step": 5121 }, { "epoch": 0.1509067071284416, "grad_norm": 1.648353749287056, "learning_rate": 9.921336427929625e-06, "loss": 0.5351, "step": 5122 }, { "epoch": 0.15093616958590517, "grad_norm": 1.6866020162328204, "learning_rate": 9.921245543007709e-06, "loss": 0.5648, "step": 5123 }, { "epoch": 0.15096563204336874, "grad_norm": 1.8647690157107621, "learning_rate": 9.92115460603036e-06, "loss": 0.4876, "step": 5124 }, { "epoch": 0.15099509450083232, "grad_norm": 1.6675572591888426, "learning_rate": 9.921063616998534e-06, "loss": 0.4834, "step": 5125 }, { "epoch": 0.1510245569582959, "grad_norm": 1.9490721446849573, "learning_rate": 9.920972575913199e-06, "loss": 0.6231, "step": 5126 }, { "epoch": 0.15105401941575947, "grad_norm": 1.7015157368701304, "learning_rate": 9.920881482775315e-06, "loss": 0.4747, "step": 5127 }, { "epoch": 0.15108348187322304, "grad_norm": 1.6209671750520218, "learning_rate": 9.920790337585846e-06, "loss": 0.6282, "step": 5128 }, { "epoch": 0.15111294433068662, "grad_norm": 1.4204306608858617, "learning_rate": 9.920699140345757e-06, "loss": 0.4052, "step": 5129 }, { "epoch": 0.1511424067881502, "grad_norm": 1.503036928813061, "learning_rate": 9.920607891056013e-06, "loss": 0.4493, "step": 5130 }, { "epoch": 0.15117186924561377, "grad_norm": 1.6992010972585962, "learning_rate": 9.920516589717577e-06, "loss": 0.5427, "step": 5131 }, { "epoch": 0.15120133170307734, "grad_norm": 1.869905179966126, "learning_rate": 9.920425236331417e-06, "loss": 0.6733, "step": 5132 }, { "epoch": 0.15123079416054094, "grad_norm": 1.740473020168827, "learning_rate": 9.920333830898497e-06, "loss": 0.5211, "step": 5133 }, { "epoch": 0.15126025661800452, "grad_norm": 1.7973546648500924, "learning_rate": 9.920242373419786e-06, "loss": 0.5865, "step": 5134 }, { "epoch": 0.1512897190754681, "grad_norm": 1.7692813800786484, "learning_rate": 9.920150863896252e-06, "loss": 0.5686, "step": 5135 }, { "epoch": 0.15131918153293167, "grad_norm": 1.670397603518192, "learning_rate": 9.920059302328859e-06, "loss": 0.4699, "step": 5136 }, { "epoch": 0.15134864399039524, "grad_norm": 1.6363562212791143, "learning_rate": 9.91996768871858e-06, "loss": 0.5833, "step": 5137 }, { "epoch": 0.15137810644785882, "grad_norm": 1.9894916840991794, "learning_rate": 9.919876023066383e-06, "loss": 0.4533, "step": 5138 }, { "epoch": 0.1514075689053224, "grad_norm": 1.8358214301761948, "learning_rate": 9.919784305373235e-06, "loss": 0.5854, "step": 5139 }, { "epoch": 0.15143703136278597, "grad_norm": 1.5936551381001527, "learning_rate": 9.91969253564011e-06, "loss": 0.364, "step": 5140 }, { "epoch": 0.15146649382024954, "grad_norm": 1.7762390830741417, "learning_rate": 9.919600713867974e-06, "loss": 0.6213, "step": 5141 }, { "epoch": 0.15149595627771312, "grad_norm": 1.6603617421508179, "learning_rate": 9.919508840057803e-06, "loss": 0.5053, "step": 5142 }, { "epoch": 0.1515254187351767, "grad_norm": 1.7298023282073642, "learning_rate": 9.919416914210565e-06, "loss": 0.5435, "step": 5143 }, { "epoch": 0.15155488119264027, "grad_norm": 1.9220034811958633, "learning_rate": 9.919324936327234e-06, "loss": 0.4665, "step": 5144 }, { "epoch": 0.15158434365010384, "grad_norm": 1.7241765821236723, "learning_rate": 9.919232906408785e-06, "loss": 0.578, "step": 5145 }, { "epoch": 0.15161380610756744, "grad_norm": 1.589789322875434, "learning_rate": 9.919140824456188e-06, "loss": 0.4567, "step": 5146 }, { "epoch": 0.15164326856503102, "grad_norm": 1.6106423626899338, "learning_rate": 9.91904869047042e-06, "loss": 0.562, "step": 5147 }, { "epoch": 0.1516727310224946, "grad_norm": 1.6664108379716984, "learning_rate": 9.918956504452453e-06, "loss": 0.5351, "step": 5148 }, { "epoch": 0.15170219347995817, "grad_norm": 1.900453139499848, "learning_rate": 9.918864266403264e-06, "loss": 0.6092, "step": 5149 }, { "epoch": 0.15173165593742174, "grad_norm": 1.706653844076006, "learning_rate": 9.918771976323827e-06, "loss": 0.5342, "step": 5150 }, { "epoch": 0.15176111839488532, "grad_norm": 1.500166476794849, "learning_rate": 9.91867963421512e-06, "loss": 0.4359, "step": 5151 }, { "epoch": 0.1517905808523489, "grad_norm": 2.147725950437274, "learning_rate": 9.918587240078117e-06, "loss": 0.597, "step": 5152 }, { "epoch": 0.15182004330981247, "grad_norm": 1.705026243041631, "learning_rate": 9.9184947939138e-06, "loss": 0.5543, "step": 5153 }, { "epoch": 0.15184950576727604, "grad_norm": 1.6114945704929375, "learning_rate": 9.918402295723143e-06, "loss": 0.5999, "step": 5154 }, { "epoch": 0.15187896822473962, "grad_norm": 1.742895748416378, "learning_rate": 9.918309745507123e-06, "loss": 0.7094, "step": 5155 }, { "epoch": 0.1519084306822032, "grad_norm": 1.9690839869160632, "learning_rate": 9.918217143266723e-06, "loss": 0.6386, "step": 5156 }, { "epoch": 0.15193789313966677, "grad_norm": 1.7448573184398553, "learning_rate": 9.918124489002922e-06, "loss": 0.5321, "step": 5157 }, { "epoch": 0.15196735559713034, "grad_norm": 1.6805754788069325, "learning_rate": 9.918031782716697e-06, "loss": 0.6061, "step": 5158 }, { "epoch": 0.15199681805459395, "grad_norm": 1.8612450957982338, "learning_rate": 9.917939024409034e-06, "loss": 0.4685, "step": 5159 }, { "epoch": 0.15202628051205752, "grad_norm": 1.5561423679473823, "learning_rate": 9.917846214080907e-06, "loss": 0.51, "step": 5160 }, { "epoch": 0.1520557429695211, "grad_norm": 1.6999107561601525, "learning_rate": 9.917753351733304e-06, "loss": 0.6287, "step": 5161 }, { "epoch": 0.15208520542698467, "grad_norm": 1.5009136734091515, "learning_rate": 9.917660437367204e-06, "loss": 0.4807, "step": 5162 }, { "epoch": 0.15211466788444825, "grad_norm": 1.7130771036032286, "learning_rate": 9.91756747098359e-06, "loss": 0.5566, "step": 5163 }, { "epoch": 0.15214413034191182, "grad_norm": 1.6000604160327483, "learning_rate": 9.917474452583446e-06, "loss": 0.4141, "step": 5164 }, { "epoch": 0.1521735927993754, "grad_norm": 1.5527642493354217, "learning_rate": 9.917381382167756e-06, "loss": 0.4498, "step": 5165 }, { "epoch": 0.15220305525683897, "grad_norm": 1.6976990019428697, "learning_rate": 9.917288259737505e-06, "loss": 0.5544, "step": 5166 }, { "epoch": 0.15223251771430255, "grad_norm": 1.8909253495866605, "learning_rate": 9.917195085293679e-06, "loss": 0.5053, "step": 5167 }, { "epoch": 0.15226198017176612, "grad_norm": 1.7790095693284345, "learning_rate": 9.917101858837259e-06, "loss": 0.592, "step": 5168 }, { "epoch": 0.1522914426292297, "grad_norm": 1.5961354041721796, "learning_rate": 9.917008580369236e-06, "loss": 0.5062, "step": 5169 }, { "epoch": 0.15232090508669327, "grad_norm": 1.9267835460354228, "learning_rate": 9.916915249890594e-06, "loss": 0.4812, "step": 5170 }, { "epoch": 0.15235036754415685, "grad_norm": 1.4934482741539312, "learning_rate": 9.91682186740232e-06, "loss": 0.4882, "step": 5171 }, { "epoch": 0.15237983000162045, "grad_norm": 1.7277273588055093, "learning_rate": 9.916728432905403e-06, "loss": 0.4173, "step": 5172 }, { "epoch": 0.15240929245908402, "grad_norm": 1.6658432075715055, "learning_rate": 9.91663494640083e-06, "loss": 0.5767, "step": 5173 }, { "epoch": 0.1524387549165476, "grad_norm": 1.4897701505368883, "learning_rate": 9.916541407889594e-06, "loss": 0.4492, "step": 5174 }, { "epoch": 0.15246821737401117, "grad_norm": 1.5888625485833945, "learning_rate": 9.916447817372682e-06, "loss": 0.5856, "step": 5175 }, { "epoch": 0.15249767983147475, "grad_norm": 1.5570411472164523, "learning_rate": 9.91635417485108e-06, "loss": 0.4573, "step": 5176 }, { "epoch": 0.15252714228893832, "grad_norm": 1.5819084709709001, "learning_rate": 9.916260480325782e-06, "loss": 0.4597, "step": 5177 }, { "epoch": 0.1525566047464019, "grad_norm": 1.7218817733642722, "learning_rate": 9.91616673379778e-06, "loss": 0.5695, "step": 5178 }, { "epoch": 0.15258606720386547, "grad_norm": 1.594496529033299, "learning_rate": 9.916072935268064e-06, "loss": 0.4231, "step": 5179 }, { "epoch": 0.15261552966132905, "grad_norm": 1.650434012532255, "learning_rate": 9.915979084737629e-06, "loss": 0.5098, "step": 5180 }, { "epoch": 0.15264499211879262, "grad_norm": 1.7658634726168878, "learning_rate": 9.915885182207463e-06, "loss": 0.5776, "step": 5181 }, { "epoch": 0.1526744545762562, "grad_norm": 1.8381688333081696, "learning_rate": 9.915791227678561e-06, "loss": 0.4767, "step": 5182 }, { "epoch": 0.15270391703371977, "grad_norm": 1.6007549578552498, "learning_rate": 9.915697221151921e-06, "loss": 0.5827, "step": 5183 }, { "epoch": 0.15273337949118335, "grad_norm": 1.8006196837759332, "learning_rate": 9.91560316262853e-06, "loss": 0.6054, "step": 5184 }, { "epoch": 0.15276284194864695, "grad_norm": 1.887589312812375, "learning_rate": 9.91550905210939e-06, "loss": 0.4423, "step": 5185 }, { "epoch": 0.15279230440611052, "grad_norm": 1.9788334271843637, "learning_rate": 9.915414889595492e-06, "loss": 0.5911, "step": 5186 }, { "epoch": 0.1528217668635741, "grad_norm": 1.8628817980400914, "learning_rate": 9.915320675087833e-06, "loss": 0.5432, "step": 5187 }, { "epoch": 0.15285122932103767, "grad_norm": 1.6167745990002607, "learning_rate": 9.91522640858741e-06, "loss": 0.3515, "step": 5188 }, { "epoch": 0.15288069177850125, "grad_norm": 1.687982683973884, "learning_rate": 9.91513209009522e-06, "loss": 0.476, "step": 5189 }, { "epoch": 0.15291015423596482, "grad_norm": 1.5877788921867695, "learning_rate": 9.915037719612262e-06, "loss": 0.4602, "step": 5190 }, { "epoch": 0.1529396166934284, "grad_norm": 1.6345046603185869, "learning_rate": 9.91494329713953e-06, "loss": 0.5866, "step": 5191 }, { "epoch": 0.15296907915089197, "grad_norm": 1.6161274043677722, "learning_rate": 9.914848822678028e-06, "loss": 0.5215, "step": 5192 }, { "epoch": 0.15299854160835555, "grad_norm": 1.6838907746171932, "learning_rate": 9.914754296228753e-06, "loss": 0.5568, "step": 5193 }, { "epoch": 0.15302800406581912, "grad_norm": 1.6050210782163343, "learning_rate": 9.914659717792704e-06, "loss": 0.6285, "step": 5194 }, { "epoch": 0.1530574665232827, "grad_norm": 1.8168911479582386, "learning_rate": 9.914565087370882e-06, "loss": 0.5774, "step": 5195 }, { "epoch": 0.15308692898074627, "grad_norm": 1.9167572866486242, "learning_rate": 9.91447040496429e-06, "loss": 0.7447, "step": 5196 }, { "epoch": 0.15311639143820985, "grad_norm": 1.5644304849227169, "learning_rate": 9.914375670573925e-06, "loss": 0.5069, "step": 5197 }, { "epoch": 0.15314585389567345, "grad_norm": 1.726218644409793, "learning_rate": 9.914280884200795e-06, "loss": 0.479, "step": 5198 }, { "epoch": 0.15317531635313703, "grad_norm": 1.690252539207047, "learning_rate": 9.914186045845898e-06, "loss": 0.498, "step": 5199 }, { "epoch": 0.1532047788106006, "grad_norm": 1.7628096745086057, "learning_rate": 9.914091155510239e-06, "loss": 0.5339, "step": 5200 }, { "epoch": 0.15323424126806418, "grad_norm": 1.6086176852466383, "learning_rate": 9.913996213194821e-06, "loss": 0.488, "step": 5201 }, { "epoch": 0.15326370372552775, "grad_norm": 1.5707055237526637, "learning_rate": 9.913901218900648e-06, "loss": 0.4141, "step": 5202 }, { "epoch": 0.15329316618299133, "grad_norm": 1.896017448251656, "learning_rate": 9.913806172628726e-06, "loss": 0.553, "step": 5203 }, { "epoch": 0.1533226286404549, "grad_norm": 1.7515538777403512, "learning_rate": 9.91371107438006e-06, "loss": 0.5543, "step": 5204 }, { "epoch": 0.15335209109791847, "grad_norm": 1.500091082453514, "learning_rate": 9.913615924155657e-06, "loss": 0.4342, "step": 5205 }, { "epoch": 0.15338155355538205, "grad_norm": 1.6426565248336402, "learning_rate": 9.91352072195652e-06, "loss": 0.493, "step": 5206 }, { "epoch": 0.15341101601284562, "grad_norm": 1.4690013799828812, "learning_rate": 9.91342546778366e-06, "loss": 0.4548, "step": 5207 }, { "epoch": 0.1534404784703092, "grad_norm": 1.5656633357767444, "learning_rate": 9.913330161638082e-06, "loss": 0.5021, "step": 5208 }, { "epoch": 0.15346994092777277, "grad_norm": 1.7719716696441026, "learning_rate": 9.913234803520795e-06, "loss": 0.5926, "step": 5209 }, { "epoch": 0.15349940338523635, "grad_norm": 1.4124783712949407, "learning_rate": 9.913139393432808e-06, "loss": 0.3465, "step": 5210 }, { "epoch": 0.15352886584269995, "grad_norm": 1.8275136982880131, "learning_rate": 9.913043931375129e-06, "loss": 0.5283, "step": 5211 }, { "epoch": 0.15355832830016353, "grad_norm": 1.7732370651803413, "learning_rate": 9.912948417348769e-06, "loss": 0.5612, "step": 5212 }, { "epoch": 0.1535877907576271, "grad_norm": 1.630254732087803, "learning_rate": 9.912852851354738e-06, "loss": 0.5372, "step": 5213 }, { "epoch": 0.15361725321509068, "grad_norm": 1.598507282869076, "learning_rate": 9.912757233394045e-06, "loss": 0.5214, "step": 5214 }, { "epoch": 0.15364671567255425, "grad_norm": 1.7647178228833658, "learning_rate": 9.912661563467707e-06, "loss": 0.523, "step": 5215 }, { "epoch": 0.15367617813001783, "grad_norm": 1.659603808305765, "learning_rate": 9.912565841576728e-06, "loss": 0.5971, "step": 5216 }, { "epoch": 0.1537056405874814, "grad_norm": 1.612331289131772, "learning_rate": 9.912470067722127e-06, "loss": 0.4526, "step": 5217 }, { "epoch": 0.15373510304494498, "grad_norm": 1.7076027950015318, "learning_rate": 9.912374241904914e-06, "loss": 0.6324, "step": 5218 }, { "epoch": 0.15376456550240855, "grad_norm": 1.5879700414287465, "learning_rate": 9.912278364126104e-06, "loss": 0.354, "step": 5219 }, { "epoch": 0.15379402795987213, "grad_norm": 1.7622913720320565, "learning_rate": 9.91218243438671e-06, "loss": 0.5767, "step": 5220 }, { "epoch": 0.1538234904173357, "grad_norm": 1.988881385662343, "learning_rate": 9.91208645268775e-06, "loss": 0.7185, "step": 5221 }, { "epoch": 0.15385295287479928, "grad_norm": 1.7592090788961587, "learning_rate": 9.911990419030233e-06, "loss": 0.5604, "step": 5222 }, { "epoch": 0.15388241533226285, "grad_norm": 1.7418162606089007, "learning_rate": 9.91189433341518e-06, "loss": 0.5916, "step": 5223 }, { "epoch": 0.15391187778972645, "grad_norm": 1.6693345781797109, "learning_rate": 9.911798195843604e-06, "loss": 0.586, "step": 5224 }, { "epoch": 0.15394134024719003, "grad_norm": 1.6642582981672887, "learning_rate": 9.911702006316525e-06, "loss": 0.5267, "step": 5225 }, { "epoch": 0.1539708027046536, "grad_norm": 1.8031376778326293, "learning_rate": 9.91160576483496e-06, "loss": 0.4688, "step": 5226 }, { "epoch": 0.15400026516211718, "grad_norm": 1.6864694922202086, "learning_rate": 9.911509471399925e-06, "loss": 0.4693, "step": 5227 }, { "epoch": 0.15402972761958075, "grad_norm": 1.5934144654924087, "learning_rate": 9.91141312601244e-06, "loss": 0.3936, "step": 5228 }, { "epoch": 0.15405919007704433, "grad_norm": 1.6821516057836758, "learning_rate": 9.911316728673522e-06, "loss": 0.4641, "step": 5229 }, { "epoch": 0.1540886525345079, "grad_norm": 1.7450644862764966, "learning_rate": 9.911220279384194e-06, "loss": 0.5336, "step": 5230 }, { "epoch": 0.15411811499197148, "grad_norm": 1.9352109879892476, "learning_rate": 9.911123778145474e-06, "loss": 0.5056, "step": 5231 }, { "epoch": 0.15414757744943505, "grad_norm": 1.7389603248387169, "learning_rate": 9.911027224958384e-06, "loss": 0.5516, "step": 5232 }, { "epoch": 0.15417703990689863, "grad_norm": 1.6932690596419555, "learning_rate": 9.910930619823944e-06, "loss": 0.6061, "step": 5233 }, { "epoch": 0.1542065023643622, "grad_norm": 1.7409650327795563, "learning_rate": 9.910833962743178e-06, "loss": 0.4514, "step": 5234 }, { "epoch": 0.15423596482182578, "grad_norm": 1.8955297081482025, "learning_rate": 9.910737253717106e-06, "loss": 0.6647, "step": 5235 }, { "epoch": 0.15426542727928935, "grad_norm": 1.7966478225471585, "learning_rate": 9.91064049274675e-06, "loss": 0.5873, "step": 5236 }, { "epoch": 0.15429488973675295, "grad_norm": 1.7720188126380092, "learning_rate": 9.910543679833138e-06, "loss": 0.4347, "step": 5237 }, { "epoch": 0.15432435219421653, "grad_norm": 1.7034754444979592, "learning_rate": 9.91044681497729e-06, "loss": 0.6459, "step": 5238 }, { "epoch": 0.1543538146516801, "grad_norm": 1.6788606539749151, "learning_rate": 9.910349898180234e-06, "loss": 0.4075, "step": 5239 }, { "epoch": 0.15438327710914368, "grad_norm": 1.6388202661465001, "learning_rate": 9.910252929442991e-06, "loss": 0.3606, "step": 5240 }, { "epoch": 0.15441273956660725, "grad_norm": 1.568375677412281, "learning_rate": 9.910155908766591e-06, "loss": 0.5531, "step": 5241 }, { "epoch": 0.15444220202407083, "grad_norm": 1.897310281260199, "learning_rate": 9.910058836152057e-06, "loss": 0.5726, "step": 5242 }, { "epoch": 0.1544716644815344, "grad_norm": 1.5165389343417723, "learning_rate": 9.909961711600417e-06, "loss": 0.4716, "step": 5243 }, { "epoch": 0.15450112693899798, "grad_norm": 1.695526510071288, "learning_rate": 9.9098645351127e-06, "loss": 0.6365, "step": 5244 }, { "epoch": 0.15453058939646155, "grad_norm": 1.5892788351855114, "learning_rate": 9.90976730668993e-06, "loss": 0.5803, "step": 5245 }, { "epoch": 0.15456005185392513, "grad_norm": 1.5665611736229674, "learning_rate": 9.909670026333139e-06, "loss": 0.4987, "step": 5246 }, { "epoch": 0.1545895143113887, "grad_norm": 1.7229604782495291, "learning_rate": 9.909572694043355e-06, "loss": 0.5258, "step": 5247 }, { "epoch": 0.15461897676885228, "grad_norm": 1.8505779052065936, "learning_rate": 9.909475309821607e-06, "loss": 0.6782, "step": 5248 }, { "epoch": 0.15464843922631585, "grad_norm": 1.6669146313733574, "learning_rate": 9.909377873668924e-06, "loss": 0.584, "step": 5249 }, { "epoch": 0.15467790168377946, "grad_norm": 1.8860148210559484, "learning_rate": 9.909280385586339e-06, "loss": 0.6649, "step": 5250 }, { "epoch": 0.15470736414124303, "grad_norm": 1.6822404727691709, "learning_rate": 9.909182845574882e-06, "loss": 0.5195, "step": 5251 }, { "epoch": 0.1547368265987066, "grad_norm": 1.4415689929716298, "learning_rate": 9.909085253635587e-06, "loss": 0.4878, "step": 5252 }, { "epoch": 0.15476628905617018, "grad_norm": 1.7960185446339858, "learning_rate": 9.908987609769483e-06, "loss": 0.5127, "step": 5253 }, { "epoch": 0.15479575151363376, "grad_norm": 1.5209547400482242, "learning_rate": 9.908889913977602e-06, "loss": 0.5922, "step": 5254 }, { "epoch": 0.15482521397109733, "grad_norm": 1.4949147579312225, "learning_rate": 9.908792166260982e-06, "loss": 0.4881, "step": 5255 }, { "epoch": 0.1548546764285609, "grad_norm": 1.6465796794017247, "learning_rate": 9.908694366620655e-06, "loss": 0.5517, "step": 5256 }, { "epoch": 0.15488413888602448, "grad_norm": 1.7354176813128934, "learning_rate": 9.908596515057653e-06, "loss": 0.6825, "step": 5257 }, { "epoch": 0.15491360134348806, "grad_norm": 1.7582914176591993, "learning_rate": 9.908498611573014e-06, "loss": 0.4863, "step": 5258 }, { "epoch": 0.15494306380095163, "grad_norm": 1.7068384216603318, "learning_rate": 9.90840065616777e-06, "loss": 0.5002, "step": 5259 }, { "epoch": 0.1549725262584152, "grad_norm": 1.6160706986968778, "learning_rate": 9.908302648842964e-06, "loss": 0.4832, "step": 5260 }, { "epoch": 0.15500198871587878, "grad_norm": 1.8457409320505405, "learning_rate": 9.908204589599626e-06, "loss": 0.525, "step": 5261 }, { "epoch": 0.15503145117334236, "grad_norm": 1.6721050341186483, "learning_rate": 9.908106478438797e-06, "loss": 0.5562, "step": 5262 }, { "epoch": 0.15506091363080596, "grad_norm": 1.6820144869118974, "learning_rate": 9.908008315361512e-06, "loss": 0.5743, "step": 5263 }, { "epoch": 0.15509037608826953, "grad_norm": 1.6682316759299143, "learning_rate": 9.907910100368811e-06, "loss": 0.5978, "step": 5264 }, { "epoch": 0.1551198385457331, "grad_norm": 1.6761861640185896, "learning_rate": 9.907811833461733e-06, "loss": 0.4167, "step": 5265 }, { "epoch": 0.15514930100319668, "grad_norm": 2.1803679173379136, "learning_rate": 9.907713514641319e-06, "loss": 0.5988, "step": 5266 }, { "epoch": 0.15517876346066026, "grad_norm": 1.6170935358586238, "learning_rate": 9.907615143908605e-06, "loss": 0.4894, "step": 5267 }, { "epoch": 0.15520822591812383, "grad_norm": 1.7171090779136844, "learning_rate": 9.907516721264635e-06, "loss": 0.4967, "step": 5268 }, { "epoch": 0.1552376883755874, "grad_norm": 1.5383677232162494, "learning_rate": 9.907418246710447e-06, "loss": 0.5138, "step": 5269 }, { "epoch": 0.15526715083305098, "grad_norm": 1.8797709500782795, "learning_rate": 9.907319720247083e-06, "loss": 0.5358, "step": 5270 }, { "epoch": 0.15529661329051456, "grad_norm": 1.5880276382289351, "learning_rate": 9.90722114187559e-06, "loss": 0.6047, "step": 5271 }, { "epoch": 0.15532607574797813, "grad_norm": 1.708465617739589, "learning_rate": 9.907122511597008e-06, "loss": 0.5957, "step": 5272 }, { "epoch": 0.1553555382054417, "grad_norm": 1.7110419741085607, "learning_rate": 9.907023829412376e-06, "loss": 0.4798, "step": 5273 }, { "epoch": 0.15538500066290528, "grad_norm": 1.716103350479355, "learning_rate": 9.906925095322743e-06, "loss": 0.5364, "step": 5274 }, { "epoch": 0.15541446312036886, "grad_norm": 1.6840760908429713, "learning_rate": 9.906826309329152e-06, "loss": 0.4823, "step": 5275 }, { "epoch": 0.15544392557783246, "grad_norm": 1.5848555407942146, "learning_rate": 9.906727471432646e-06, "loss": 0.518, "step": 5276 }, { "epoch": 0.15547338803529603, "grad_norm": 1.6045491313892326, "learning_rate": 9.906628581634275e-06, "loss": 0.5088, "step": 5277 }, { "epoch": 0.1555028504927596, "grad_norm": 1.8027607916181383, "learning_rate": 9.90652963993508e-06, "loss": 0.6789, "step": 5278 }, { "epoch": 0.15553231295022318, "grad_norm": 1.607331379590128, "learning_rate": 9.906430646336111e-06, "loss": 0.4995, "step": 5279 }, { "epoch": 0.15556177540768676, "grad_norm": 1.8434497251368829, "learning_rate": 9.906331600838413e-06, "loss": 0.6378, "step": 5280 }, { "epoch": 0.15559123786515033, "grad_norm": 1.618425782755491, "learning_rate": 9.906232503443035e-06, "loss": 0.5814, "step": 5281 }, { "epoch": 0.1556207003226139, "grad_norm": 1.7483408239249487, "learning_rate": 9.906133354151026e-06, "loss": 0.4928, "step": 5282 }, { "epoch": 0.15565016278007748, "grad_norm": 1.6326859654548203, "learning_rate": 9.906034152963431e-06, "loss": 0.557, "step": 5283 }, { "epoch": 0.15567962523754106, "grad_norm": 1.81195886529633, "learning_rate": 9.905934899881304e-06, "loss": 0.6108, "step": 5284 }, { "epoch": 0.15570908769500463, "grad_norm": 1.6750866131804472, "learning_rate": 9.905835594905693e-06, "loss": 0.5793, "step": 5285 }, { "epoch": 0.1557385501524682, "grad_norm": 1.48945488044778, "learning_rate": 9.905736238037647e-06, "loss": 0.4177, "step": 5286 }, { "epoch": 0.15576801260993178, "grad_norm": 1.6192396802394353, "learning_rate": 9.905636829278217e-06, "loss": 0.6129, "step": 5287 }, { "epoch": 0.15579747506739536, "grad_norm": 1.5037282988831804, "learning_rate": 9.905537368628459e-06, "loss": 0.4046, "step": 5288 }, { "epoch": 0.15582693752485896, "grad_norm": 1.8579920545746271, "learning_rate": 9.90543785608942e-06, "loss": 0.6436, "step": 5289 }, { "epoch": 0.15585639998232254, "grad_norm": 1.8142018269970417, "learning_rate": 9.905338291662154e-06, "loss": 0.565, "step": 5290 }, { "epoch": 0.1558858624397861, "grad_norm": 1.5780740193288771, "learning_rate": 9.905238675347714e-06, "loss": 0.4886, "step": 5291 }, { "epoch": 0.15591532489724969, "grad_norm": 1.8525666208599427, "learning_rate": 9.905139007147155e-06, "loss": 0.6069, "step": 5292 }, { "epoch": 0.15594478735471326, "grad_norm": 1.9115025766844083, "learning_rate": 9.90503928706153e-06, "loss": 0.625, "step": 5293 }, { "epoch": 0.15597424981217684, "grad_norm": 1.607977343937726, "learning_rate": 9.904939515091895e-06, "loss": 0.4645, "step": 5294 }, { "epoch": 0.1560037122696404, "grad_norm": 1.7225841635625863, "learning_rate": 9.904839691239304e-06, "loss": 0.4323, "step": 5295 }, { "epoch": 0.15603317472710398, "grad_norm": 1.6226593861336238, "learning_rate": 9.904739815504814e-06, "loss": 0.4738, "step": 5296 }, { "epoch": 0.15606263718456756, "grad_norm": 1.4211548672878855, "learning_rate": 9.90463988788948e-06, "loss": 0.4237, "step": 5297 }, { "epoch": 0.15609209964203113, "grad_norm": 1.459035580357857, "learning_rate": 9.904539908394362e-06, "loss": 0.4493, "step": 5298 }, { "epoch": 0.1561215620994947, "grad_norm": 1.8358872825371548, "learning_rate": 9.904439877020514e-06, "loss": 0.701, "step": 5299 }, { "epoch": 0.15615102455695828, "grad_norm": 1.656481810678411, "learning_rate": 9.904339793768996e-06, "loss": 0.476, "step": 5300 }, { "epoch": 0.15618048701442186, "grad_norm": 1.86589479754916, "learning_rate": 9.904239658640866e-06, "loss": 0.5619, "step": 5301 }, { "epoch": 0.15620994947188546, "grad_norm": 1.5640810624830088, "learning_rate": 9.904139471637183e-06, "loss": 0.5443, "step": 5302 }, { "epoch": 0.15623941192934904, "grad_norm": 1.5289189948566932, "learning_rate": 9.904039232759008e-06, "loss": 0.5065, "step": 5303 }, { "epoch": 0.1562688743868126, "grad_norm": 1.820824778365909, "learning_rate": 9.903938942007402e-06, "loss": 0.6367, "step": 5304 }, { "epoch": 0.1562983368442762, "grad_norm": 1.7384297181019939, "learning_rate": 9.903838599383423e-06, "loss": 0.6178, "step": 5305 }, { "epoch": 0.15632779930173976, "grad_norm": 1.7167647758211857, "learning_rate": 9.903738204888134e-06, "loss": 0.5982, "step": 5306 }, { "epoch": 0.15635726175920334, "grad_norm": 1.9202330632322093, "learning_rate": 9.903637758522596e-06, "loss": 0.4572, "step": 5307 }, { "epoch": 0.1563867242166669, "grad_norm": 1.9347027938178194, "learning_rate": 9.903537260287874e-06, "loss": 0.559, "step": 5308 }, { "epoch": 0.1564161866741305, "grad_norm": 1.516841880246677, "learning_rate": 9.903436710185028e-06, "loss": 0.455, "step": 5309 }, { "epoch": 0.15644564913159406, "grad_norm": 1.7477687693140673, "learning_rate": 9.903336108215122e-06, "loss": 0.5034, "step": 5310 }, { "epoch": 0.15647511158905764, "grad_norm": 1.7620231153118062, "learning_rate": 9.903235454379221e-06, "loss": 0.5844, "step": 5311 }, { "epoch": 0.1565045740465212, "grad_norm": 1.4633155114858492, "learning_rate": 9.903134748678392e-06, "loss": 0.4611, "step": 5312 }, { "epoch": 0.15653403650398479, "grad_norm": 1.7271736247052716, "learning_rate": 9.903033991113696e-06, "loss": 0.6801, "step": 5313 }, { "epoch": 0.15656349896144836, "grad_norm": 1.6922931480622907, "learning_rate": 9.9029331816862e-06, "loss": 0.6469, "step": 5314 }, { "epoch": 0.15659296141891196, "grad_norm": 1.5222867972074232, "learning_rate": 9.902832320396974e-06, "loss": 0.4298, "step": 5315 }, { "epoch": 0.15662242387637554, "grad_norm": 1.9299884603128523, "learning_rate": 9.902731407247079e-06, "loss": 0.5566, "step": 5316 }, { "epoch": 0.1566518863338391, "grad_norm": 1.8636273264846155, "learning_rate": 9.902630442237587e-06, "loss": 0.6226, "step": 5317 }, { "epoch": 0.1566813487913027, "grad_norm": 2.080342105110987, "learning_rate": 9.902529425369563e-06, "loss": 0.5934, "step": 5318 }, { "epoch": 0.15671081124876626, "grad_norm": 2.0200485861822863, "learning_rate": 9.902428356644078e-06, "loss": 0.4653, "step": 5319 }, { "epoch": 0.15674027370622984, "grad_norm": 1.5431356572816768, "learning_rate": 9.9023272360622e-06, "loss": 0.4806, "step": 5320 }, { "epoch": 0.1567697361636934, "grad_norm": 1.5684349656252634, "learning_rate": 9.902226063624998e-06, "loss": 0.535, "step": 5321 }, { "epoch": 0.156799198621157, "grad_norm": 1.6485654955849924, "learning_rate": 9.902124839333542e-06, "loss": 0.5539, "step": 5322 }, { "epoch": 0.15682866107862056, "grad_norm": 1.6326498629019113, "learning_rate": 9.902023563188906e-06, "loss": 0.495, "step": 5323 }, { "epoch": 0.15685812353608414, "grad_norm": 1.5824840886517206, "learning_rate": 9.901922235192155e-06, "loss": 0.2674, "step": 5324 }, { "epoch": 0.1568875859935477, "grad_norm": 1.5770785864531613, "learning_rate": 9.901820855344367e-06, "loss": 0.5062, "step": 5325 }, { "epoch": 0.1569170484510113, "grad_norm": 1.6261036683618628, "learning_rate": 9.90171942364661e-06, "loss": 0.3412, "step": 5326 }, { "epoch": 0.15694651090847486, "grad_norm": 1.6762480978289558, "learning_rate": 9.90161794009996e-06, "loss": 0.5041, "step": 5327 }, { "epoch": 0.15697597336593846, "grad_norm": 1.5144198464878833, "learning_rate": 9.901516404705488e-06, "loss": 0.4361, "step": 5328 }, { "epoch": 0.15700543582340204, "grad_norm": 1.83668024116264, "learning_rate": 9.901414817464272e-06, "loss": 0.7516, "step": 5329 }, { "epoch": 0.15703489828086561, "grad_norm": 1.513624399806271, "learning_rate": 9.901313178377383e-06, "loss": 0.458, "step": 5330 }, { "epoch": 0.1570643607383292, "grad_norm": 1.601809145641656, "learning_rate": 9.901211487445897e-06, "loss": 0.4071, "step": 5331 }, { "epoch": 0.15709382319579276, "grad_norm": 1.8356431985101616, "learning_rate": 9.901109744670886e-06, "loss": 0.6247, "step": 5332 }, { "epoch": 0.15712328565325634, "grad_norm": 1.594878986394632, "learning_rate": 9.901007950053434e-06, "loss": 0.5177, "step": 5333 }, { "epoch": 0.15715274811071991, "grad_norm": 1.5593396265195438, "learning_rate": 9.900906103594611e-06, "loss": 0.4975, "step": 5334 }, { "epoch": 0.1571822105681835, "grad_norm": 1.657728070931089, "learning_rate": 9.900804205295498e-06, "loss": 0.5655, "step": 5335 }, { "epoch": 0.15721167302564706, "grad_norm": 1.5426225434629748, "learning_rate": 9.900702255157173e-06, "loss": 0.4139, "step": 5336 }, { "epoch": 0.15724113548311064, "grad_norm": 1.7152121286805926, "learning_rate": 9.90060025318071e-06, "loss": 0.5375, "step": 5337 }, { "epoch": 0.1572705979405742, "grad_norm": 1.7172054138973714, "learning_rate": 9.900498199367194e-06, "loss": 0.5936, "step": 5338 }, { "epoch": 0.1573000603980378, "grad_norm": 1.6223888919646725, "learning_rate": 9.9003960937177e-06, "loss": 0.4024, "step": 5339 }, { "epoch": 0.15732952285550136, "grad_norm": 1.6952786387259222, "learning_rate": 9.90029393623331e-06, "loss": 0.633, "step": 5340 }, { "epoch": 0.15735898531296497, "grad_norm": 1.5477051197489733, "learning_rate": 9.900191726915104e-06, "loss": 0.4595, "step": 5341 }, { "epoch": 0.15738844777042854, "grad_norm": 1.7146202248382174, "learning_rate": 9.900089465764163e-06, "loss": 0.5739, "step": 5342 }, { "epoch": 0.15741791022789212, "grad_norm": 1.7119179512902547, "learning_rate": 9.89998715278157e-06, "loss": 0.4163, "step": 5343 }, { "epoch": 0.1574473726853557, "grad_norm": 1.877102181820925, "learning_rate": 9.899884787968406e-06, "loss": 0.559, "step": 5344 }, { "epoch": 0.15747683514281927, "grad_norm": 1.9885608003138193, "learning_rate": 9.899782371325753e-06, "loss": 0.6205, "step": 5345 }, { "epoch": 0.15750629760028284, "grad_norm": 1.956750719578513, "learning_rate": 9.899679902854696e-06, "loss": 0.5478, "step": 5346 }, { "epoch": 0.15753576005774642, "grad_norm": 1.9916475023032636, "learning_rate": 9.899577382556318e-06, "loss": 0.6035, "step": 5347 }, { "epoch": 0.15756522251521, "grad_norm": 1.7175757747047966, "learning_rate": 9.899474810431703e-06, "loss": 0.6303, "step": 5348 }, { "epoch": 0.15759468497267357, "grad_norm": 1.7817593981314677, "learning_rate": 9.899372186481937e-06, "loss": 0.6234, "step": 5349 }, { "epoch": 0.15762414743013714, "grad_norm": 2.5679456190161134, "learning_rate": 9.899269510708104e-06, "loss": 0.6005, "step": 5350 }, { "epoch": 0.15765360988760072, "grad_norm": 1.6022129258162887, "learning_rate": 9.899166783111293e-06, "loss": 0.5893, "step": 5351 }, { "epoch": 0.1576830723450643, "grad_norm": 2.062665643495655, "learning_rate": 9.899064003692589e-06, "loss": 0.5784, "step": 5352 }, { "epoch": 0.15771253480252787, "grad_norm": 1.8178373498122813, "learning_rate": 9.898961172453079e-06, "loss": 0.5768, "step": 5353 }, { "epoch": 0.15774199725999147, "grad_norm": 1.5745870379781584, "learning_rate": 9.89885828939385e-06, "loss": 0.414, "step": 5354 }, { "epoch": 0.15777145971745504, "grad_norm": 1.6068239111086642, "learning_rate": 9.898755354515992e-06, "loss": 0.6022, "step": 5355 }, { "epoch": 0.15780092217491862, "grad_norm": 1.8316453414963638, "learning_rate": 9.898652367820592e-06, "loss": 0.6252, "step": 5356 }, { "epoch": 0.1578303846323822, "grad_norm": 1.7071693804047567, "learning_rate": 9.89854932930874e-06, "loss": 0.5575, "step": 5357 }, { "epoch": 0.15785984708984577, "grad_norm": 1.7114155838488996, "learning_rate": 9.898446238981524e-06, "loss": 0.5346, "step": 5358 }, { "epoch": 0.15788930954730934, "grad_norm": 1.8296699174763134, "learning_rate": 9.898343096840041e-06, "loss": 0.7291, "step": 5359 }, { "epoch": 0.15791877200477292, "grad_norm": 1.89341610141862, "learning_rate": 9.898239902885374e-06, "loss": 0.5473, "step": 5360 }, { "epoch": 0.1579482344622365, "grad_norm": 1.8275649403958854, "learning_rate": 9.89813665711862e-06, "loss": 0.5258, "step": 5361 }, { "epoch": 0.15797769691970007, "grad_norm": 1.540846162930785, "learning_rate": 9.898033359540869e-06, "loss": 0.5129, "step": 5362 }, { "epoch": 0.15800715937716364, "grad_norm": 1.7195278565122234, "learning_rate": 9.897930010153214e-06, "loss": 0.6053, "step": 5363 }, { "epoch": 0.15803662183462722, "grad_norm": 1.7154134190116108, "learning_rate": 9.897826608956746e-06, "loss": 0.5595, "step": 5364 }, { "epoch": 0.1580660842920908, "grad_norm": 1.5504709788048319, "learning_rate": 9.897723155952563e-06, "loss": 0.3805, "step": 5365 }, { "epoch": 0.15809554674955437, "grad_norm": 1.7779394639535353, "learning_rate": 9.897619651141757e-06, "loss": 0.4607, "step": 5366 }, { "epoch": 0.15812500920701797, "grad_norm": 1.8308696110208766, "learning_rate": 9.897516094525421e-06, "loss": 0.5965, "step": 5367 }, { "epoch": 0.15815447166448154, "grad_norm": 1.660723424372387, "learning_rate": 9.897412486104654e-06, "loss": 0.5954, "step": 5368 }, { "epoch": 0.15818393412194512, "grad_norm": 1.6073707476393917, "learning_rate": 9.897308825880549e-06, "loss": 0.426, "step": 5369 }, { "epoch": 0.1582133965794087, "grad_norm": 1.7901362052498824, "learning_rate": 9.897205113854206e-06, "loss": 0.5121, "step": 5370 }, { "epoch": 0.15824285903687227, "grad_norm": 1.6635267484342184, "learning_rate": 9.89710135002672e-06, "loss": 0.5436, "step": 5371 }, { "epoch": 0.15827232149433584, "grad_norm": 1.7056094774927522, "learning_rate": 9.896997534399184e-06, "loss": 0.6098, "step": 5372 }, { "epoch": 0.15830178395179942, "grad_norm": 1.6526236123784956, "learning_rate": 9.896893666972705e-06, "loss": 0.4146, "step": 5373 }, { "epoch": 0.158331246409263, "grad_norm": 1.5668025766655014, "learning_rate": 9.896789747748375e-06, "loss": 0.4762, "step": 5374 }, { "epoch": 0.15836070886672657, "grad_norm": 1.7941292869630348, "learning_rate": 9.896685776727295e-06, "loss": 0.5136, "step": 5375 }, { "epoch": 0.15839017132419014, "grad_norm": 1.6102287719362234, "learning_rate": 9.896581753910566e-06, "loss": 0.5261, "step": 5376 }, { "epoch": 0.15841963378165372, "grad_norm": 1.7652467152123525, "learning_rate": 9.896477679299288e-06, "loss": 0.489, "step": 5377 }, { "epoch": 0.1584490962391173, "grad_norm": 1.6259884123151405, "learning_rate": 9.89637355289456e-06, "loss": 0.4902, "step": 5378 }, { "epoch": 0.1584785586965809, "grad_norm": 1.670640060749559, "learning_rate": 9.896269374697486e-06, "loss": 0.6463, "step": 5379 }, { "epoch": 0.15850802115404447, "grad_norm": 1.647278557975094, "learning_rate": 9.896165144709166e-06, "loss": 0.5045, "step": 5380 }, { "epoch": 0.15853748361150805, "grad_norm": 1.6882145952032948, "learning_rate": 9.896060862930704e-06, "loss": 0.4479, "step": 5381 }, { "epoch": 0.15856694606897162, "grad_norm": 1.7748307889243917, "learning_rate": 9.895956529363203e-06, "loss": 0.5343, "step": 5382 }, { "epoch": 0.1585964085264352, "grad_norm": 1.5190871964944421, "learning_rate": 9.895852144007764e-06, "loss": 0.4662, "step": 5383 }, { "epoch": 0.15862587098389877, "grad_norm": 1.8175117481145777, "learning_rate": 9.895747706865492e-06, "loss": 0.5202, "step": 5384 }, { "epoch": 0.15865533344136235, "grad_norm": 2.0391858987865668, "learning_rate": 9.895643217937496e-06, "loss": 0.6504, "step": 5385 }, { "epoch": 0.15868479589882592, "grad_norm": 1.7013831796331051, "learning_rate": 9.895538677224877e-06, "loss": 0.5547, "step": 5386 }, { "epoch": 0.1587142583562895, "grad_norm": 1.5351467321201269, "learning_rate": 9.895434084728741e-06, "loss": 0.5232, "step": 5387 }, { "epoch": 0.15874372081375307, "grad_norm": 1.5035373362457896, "learning_rate": 9.895329440450196e-06, "loss": 0.515, "step": 5388 }, { "epoch": 0.15877318327121664, "grad_norm": 1.9464126180093133, "learning_rate": 9.895224744390349e-06, "loss": 0.5117, "step": 5389 }, { "epoch": 0.15880264572868022, "grad_norm": 1.713834173586567, "learning_rate": 9.895119996550303e-06, "loss": 0.615, "step": 5390 }, { "epoch": 0.1588321081861438, "grad_norm": 1.8200795051012055, "learning_rate": 9.895015196931173e-06, "loss": 0.5538, "step": 5391 }, { "epoch": 0.1588615706436074, "grad_norm": 1.7461337216899226, "learning_rate": 9.894910345534063e-06, "loss": 0.6676, "step": 5392 }, { "epoch": 0.15889103310107097, "grad_norm": 1.722131991484338, "learning_rate": 9.894805442360083e-06, "loss": 0.551, "step": 5393 }, { "epoch": 0.15892049555853455, "grad_norm": 1.5893445975137421, "learning_rate": 9.894700487410343e-06, "loss": 0.5545, "step": 5394 }, { "epoch": 0.15894995801599812, "grad_norm": 1.7654055091462126, "learning_rate": 9.894595480685954e-06, "loss": 0.5515, "step": 5395 }, { "epoch": 0.1589794204734617, "grad_norm": 1.8321431065954576, "learning_rate": 9.894490422188024e-06, "loss": 0.6175, "step": 5396 }, { "epoch": 0.15900888293092527, "grad_norm": 1.7380561592932064, "learning_rate": 9.894385311917666e-06, "loss": 0.5864, "step": 5397 }, { "epoch": 0.15903834538838885, "grad_norm": 1.7071811583835437, "learning_rate": 9.894280149875991e-06, "loss": 0.4839, "step": 5398 }, { "epoch": 0.15906780784585242, "grad_norm": 1.5564307886707098, "learning_rate": 9.894174936064114e-06, "loss": 0.5401, "step": 5399 }, { "epoch": 0.159097270303316, "grad_norm": 1.7839390343532657, "learning_rate": 9.894069670483147e-06, "loss": 0.6265, "step": 5400 }, { "epoch": 0.15912673276077957, "grad_norm": 1.6931850611961936, "learning_rate": 9.8939643531342e-06, "loss": 0.4221, "step": 5401 }, { "epoch": 0.15915619521824315, "grad_norm": 1.6447225811573787, "learning_rate": 9.893858984018389e-06, "loss": 0.5333, "step": 5402 }, { "epoch": 0.15918565767570672, "grad_norm": 1.6415574804591242, "learning_rate": 9.893753563136832e-06, "loss": 0.4456, "step": 5403 }, { "epoch": 0.1592151201331703, "grad_norm": 1.7786448640943584, "learning_rate": 9.89364809049064e-06, "loss": 0.5448, "step": 5404 }, { "epoch": 0.1592445825906339, "grad_norm": 1.6936959567709573, "learning_rate": 9.893542566080929e-06, "loss": 0.4169, "step": 5405 }, { "epoch": 0.15927404504809747, "grad_norm": 1.7178108140048074, "learning_rate": 9.893436989908817e-06, "loss": 0.5362, "step": 5406 }, { "epoch": 0.15930350750556105, "grad_norm": 1.702175008074955, "learning_rate": 9.89333136197542e-06, "loss": 0.5369, "step": 5407 }, { "epoch": 0.15933296996302462, "grad_norm": 1.8011810339159013, "learning_rate": 9.893225682281855e-06, "loss": 0.5758, "step": 5408 }, { "epoch": 0.1593624324204882, "grad_norm": 1.6168933715193847, "learning_rate": 9.89311995082924e-06, "loss": 0.5544, "step": 5409 }, { "epoch": 0.15939189487795177, "grad_norm": 1.630682864523785, "learning_rate": 9.893014167618693e-06, "loss": 0.5736, "step": 5410 }, { "epoch": 0.15942135733541535, "grad_norm": 1.6539043083722513, "learning_rate": 9.892908332651333e-06, "loss": 0.5771, "step": 5411 }, { "epoch": 0.15945081979287892, "grad_norm": 1.5612166971074355, "learning_rate": 9.89280244592828e-06, "loss": 0.673, "step": 5412 }, { "epoch": 0.1594802822503425, "grad_norm": 1.5670811822818265, "learning_rate": 9.892696507450654e-06, "loss": 0.4857, "step": 5413 }, { "epoch": 0.15950974470780607, "grad_norm": 1.7427731695348905, "learning_rate": 9.892590517219577e-06, "loss": 0.511, "step": 5414 }, { "epoch": 0.15953920716526965, "grad_norm": 1.6331986982213378, "learning_rate": 9.892484475236167e-06, "loss": 0.5382, "step": 5415 }, { "epoch": 0.15956866962273322, "grad_norm": 1.7088710659445434, "learning_rate": 9.892378381501547e-06, "loss": 0.4965, "step": 5416 }, { "epoch": 0.1595981320801968, "grad_norm": 1.6115473906452922, "learning_rate": 9.89227223601684e-06, "loss": 0.5057, "step": 5417 }, { "epoch": 0.1596275945376604, "grad_norm": 1.4524982300006772, "learning_rate": 9.892166038783169e-06, "loss": 0.4326, "step": 5418 }, { "epoch": 0.15965705699512397, "grad_norm": 1.6456248127679505, "learning_rate": 9.892059789801656e-06, "loss": 0.6687, "step": 5419 }, { "epoch": 0.15968651945258755, "grad_norm": 1.4660626906533074, "learning_rate": 9.891953489073425e-06, "loss": 0.3981, "step": 5420 }, { "epoch": 0.15971598191005112, "grad_norm": 1.974517591567075, "learning_rate": 9.8918471365996e-06, "loss": 0.5662, "step": 5421 }, { "epoch": 0.1597454443675147, "grad_norm": 1.530049790132524, "learning_rate": 9.891740732381307e-06, "loss": 0.3794, "step": 5422 }, { "epoch": 0.15977490682497827, "grad_norm": 1.8073263558356598, "learning_rate": 9.891634276419673e-06, "loss": 0.5405, "step": 5423 }, { "epoch": 0.15980436928244185, "grad_norm": 1.7371893801397356, "learning_rate": 9.891527768715822e-06, "loss": 0.5794, "step": 5424 }, { "epoch": 0.15983383173990542, "grad_norm": 1.6542940228094556, "learning_rate": 9.891421209270881e-06, "loss": 0.5044, "step": 5425 }, { "epoch": 0.159863294197369, "grad_norm": 1.4739263092693273, "learning_rate": 9.891314598085978e-06, "loss": 0.374, "step": 5426 }, { "epoch": 0.15989275665483257, "grad_norm": 1.6668320179653127, "learning_rate": 9.891207935162236e-06, "loss": 0.452, "step": 5427 }, { "epoch": 0.15992221911229615, "grad_norm": 1.6744479835741863, "learning_rate": 9.891101220500791e-06, "loss": 0.5639, "step": 5428 }, { "epoch": 0.15995168156975972, "grad_norm": 1.7839773888191834, "learning_rate": 9.890994454102767e-06, "loss": 0.6563, "step": 5429 }, { "epoch": 0.1599811440272233, "grad_norm": 1.7260431665147928, "learning_rate": 9.890887635969294e-06, "loss": 0.5638, "step": 5430 }, { "epoch": 0.1600106064846869, "grad_norm": 1.6475105800401018, "learning_rate": 9.890780766101503e-06, "loss": 0.5615, "step": 5431 }, { "epoch": 0.16004006894215048, "grad_norm": 1.5823853677981592, "learning_rate": 9.890673844500522e-06, "loss": 0.4626, "step": 5432 }, { "epoch": 0.16006953139961405, "grad_norm": 1.7985601334509183, "learning_rate": 9.890566871167484e-06, "loss": 0.4824, "step": 5433 }, { "epoch": 0.16009899385707763, "grad_norm": 1.5616675626306593, "learning_rate": 9.890459846103519e-06, "loss": 0.4307, "step": 5434 }, { "epoch": 0.1601284563145412, "grad_norm": 1.7373245310937762, "learning_rate": 9.890352769309763e-06, "loss": 0.4766, "step": 5435 }, { "epoch": 0.16015791877200478, "grad_norm": 1.642737562892517, "learning_rate": 9.890245640787344e-06, "loss": 0.5001, "step": 5436 }, { "epoch": 0.16018738122946835, "grad_norm": 1.6797577240182413, "learning_rate": 9.890138460537398e-06, "loss": 0.6038, "step": 5437 }, { "epoch": 0.16021684368693193, "grad_norm": 1.7207423021929604, "learning_rate": 9.890031228561056e-06, "loss": 0.5399, "step": 5438 }, { "epoch": 0.1602463061443955, "grad_norm": 1.67639485231138, "learning_rate": 9.889923944859453e-06, "loss": 0.5719, "step": 5439 }, { "epoch": 0.16027576860185908, "grad_norm": 1.4935328497129576, "learning_rate": 9.88981660943373e-06, "loss": 0.4046, "step": 5440 }, { "epoch": 0.16030523105932265, "grad_norm": 1.8384904061095806, "learning_rate": 9.88970922228501e-06, "loss": 0.4663, "step": 5441 }, { "epoch": 0.16033469351678623, "grad_norm": 1.5598077692586676, "learning_rate": 9.889601783414443e-06, "loss": 0.5361, "step": 5442 }, { "epoch": 0.1603641559742498, "grad_norm": 1.7815924409222965, "learning_rate": 9.889494292823153e-06, "loss": 0.619, "step": 5443 }, { "epoch": 0.1603936184317134, "grad_norm": 2.1595152495062657, "learning_rate": 9.889386750512285e-06, "loss": 0.5909, "step": 5444 }, { "epoch": 0.16042308088917698, "grad_norm": 1.6977546228677278, "learning_rate": 9.889279156482974e-06, "loss": 0.6394, "step": 5445 }, { "epoch": 0.16045254334664055, "grad_norm": 1.8065870794454666, "learning_rate": 9.889171510736358e-06, "loss": 0.4954, "step": 5446 }, { "epoch": 0.16048200580410413, "grad_norm": 1.6276890246787739, "learning_rate": 9.889063813273575e-06, "loss": 0.491, "step": 5447 }, { "epoch": 0.1605114682615677, "grad_norm": 1.9934168557751641, "learning_rate": 9.888956064095767e-06, "loss": 0.6572, "step": 5448 }, { "epoch": 0.16054093071903128, "grad_norm": 1.8460718379053207, "learning_rate": 9.88884826320407e-06, "loss": 0.6712, "step": 5449 }, { "epoch": 0.16057039317649485, "grad_norm": 1.4988886864888473, "learning_rate": 9.888740410599626e-06, "loss": 0.4991, "step": 5450 }, { "epoch": 0.16059985563395843, "grad_norm": 1.4877097828078012, "learning_rate": 9.888632506283576e-06, "loss": 0.6088, "step": 5451 }, { "epoch": 0.160629318091422, "grad_norm": 1.6136140343647805, "learning_rate": 9.88852455025706e-06, "loss": 0.4445, "step": 5452 }, { "epoch": 0.16065878054888558, "grad_norm": 1.7535475075467741, "learning_rate": 9.888416542521224e-06, "loss": 0.6002, "step": 5453 }, { "epoch": 0.16068824300634915, "grad_norm": 1.661198738503848, "learning_rate": 9.888308483077206e-06, "loss": 0.5758, "step": 5454 }, { "epoch": 0.16071770546381273, "grad_norm": 1.6251481238464032, "learning_rate": 9.888200371926151e-06, "loss": 0.5873, "step": 5455 }, { "epoch": 0.1607471679212763, "grad_norm": 1.8658176864183476, "learning_rate": 9.888092209069203e-06, "loss": 0.5289, "step": 5456 }, { "epoch": 0.1607766303787399, "grad_norm": 1.690215420356493, "learning_rate": 9.887983994507503e-06, "loss": 0.5203, "step": 5457 }, { "epoch": 0.16080609283620348, "grad_norm": 1.5115734181384126, "learning_rate": 9.8878757282422e-06, "loss": 0.4687, "step": 5458 }, { "epoch": 0.16083555529366705, "grad_norm": 1.5163664627470745, "learning_rate": 9.887767410274435e-06, "loss": 0.4798, "step": 5459 }, { "epoch": 0.16086501775113063, "grad_norm": 1.663474459684254, "learning_rate": 9.887659040605358e-06, "loss": 0.5627, "step": 5460 }, { "epoch": 0.1608944802085942, "grad_norm": 1.8279154586976185, "learning_rate": 9.887550619236113e-06, "loss": 0.4326, "step": 5461 }, { "epoch": 0.16092394266605778, "grad_norm": 1.6385248554341152, "learning_rate": 9.887442146167847e-06, "loss": 0.4394, "step": 5462 }, { "epoch": 0.16095340512352135, "grad_norm": 1.8364808758440918, "learning_rate": 9.887333621401706e-06, "loss": 0.5206, "step": 5463 }, { "epoch": 0.16098286758098493, "grad_norm": 1.7867732856313494, "learning_rate": 9.887225044938843e-06, "loss": 0.4868, "step": 5464 }, { "epoch": 0.1610123300384485, "grad_norm": 1.4979595797222423, "learning_rate": 9.8871164167804e-06, "loss": 0.4104, "step": 5465 }, { "epoch": 0.16104179249591208, "grad_norm": 1.530815099914064, "learning_rate": 9.887007736927529e-06, "loss": 0.5096, "step": 5466 }, { "epoch": 0.16107125495337565, "grad_norm": 1.537651729407192, "learning_rate": 9.886899005381379e-06, "loss": 0.3785, "step": 5467 }, { "epoch": 0.16110071741083923, "grad_norm": 1.54768522706426, "learning_rate": 9.8867902221431e-06, "loss": 0.4542, "step": 5468 }, { "epoch": 0.1611301798683028, "grad_norm": 1.465423616041854, "learning_rate": 9.886681387213847e-06, "loss": 0.3877, "step": 5469 }, { "epoch": 0.1611596423257664, "grad_norm": 1.541980217375097, "learning_rate": 9.886572500594764e-06, "loss": 0.421, "step": 5470 }, { "epoch": 0.16118910478322998, "grad_norm": 1.7962648683934272, "learning_rate": 9.88646356228701e-06, "loss": 0.6173, "step": 5471 }, { "epoch": 0.16121856724069356, "grad_norm": 1.9015142685638124, "learning_rate": 9.886354572291729e-06, "loss": 0.5547, "step": 5472 }, { "epoch": 0.16124802969815713, "grad_norm": 1.8628314574045959, "learning_rate": 9.886245530610083e-06, "loss": 0.6302, "step": 5473 }, { "epoch": 0.1612774921556207, "grad_norm": 1.805785486772092, "learning_rate": 9.886136437243217e-06, "loss": 0.3546, "step": 5474 }, { "epoch": 0.16130695461308428, "grad_norm": 1.7207829770474572, "learning_rate": 9.886027292192291e-06, "loss": 0.538, "step": 5475 }, { "epoch": 0.16133641707054786, "grad_norm": 1.4488453389499811, "learning_rate": 9.885918095458457e-06, "loss": 0.447, "step": 5476 }, { "epoch": 0.16136587952801143, "grad_norm": 1.564133823410445, "learning_rate": 9.88580884704287e-06, "loss": 0.4701, "step": 5477 }, { "epoch": 0.161395341985475, "grad_norm": 1.6690102109227625, "learning_rate": 9.885699546946687e-06, "loss": 0.3389, "step": 5478 }, { "epoch": 0.16142480444293858, "grad_norm": 1.675828180634911, "learning_rate": 9.885590195171063e-06, "loss": 0.4693, "step": 5479 }, { "epoch": 0.16145426690040215, "grad_norm": 1.6499219044955575, "learning_rate": 9.885480791717155e-06, "loss": 0.5975, "step": 5480 }, { "epoch": 0.16148372935786573, "grad_norm": 1.686613813236822, "learning_rate": 9.885371336586118e-06, "loss": 0.5231, "step": 5481 }, { "epoch": 0.1615131918153293, "grad_norm": 1.7835781621135074, "learning_rate": 9.885261829779115e-06, "loss": 0.7051, "step": 5482 }, { "epoch": 0.1615426542727929, "grad_norm": 1.4126002105735398, "learning_rate": 9.885152271297301e-06, "loss": 0.3575, "step": 5483 }, { "epoch": 0.16157211673025648, "grad_norm": 1.635974802679402, "learning_rate": 9.885042661141832e-06, "loss": 0.4169, "step": 5484 }, { "epoch": 0.16160157918772006, "grad_norm": 1.4726994949720558, "learning_rate": 9.884932999313872e-06, "loss": 0.465, "step": 5485 }, { "epoch": 0.16163104164518363, "grad_norm": 1.8027910444854525, "learning_rate": 9.88482328581458e-06, "loss": 0.5689, "step": 5486 }, { "epoch": 0.1616605041026472, "grad_norm": 1.8802502416540325, "learning_rate": 9.884713520645116e-06, "loss": 0.6876, "step": 5487 }, { "epoch": 0.16168996656011078, "grad_norm": 1.8140617126844818, "learning_rate": 9.884603703806642e-06, "loss": 0.6122, "step": 5488 }, { "epoch": 0.16171942901757436, "grad_norm": 1.916512770924693, "learning_rate": 9.884493835300315e-06, "loss": 0.6749, "step": 5489 }, { "epoch": 0.16174889147503793, "grad_norm": 1.7995085354643903, "learning_rate": 9.884383915127303e-06, "loss": 0.5146, "step": 5490 }, { "epoch": 0.1617783539325015, "grad_norm": 1.5461215197055886, "learning_rate": 9.884273943288767e-06, "loss": 0.4035, "step": 5491 }, { "epoch": 0.16180781638996508, "grad_norm": 1.7264215263221439, "learning_rate": 9.884163919785869e-06, "loss": 0.5836, "step": 5492 }, { "epoch": 0.16183727884742866, "grad_norm": 2.0890631199518475, "learning_rate": 9.884053844619775e-06, "loss": 0.5767, "step": 5493 }, { "epoch": 0.16186674130489223, "grad_norm": 1.5525496712347089, "learning_rate": 9.883943717791647e-06, "loss": 0.4824, "step": 5494 }, { "epoch": 0.1618962037623558, "grad_norm": 1.541191189050318, "learning_rate": 9.883833539302649e-06, "loss": 0.5423, "step": 5495 }, { "epoch": 0.1619256662198194, "grad_norm": 1.6704531366061182, "learning_rate": 9.883723309153952e-06, "loss": 0.6305, "step": 5496 }, { "epoch": 0.16195512867728298, "grad_norm": 1.8254289251801477, "learning_rate": 9.883613027346715e-06, "loss": 0.6167, "step": 5497 }, { "epoch": 0.16198459113474656, "grad_norm": 1.7865916439730043, "learning_rate": 9.88350269388211e-06, "loss": 0.4862, "step": 5498 }, { "epoch": 0.16201405359221013, "grad_norm": 1.88828302137583, "learning_rate": 9.8833923087613e-06, "loss": 0.4407, "step": 5499 }, { "epoch": 0.1620435160496737, "grad_norm": 1.9636004366193576, "learning_rate": 9.883281871985456e-06, "loss": 0.6966, "step": 5500 }, { "epoch": 0.16207297850713728, "grad_norm": 1.608065458813235, "learning_rate": 9.883171383555742e-06, "loss": 0.4315, "step": 5501 }, { "epoch": 0.16210244096460086, "grad_norm": 1.8969066813964284, "learning_rate": 9.883060843473331e-06, "loss": 0.6751, "step": 5502 }, { "epoch": 0.16213190342206443, "grad_norm": 1.8923613870910978, "learning_rate": 9.882950251739391e-06, "loss": 0.5534, "step": 5503 }, { "epoch": 0.162161365879528, "grad_norm": 1.7890583510108031, "learning_rate": 9.88283960835509e-06, "loss": 0.457, "step": 5504 }, { "epoch": 0.16219082833699158, "grad_norm": 1.9095622057966082, "learning_rate": 9.882728913321601e-06, "loss": 0.3747, "step": 5505 }, { "epoch": 0.16222029079445516, "grad_norm": 1.7809134950547083, "learning_rate": 9.882618166640092e-06, "loss": 0.4853, "step": 5506 }, { "epoch": 0.16224975325191873, "grad_norm": 1.623512402579229, "learning_rate": 9.882507368311738e-06, "loss": 0.4934, "step": 5507 }, { "epoch": 0.1622792157093823, "grad_norm": 1.7403417234449212, "learning_rate": 9.882396518337709e-06, "loss": 0.6385, "step": 5508 }, { "epoch": 0.1623086781668459, "grad_norm": 1.5328927553546168, "learning_rate": 9.882285616719176e-06, "loss": 0.5373, "step": 5509 }, { "epoch": 0.16233814062430948, "grad_norm": 1.8156340111418674, "learning_rate": 9.882174663457315e-06, "loss": 0.6585, "step": 5510 }, { "epoch": 0.16236760308177306, "grad_norm": 1.7785271221433243, "learning_rate": 9.882063658553299e-06, "loss": 0.5278, "step": 5511 }, { "epoch": 0.16239706553923663, "grad_norm": 1.7290726688070823, "learning_rate": 9.8819526020083e-06, "loss": 0.5176, "step": 5512 }, { "epoch": 0.1624265279967002, "grad_norm": 1.7243791838169296, "learning_rate": 9.881841493823494e-06, "loss": 0.6862, "step": 5513 }, { "epoch": 0.16245599045416378, "grad_norm": 1.5503892131001569, "learning_rate": 9.881730334000057e-06, "loss": 0.3907, "step": 5514 }, { "epoch": 0.16248545291162736, "grad_norm": 1.5398899590002344, "learning_rate": 9.881619122539163e-06, "loss": 0.5925, "step": 5515 }, { "epoch": 0.16251491536909093, "grad_norm": 1.6349924190109852, "learning_rate": 9.881507859441993e-06, "loss": 0.391, "step": 5516 }, { "epoch": 0.1625443778265545, "grad_norm": 2.0078883894218698, "learning_rate": 9.881396544709716e-06, "loss": 0.5449, "step": 5517 }, { "epoch": 0.16257384028401808, "grad_norm": 1.5319424157120924, "learning_rate": 9.881285178343518e-06, "loss": 0.5785, "step": 5518 }, { "epoch": 0.16260330274148166, "grad_norm": 1.907423041141043, "learning_rate": 9.88117376034457e-06, "loss": 0.5206, "step": 5519 }, { "epoch": 0.16263276519894523, "grad_norm": 1.698555617574254, "learning_rate": 9.881062290714056e-06, "loss": 0.6081, "step": 5520 }, { "epoch": 0.1626622276564088, "grad_norm": 1.56997589901018, "learning_rate": 9.880950769453151e-06, "loss": 0.5779, "step": 5521 }, { "epoch": 0.1626916901138724, "grad_norm": 1.662176700934883, "learning_rate": 9.880839196563037e-06, "loss": 0.5049, "step": 5522 }, { "epoch": 0.162721152571336, "grad_norm": 1.6959023030356715, "learning_rate": 9.880727572044894e-06, "loss": 0.3386, "step": 5523 }, { "epoch": 0.16275061502879956, "grad_norm": 1.6038864464573122, "learning_rate": 9.880615895899902e-06, "loss": 0.5272, "step": 5524 }, { "epoch": 0.16278007748626314, "grad_norm": 1.5792017422937412, "learning_rate": 9.88050416812924e-06, "loss": 0.5316, "step": 5525 }, { "epoch": 0.1628095399437267, "grad_norm": 1.5625204085083282, "learning_rate": 9.880392388734097e-06, "loss": 0.5621, "step": 5526 }, { "epoch": 0.16283900240119029, "grad_norm": 1.4893699201797008, "learning_rate": 9.880280557715647e-06, "loss": 0.4359, "step": 5527 }, { "epoch": 0.16286846485865386, "grad_norm": 1.5881871300306725, "learning_rate": 9.880168675075077e-06, "loss": 0.5345, "step": 5528 }, { "epoch": 0.16289792731611744, "grad_norm": 1.8775652157174492, "learning_rate": 9.88005674081357e-06, "loss": 0.4757, "step": 5529 }, { "epoch": 0.162927389773581, "grad_norm": 1.8209786818080613, "learning_rate": 9.879944754932311e-06, "loss": 0.57, "step": 5530 }, { "epoch": 0.16295685223104459, "grad_norm": 1.7919653465524052, "learning_rate": 9.879832717432484e-06, "loss": 0.5707, "step": 5531 }, { "epoch": 0.16298631468850816, "grad_norm": 1.6198545437438105, "learning_rate": 9.879720628315272e-06, "loss": 0.444, "step": 5532 }, { "epoch": 0.16301577714597174, "grad_norm": 1.6985170600562831, "learning_rate": 9.879608487581864e-06, "loss": 0.5923, "step": 5533 }, { "epoch": 0.1630452396034353, "grad_norm": 1.5207582779939688, "learning_rate": 9.879496295233444e-06, "loss": 0.4512, "step": 5534 }, { "epoch": 0.1630747020608989, "grad_norm": 1.567193708821216, "learning_rate": 9.8793840512712e-06, "loss": 0.4073, "step": 5535 }, { "epoch": 0.1631041645183625, "grad_norm": 1.5630608792757656, "learning_rate": 9.879271755696315e-06, "loss": 0.4933, "step": 5536 }, { "epoch": 0.16313362697582606, "grad_norm": 1.7960513859618186, "learning_rate": 9.879159408509984e-06, "loss": 0.6353, "step": 5537 }, { "epoch": 0.16316308943328964, "grad_norm": 1.7446302922586248, "learning_rate": 9.879047009713391e-06, "loss": 0.5924, "step": 5538 }, { "epoch": 0.1631925518907532, "grad_norm": 1.690356185507057, "learning_rate": 9.878934559307725e-06, "loss": 0.6151, "step": 5539 }, { "epoch": 0.1632220143482168, "grad_norm": 2.125199757309223, "learning_rate": 9.878822057294177e-06, "loss": 0.6184, "step": 5540 }, { "epoch": 0.16325147680568036, "grad_norm": 1.6640222069591952, "learning_rate": 9.878709503673936e-06, "loss": 0.4819, "step": 5541 }, { "epoch": 0.16328093926314394, "grad_norm": 1.6347172237520515, "learning_rate": 9.878596898448192e-06, "loss": 0.5369, "step": 5542 }, { "epoch": 0.1633104017206075, "grad_norm": 1.704637243472427, "learning_rate": 9.878484241618137e-06, "loss": 0.6077, "step": 5543 }, { "epoch": 0.1633398641780711, "grad_norm": 1.8298718586903722, "learning_rate": 9.878371533184962e-06, "loss": 0.4263, "step": 5544 }, { "epoch": 0.16336932663553466, "grad_norm": 1.8394980323663717, "learning_rate": 9.87825877314986e-06, "loss": 0.6559, "step": 5545 }, { "epoch": 0.16339878909299824, "grad_norm": 1.3699928438494264, "learning_rate": 9.878145961514025e-06, "loss": 0.3707, "step": 5546 }, { "epoch": 0.1634282515504618, "grad_norm": 1.6642475586441319, "learning_rate": 9.878033098278647e-06, "loss": 0.5644, "step": 5547 }, { "epoch": 0.16345771400792541, "grad_norm": 1.6145038335042898, "learning_rate": 9.877920183444923e-06, "loss": 0.5694, "step": 5548 }, { "epoch": 0.163487176465389, "grad_norm": 1.552766889908547, "learning_rate": 9.877807217014044e-06, "loss": 0.5505, "step": 5549 }, { "epoch": 0.16351663892285256, "grad_norm": 1.625040014049261, "learning_rate": 9.877694198987207e-06, "loss": 0.4465, "step": 5550 }, { "epoch": 0.16354610138031614, "grad_norm": 1.6036225493407756, "learning_rate": 9.877581129365607e-06, "loss": 0.4996, "step": 5551 }, { "epoch": 0.1635755638377797, "grad_norm": 1.7175236709583408, "learning_rate": 9.877468008150441e-06, "loss": 0.4852, "step": 5552 }, { "epoch": 0.1636050262952433, "grad_norm": 1.9780834414774662, "learning_rate": 9.877354835342905e-06, "loss": 0.6941, "step": 5553 }, { "epoch": 0.16363448875270686, "grad_norm": 1.6355954568278266, "learning_rate": 9.877241610944197e-06, "loss": 0.6997, "step": 5554 }, { "epoch": 0.16366395121017044, "grad_norm": 1.675232045586392, "learning_rate": 9.877128334955512e-06, "loss": 0.4522, "step": 5555 }, { "epoch": 0.163693413667634, "grad_norm": 1.5742447752539237, "learning_rate": 9.877015007378052e-06, "loss": 0.3917, "step": 5556 }, { "epoch": 0.1637228761250976, "grad_norm": 1.6876291067914004, "learning_rate": 9.876901628213012e-06, "loss": 0.4136, "step": 5557 }, { "epoch": 0.16375233858256116, "grad_norm": 1.8222766911777806, "learning_rate": 9.876788197461593e-06, "loss": 0.6372, "step": 5558 }, { "epoch": 0.16378180104002474, "grad_norm": 1.6805373089158033, "learning_rate": 9.876674715124995e-06, "loss": 0.4962, "step": 5559 }, { "epoch": 0.1638112634974883, "grad_norm": 1.5647394551157339, "learning_rate": 9.876561181204416e-06, "loss": 0.5799, "step": 5560 }, { "epoch": 0.16384072595495192, "grad_norm": 1.7464499870425847, "learning_rate": 9.876447595701061e-06, "loss": 0.5788, "step": 5561 }, { "epoch": 0.1638701884124155, "grad_norm": 1.8015157120617125, "learning_rate": 9.876333958616129e-06, "loss": 0.6464, "step": 5562 }, { "epoch": 0.16389965086987907, "grad_norm": 1.7093420221061295, "learning_rate": 9.876220269950822e-06, "loss": 0.5111, "step": 5563 }, { "epoch": 0.16392911332734264, "grad_norm": 1.7386361960319354, "learning_rate": 9.876106529706345e-06, "loss": 0.6181, "step": 5564 }, { "epoch": 0.16395857578480622, "grad_norm": 1.5310534071060669, "learning_rate": 9.875992737883897e-06, "loss": 0.3795, "step": 5565 }, { "epoch": 0.1639880382422698, "grad_norm": 1.6164672179688848, "learning_rate": 9.875878894484685e-06, "loss": 0.5701, "step": 5566 }, { "epoch": 0.16401750069973337, "grad_norm": 1.7383809696317223, "learning_rate": 9.875764999509912e-06, "loss": 0.4639, "step": 5567 }, { "epoch": 0.16404696315719694, "grad_norm": 1.670725292164863, "learning_rate": 9.875651052960783e-06, "loss": 0.5864, "step": 5568 }, { "epoch": 0.16407642561466051, "grad_norm": 1.7462287300978807, "learning_rate": 9.875537054838502e-06, "loss": 0.4565, "step": 5569 }, { "epoch": 0.1641058880721241, "grad_norm": 1.6549879858520093, "learning_rate": 9.875423005144277e-06, "loss": 0.5413, "step": 5570 }, { "epoch": 0.16413535052958766, "grad_norm": 1.6528478165281322, "learning_rate": 9.875308903879312e-06, "loss": 0.5199, "step": 5571 }, { "epoch": 0.16416481298705124, "grad_norm": 1.6252329413581477, "learning_rate": 9.875194751044816e-06, "loss": 0.5703, "step": 5572 }, { "epoch": 0.16419427544451481, "grad_norm": 1.51164643861089, "learning_rate": 9.875080546641996e-06, "loss": 0.5425, "step": 5573 }, { "epoch": 0.16422373790197842, "grad_norm": 1.711629489235107, "learning_rate": 9.87496629067206e-06, "loss": 0.5851, "step": 5574 }, { "epoch": 0.164253200359442, "grad_norm": 1.902892382826348, "learning_rate": 9.874851983136216e-06, "loss": 0.5784, "step": 5575 }, { "epoch": 0.16428266281690557, "grad_norm": 2.1315743012035506, "learning_rate": 9.874737624035673e-06, "loss": 0.4939, "step": 5576 }, { "epoch": 0.16431212527436914, "grad_norm": 1.7994657832853724, "learning_rate": 9.87462321337164e-06, "loss": 0.4464, "step": 5577 }, { "epoch": 0.16434158773183272, "grad_norm": 1.7176687284968257, "learning_rate": 9.87450875114533e-06, "loss": 0.6256, "step": 5578 }, { "epoch": 0.1643710501892963, "grad_norm": 1.8005478115574955, "learning_rate": 9.874394237357951e-06, "loss": 0.3858, "step": 5579 }, { "epoch": 0.16440051264675987, "grad_norm": 1.952979507014757, "learning_rate": 9.874279672010716e-06, "loss": 0.5928, "step": 5580 }, { "epoch": 0.16442997510422344, "grad_norm": 2.108884396872746, "learning_rate": 9.874165055104835e-06, "loss": 0.6813, "step": 5581 }, { "epoch": 0.16445943756168702, "grad_norm": 1.43490290736714, "learning_rate": 9.874050386641524e-06, "loss": 0.5179, "step": 5582 }, { "epoch": 0.1644889000191506, "grad_norm": 1.4691105430339642, "learning_rate": 9.87393566662199e-06, "loss": 0.464, "step": 5583 }, { "epoch": 0.16451836247661417, "grad_norm": 2.015697221669481, "learning_rate": 9.873820895047452e-06, "loss": 0.7462, "step": 5584 }, { "epoch": 0.16454782493407774, "grad_norm": 1.6670562058487033, "learning_rate": 9.873706071919121e-06, "loss": 0.6708, "step": 5585 }, { "epoch": 0.16457728739154132, "grad_norm": 1.9288344206279084, "learning_rate": 9.873591197238212e-06, "loss": 0.4801, "step": 5586 }, { "epoch": 0.16460674984900492, "grad_norm": 1.5982639199541138, "learning_rate": 9.873476271005942e-06, "loss": 0.4523, "step": 5587 }, { "epoch": 0.1646362123064685, "grad_norm": 1.5383044042357896, "learning_rate": 9.873361293223524e-06, "loss": 0.5321, "step": 5588 }, { "epoch": 0.16466567476393207, "grad_norm": 1.5231895064195233, "learning_rate": 9.873246263892178e-06, "loss": 0.5, "step": 5589 }, { "epoch": 0.16469513722139564, "grad_norm": 1.589561387453745, "learning_rate": 9.873131183013117e-06, "loss": 0.4251, "step": 5590 }, { "epoch": 0.16472459967885922, "grad_norm": 1.5869623270624929, "learning_rate": 9.873016050587558e-06, "loss": 0.5761, "step": 5591 }, { "epoch": 0.1647540621363228, "grad_norm": 1.604259816307401, "learning_rate": 9.872900866616722e-06, "loss": 0.5943, "step": 5592 }, { "epoch": 0.16478352459378637, "grad_norm": 1.8624969757967138, "learning_rate": 9.872785631101823e-06, "loss": 0.5442, "step": 5593 }, { "epoch": 0.16481298705124994, "grad_norm": 1.6036273082931307, "learning_rate": 9.872670344044086e-06, "loss": 0.4734, "step": 5594 }, { "epoch": 0.16484244950871352, "grad_norm": 1.5864332068133247, "learning_rate": 9.872555005444724e-06, "loss": 0.4778, "step": 5595 }, { "epoch": 0.1648719119661771, "grad_norm": 1.8644880735076599, "learning_rate": 9.872439615304962e-06, "loss": 0.6733, "step": 5596 }, { "epoch": 0.16490137442364067, "grad_norm": 1.9261691978380269, "learning_rate": 9.872324173626019e-06, "loss": 0.497, "step": 5597 }, { "epoch": 0.16493083688110424, "grad_norm": 1.6479565611863585, "learning_rate": 9.872208680409114e-06, "loss": 0.6046, "step": 5598 }, { "epoch": 0.16496029933856782, "grad_norm": 1.6759628633035268, "learning_rate": 9.872093135655471e-06, "loss": 0.5706, "step": 5599 }, { "epoch": 0.16498976179603142, "grad_norm": 1.63380108383451, "learning_rate": 9.871977539366313e-06, "loss": 0.5343, "step": 5600 }, { "epoch": 0.165019224253495, "grad_norm": 1.7212257449364847, "learning_rate": 9.871861891542861e-06, "loss": 0.6014, "step": 5601 }, { "epoch": 0.16504868671095857, "grad_norm": 1.6277783374276351, "learning_rate": 9.871746192186338e-06, "loss": 0.5371, "step": 5602 }, { "epoch": 0.16507814916842214, "grad_norm": 1.8102177049616621, "learning_rate": 9.87163044129797e-06, "loss": 0.5529, "step": 5603 }, { "epoch": 0.16510761162588572, "grad_norm": 1.4643830280790815, "learning_rate": 9.871514638878978e-06, "loss": 0.5904, "step": 5604 }, { "epoch": 0.1651370740833493, "grad_norm": 1.444809084473881, "learning_rate": 9.871398784930588e-06, "loss": 0.4989, "step": 5605 }, { "epoch": 0.16516653654081287, "grad_norm": 1.6118661225872668, "learning_rate": 9.87128287945403e-06, "loss": 0.4032, "step": 5606 }, { "epoch": 0.16519599899827644, "grad_norm": 1.9459162777087802, "learning_rate": 9.871166922450523e-06, "loss": 0.5254, "step": 5607 }, { "epoch": 0.16522546145574002, "grad_norm": 1.7483315836505806, "learning_rate": 9.871050913921297e-06, "loss": 0.5292, "step": 5608 }, { "epoch": 0.1652549239132036, "grad_norm": 1.6505952993465625, "learning_rate": 9.87093485386758e-06, "loss": 0.4328, "step": 5609 }, { "epoch": 0.16528438637066717, "grad_norm": 1.6214584217626138, "learning_rate": 9.8708187422906e-06, "loss": 0.5723, "step": 5610 }, { "epoch": 0.16531384882813074, "grad_norm": 1.4579906749165725, "learning_rate": 9.87070257919158e-06, "loss": 0.5073, "step": 5611 }, { "epoch": 0.16534331128559432, "grad_norm": 1.549340560675296, "learning_rate": 9.870586364571754e-06, "loss": 0.4676, "step": 5612 }, { "epoch": 0.16537277374305792, "grad_norm": 1.785221245913295, "learning_rate": 9.870470098432351e-06, "loss": 0.5645, "step": 5613 }, { "epoch": 0.1654022362005215, "grad_norm": 1.5791147341126197, "learning_rate": 9.870353780774599e-06, "loss": 0.4039, "step": 5614 }, { "epoch": 0.16543169865798507, "grad_norm": 1.4574990160903456, "learning_rate": 9.87023741159973e-06, "loss": 0.4651, "step": 5615 }, { "epoch": 0.16546116111544865, "grad_norm": 1.8535141640154993, "learning_rate": 9.870120990908973e-06, "loss": 0.5868, "step": 5616 }, { "epoch": 0.16549062357291222, "grad_norm": 1.462531460909763, "learning_rate": 9.870004518703559e-06, "loss": 0.4582, "step": 5617 }, { "epoch": 0.1655200860303758, "grad_norm": 1.673410776294523, "learning_rate": 9.869887994984724e-06, "loss": 0.6119, "step": 5618 }, { "epoch": 0.16554954848783937, "grad_norm": 1.8658575615552144, "learning_rate": 9.869771419753695e-06, "loss": 0.5224, "step": 5619 }, { "epoch": 0.16557901094530295, "grad_norm": 1.69864710529464, "learning_rate": 9.86965479301171e-06, "loss": 0.5924, "step": 5620 }, { "epoch": 0.16560847340276652, "grad_norm": 2.021985097898214, "learning_rate": 9.86953811476e-06, "loss": 0.4721, "step": 5621 }, { "epoch": 0.1656379358602301, "grad_norm": 1.739227605256562, "learning_rate": 9.869421384999802e-06, "loss": 0.4667, "step": 5622 }, { "epoch": 0.16566739831769367, "grad_norm": 1.7125350688830259, "learning_rate": 9.869304603732345e-06, "loss": 0.6561, "step": 5623 }, { "epoch": 0.16569686077515725, "grad_norm": 1.9519412991657283, "learning_rate": 9.86918777095887e-06, "loss": 0.6247, "step": 5624 }, { "epoch": 0.16572632323262082, "grad_norm": 1.6224632281206888, "learning_rate": 9.869070886680611e-06, "loss": 0.6116, "step": 5625 }, { "epoch": 0.16575578569008442, "grad_norm": 1.9235941865787354, "learning_rate": 9.868953950898804e-06, "loss": 0.7507, "step": 5626 }, { "epoch": 0.165785248147548, "grad_norm": 1.6136719316537502, "learning_rate": 9.868836963614684e-06, "loss": 0.5041, "step": 5627 }, { "epoch": 0.16581471060501157, "grad_norm": 1.7914824871859503, "learning_rate": 9.868719924829493e-06, "loss": 0.5823, "step": 5628 }, { "epoch": 0.16584417306247515, "grad_norm": 1.5451919368165568, "learning_rate": 9.868602834544464e-06, "loss": 0.4288, "step": 5629 }, { "epoch": 0.16587363551993872, "grad_norm": 1.5848955310087285, "learning_rate": 9.86848569276084e-06, "loss": 0.3812, "step": 5630 }, { "epoch": 0.1659030979774023, "grad_norm": 1.5476047866170561, "learning_rate": 9.868368499479856e-06, "loss": 0.5395, "step": 5631 }, { "epoch": 0.16593256043486587, "grad_norm": 1.5231444400015839, "learning_rate": 9.868251254702756e-06, "loss": 0.5383, "step": 5632 }, { "epoch": 0.16596202289232945, "grad_norm": 1.606125490259266, "learning_rate": 9.868133958430777e-06, "loss": 0.5439, "step": 5633 }, { "epoch": 0.16599148534979302, "grad_norm": 1.599661785592392, "learning_rate": 9.868016610665159e-06, "loss": 0.5672, "step": 5634 }, { "epoch": 0.1660209478072566, "grad_norm": 1.741614695956616, "learning_rate": 9.867899211407146e-06, "loss": 0.5406, "step": 5635 }, { "epoch": 0.16605041026472017, "grad_norm": 1.6024145535259946, "learning_rate": 9.867781760657979e-06, "loss": 0.6292, "step": 5636 }, { "epoch": 0.16607987272218375, "grad_norm": 1.595057951875793, "learning_rate": 9.8676642584189e-06, "loss": 0.5842, "step": 5637 }, { "epoch": 0.16610933517964732, "grad_norm": 1.7896242333965946, "learning_rate": 9.867546704691153e-06, "loss": 0.5726, "step": 5638 }, { "epoch": 0.16613879763711092, "grad_norm": 1.900441503336277, "learning_rate": 9.867429099475979e-06, "loss": 0.7083, "step": 5639 }, { "epoch": 0.1661682600945745, "grad_norm": 1.327406472879312, "learning_rate": 9.867311442774623e-06, "loss": 0.3391, "step": 5640 }, { "epoch": 0.16619772255203807, "grad_norm": 1.614943387907772, "learning_rate": 9.867193734588331e-06, "loss": 0.3199, "step": 5641 }, { "epoch": 0.16622718500950165, "grad_norm": 1.8266414802197857, "learning_rate": 9.867075974918348e-06, "loss": 0.5801, "step": 5642 }, { "epoch": 0.16625664746696522, "grad_norm": 1.5625114602969428, "learning_rate": 9.866958163765917e-06, "loss": 0.4597, "step": 5643 }, { "epoch": 0.1662861099244288, "grad_norm": 1.535237158969151, "learning_rate": 9.866840301132286e-06, "loss": 0.5287, "step": 5644 }, { "epoch": 0.16631557238189237, "grad_norm": 1.7243700285879056, "learning_rate": 9.866722387018702e-06, "loss": 0.551, "step": 5645 }, { "epoch": 0.16634503483935595, "grad_norm": 1.7298558259448908, "learning_rate": 9.86660442142641e-06, "loss": 0.5404, "step": 5646 }, { "epoch": 0.16637449729681952, "grad_norm": 1.496907877511877, "learning_rate": 9.86648640435666e-06, "loss": 0.4344, "step": 5647 }, { "epoch": 0.1664039597542831, "grad_norm": 1.6020492706684573, "learning_rate": 9.866368335810701e-06, "loss": 0.5856, "step": 5648 }, { "epoch": 0.16643342221174667, "grad_norm": 1.837286514073408, "learning_rate": 9.86625021578978e-06, "loss": 0.6053, "step": 5649 }, { "epoch": 0.16646288466921025, "grad_norm": 1.6594607029567559, "learning_rate": 9.866132044295149e-06, "loss": 0.5453, "step": 5650 }, { "epoch": 0.16649234712667382, "grad_norm": 1.681386758812013, "learning_rate": 9.866013821328056e-06, "loss": 0.4996, "step": 5651 }, { "epoch": 0.16652180958413743, "grad_norm": 1.5839479985284024, "learning_rate": 9.86589554688975e-06, "loss": 0.492, "step": 5652 }, { "epoch": 0.166551272041601, "grad_norm": 1.6600854345595668, "learning_rate": 9.865777220981485e-06, "loss": 0.5638, "step": 5653 }, { "epoch": 0.16658073449906458, "grad_norm": 1.7555462404573725, "learning_rate": 9.865658843604511e-06, "loss": 0.6634, "step": 5654 }, { "epoch": 0.16661019695652815, "grad_norm": 1.7040482211478625, "learning_rate": 9.865540414760083e-06, "loss": 0.5476, "step": 5655 }, { "epoch": 0.16663965941399173, "grad_norm": 1.5966128229000478, "learning_rate": 9.865421934449447e-06, "loss": 0.566, "step": 5656 }, { "epoch": 0.1666691218714553, "grad_norm": 1.9069011460098406, "learning_rate": 9.865303402673863e-06, "loss": 0.4466, "step": 5657 }, { "epoch": 0.16669858432891888, "grad_norm": 1.632248041371824, "learning_rate": 9.86518481943458e-06, "loss": 0.5866, "step": 5658 }, { "epoch": 0.16672804678638245, "grad_norm": 1.994094010785441, "learning_rate": 9.86506618473286e-06, "loss": 0.7296, "step": 5659 }, { "epoch": 0.16675750924384602, "grad_norm": 1.7468825316214014, "learning_rate": 9.864947498569948e-06, "loss": 0.5714, "step": 5660 }, { "epoch": 0.1667869717013096, "grad_norm": 1.7709173506020286, "learning_rate": 9.864828760947106e-06, "loss": 0.6563, "step": 5661 }, { "epoch": 0.16681643415877317, "grad_norm": 1.8310587321531269, "learning_rate": 9.864709971865587e-06, "loss": 0.6762, "step": 5662 }, { "epoch": 0.16684589661623675, "grad_norm": 1.5990652879443947, "learning_rate": 9.864591131326649e-06, "loss": 0.5387, "step": 5663 }, { "epoch": 0.16687535907370032, "grad_norm": 1.7274412605044198, "learning_rate": 9.864472239331548e-06, "loss": 0.5785, "step": 5664 }, { "epoch": 0.16690482153116393, "grad_norm": 1.7737687089599696, "learning_rate": 9.86435329588154e-06, "loss": 0.6078, "step": 5665 }, { "epoch": 0.1669342839886275, "grad_norm": 1.5546715599456848, "learning_rate": 9.86423430097789e-06, "loss": 0.499, "step": 5666 }, { "epoch": 0.16696374644609108, "grad_norm": 1.6225094314605701, "learning_rate": 9.864115254621848e-06, "loss": 0.496, "step": 5667 }, { "epoch": 0.16699320890355465, "grad_norm": 1.620632712845816, "learning_rate": 9.86399615681468e-06, "loss": 0.5584, "step": 5668 }, { "epoch": 0.16702267136101823, "grad_norm": 1.7784525261613506, "learning_rate": 9.863877007557642e-06, "loss": 0.5379, "step": 5669 }, { "epoch": 0.1670521338184818, "grad_norm": 1.598281250227573, "learning_rate": 9.863757806851996e-06, "loss": 0.4455, "step": 5670 }, { "epoch": 0.16708159627594538, "grad_norm": 1.73607037777611, "learning_rate": 9.863638554699001e-06, "loss": 0.548, "step": 5671 }, { "epoch": 0.16711105873340895, "grad_norm": 2.0653990552713397, "learning_rate": 9.86351925109992e-06, "loss": 0.5484, "step": 5672 }, { "epoch": 0.16714052119087253, "grad_norm": 1.4296951273447707, "learning_rate": 9.863399896056014e-06, "loss": 0.3631, "step": 5673 }, { "epoch": 0.1671699836483361, "grad_norm": 1.6563808348267035, "learning_rate": 9.863280489568548e-06, "loss": 0.3346, "step": 5674 }, { "epoch": 0.16719944610579968, "grad_norm": 1.685815836569984, "learning_rate": 9.863161031638782e-06, "loss": 0.4666, "step": 5675 }, { "epoch": 0.16722890856326325, "grad_norm": 1.6166015949932613, "learning_rate": 9.86304152226798e-06, "loss": 0.5751, "step": 5676 }, { "epoch": 0.16725837102072683, "grad_norm": 1.6407572954500877, "learning_rate": 9.862921961457408e-06, "loss": 0.5834, "step": 5677 }, { "epoch": 0.16728783347819043, "grad_norm": 1.93143968821512, "learning_rate": 9.862802349208328e-06, "loss": 0.4824, "step": 5678 }, { "epoch": 0.167317295935654, "grad_norm": 1.4580245416698736, "learning_rate": 9.862682685522006e-06, "loss": 0.5834, "step": 5679 }, { "epoch": 0.16734675839311758, "grad_norm": 1.6395024875771844, "learning_rate": 9.862562970399712e-06, "loss": 0.4945, "step": 5680 }, { "epoch": 0.16737622085058115, "grad_norm": 1.599532721662106, "learning_rate": 9.862443203842707e-06, "loss": 0.4911, "step": 5681 }, { "epoch": 0.16740568330804473, "grad_norm": 1.480070483445603, "learning_rate": 9.86232338585226e-06, "loss": 0.4291, "step": 5682 }, { "epoch": 0.1674351457655083, "grad_norm": 1.7155399951923491, "learning_rate": 9.862203516429637e-06, "loss": 0.5946, "step": 5683 }, { "epoch": 0.16746460822297188, "grad_norm": 1.7402630747283032, "learning_rate": 9.86208359557611e-06, "loss": 0.5548, "step": 5684 }, { "epoch": 0.16749407068043545, "grad_norm": 1.5956644526072237, "learning_rate": 9.86196362329294e-06, "loss": 0.6254, "step": 5685 }, { "epoch": 0.16752353313789903, "grad_norm": 1.7608064953794187, "learning_rate": 9.861843599581401e-06, "loss": 0.5289, "step": 5686 }, { "epoch": 0.1675529955953626, "grad_norm": 1.804253045209984, "learning_rate": 9.861723524442765e-06, "loss": 0.4876, "step": 5687 }, { "epoch": 0.16758245805282618, "grad_norm": 1.4677127700581762, "learning_rate": 9.8616033978783e-06, "loss": 0.3943, "step": 5688 }, { "epoch": 0.16761192051028975, "grad_norm": 1.5703378231707918, "learning_rate": 9.861483219889273e-06, "loss": 0.492, "step": 5689 }, { "epoch": 0.16764138296775333, "grad_norm": 1.5093719079030288, "learning_rate": 9.86136299047696e-06, "loss": 0.4258, "step": 5690 }, { "epoch": 0.16767084542521693, "grad_norm": 1.5731935807511606, "learning_rate": 9.861242709642629e-06, "loss": 0.5336, "step": 5691 }, { "epoch": 0.1677003078826805, "grad_norm": 1.5843347398324912, "learning_rate": 9.861122377387554e-06, "loss": 0.4625, "step": 5692 }, { "epoch": 0.16772977034014408, "grad_norm": 1.6444973019075657, "learning_rate": 9.861001993713008e-06, "loss": 0.6116, "step": 5693 }, { "epoch": 0.16775923279760765, "grad_norm": 1.684733677707953, "learning_rate": 9.860881558620267e-06, "loss": 0.4732, "step": 5694 }, { "epoch": 0.16778869525507123, "grad_norm": 1.5529769600506302, "learning_rate": 9.8607610721106e-06, "loss": 0.439, "step": 5695 }, { "epoch": 0.1678181577125348, "grad_norm": 1.6810220682231176, "learning_rate": 9.860640534185283e-06, "loss": 0.6049, "step": 5696 }, { "epoch": 0.16784762016999838, "grad_norm": 1.5754435846053134, "learning_rate": 9.860519944845595e-06, "loss": 0.4595, "step": 5697 }, { "epoch": 0.16787708262746195, "grad_norm": 1.8965583931829546, "learning_rate": 9.860399304092806e-06, "loss": 0.5229, "step": 5698 }, { "epoch": 0.16790654508492553, "grad_norm": 1.5654822435991396, "learning_rate": 9.860278611928195e-06, "loss": 0.4774, "step": 5699 }, { "epoch": 0.1679360075423891, "grad_norm": 1.6710848333598693, "learning_rate": 9.860157868353038e-06, "loss": 0.5601, "step": 5700 }, { "epoch": 0.16796546999985268, "grad_norm": 1.7009924856095877, "learning_rate": 9.860037073368612e-06, "loss": 0.5784, "step": 5701 }, { "epoch": 0.16799493245731625, "grad_norm": 2.1595219424845356, "learning_rate": 9.859916226976194e-06, "loss": 0.7749, "step": 5702 }, { "epoch": 0.16802439491477983, "grad_norm": 1.6728148311559887, "learning_rate": 9.859795329177064e-06, "loss": 0.5589, "step": 5703 }, { "epoch": 0.16805385737224343, "grad_norm": 1.6323728906768407, "learning_rate": 9.8596743799725e-06, "loss": 0.4787, "step": 5704 }, { "epoch": 0.168083319829707, "grad_norm": 1.7678551025172893, "learning_rate": 9.859553379363783e-06, "loss": 0.6528, "step": 5705 }, { "epoch": 0.16811278228717058, "grad_norm": 1.704625098265039, "learning_rate": 9.85943232735219e-06, "loss": 0.5953, "step": 5706 }, { "epoch": 0.16814224474463416, "grad_norm": 1.5701639501059592, "learning_rate": 9.859311223939003e-06, "loss": 0.5706, "step": 5707 }, { "epoch": 0.16817170720209773, "grad_norm": 1.7111401968541922, "learning_rate": 9.859190069125503e-06, "loss": 0.5789, "step": 5708 }, { "epoch": 0.1682011696595613, "grad_norm": 1.757522917526578, "learning_rate": 9.859068862912971e-06, "loss": 0.3511, "step": 5709 }, { "epoch": 0.16823063211702488, "grad_norm": 1.6316484798737316, "learning_rate": 9.858947605302688e-06, "loss": 0.4539, "step": 5710 }, { "epoch": 0.16826009457448846, "grad_norm": 1.6257345989818814, "learning_rate": 9.85882629629594e-06, "loss": 0.4678, "step": 5711 }, { "epoch": 0.16828955703195203, "grad_norm": 1.6085234617820112, "learning_rate": 9.858704935894008e-06, "loss": 0.5216, "step": 5712 }, { "epoch": 0.1683190194894156, "grad_norm": 1.6142695488159131, "learning_rate": 9.858583524098178e-06, "loss": 0.5599, "step": 5713 }, { "epoch": 0.16834848194687918, "grad_norm": 1.5633574557809697, "learning_rate": 9.858462060909728e-06, "loss": 0.5088, "step": 5714 }, { "epoch": 0.16837794440434276, "grad_norm": 1.4718241438865207, "learning_rate": 9.85834054632995e-06, "loss": 0.477, "step": 5715 }, { "epoch": 0.16840740686180633, "grad_norm": 1.4961846215337424, "learning_rate": 9.858218980360125e-06, "loss": 0.5178, "step": 5716 }, { "epoch": 0.16843686931926993, "grad_norm": 1.6286225793876135, "learning_rate": 9.85809736300154e-06, "loss": 0.4008, "step": 5717 }, { "epoch": 0.1684663317767335, "grad_norm": 1.509449712531553, "learning_rate": 9.85797569425548e-06, "loss": 0.4373, "step": 5718 }, { "epoch": 0.16849579423419708, "grad_norm": 1.716624070544525, "learning_rate": 9.857853974123238e-06, "loss": 0.5677, "step": 5719 }, { "epoch": 0.16852525669166066, "grad_norm": 1.6885950031173673, "learning_rate": 9.857732202606095e-06, "loss": 0.4563, "step": 5720 }, { "epoch": 0.16855471914912423, "grad_norm": 1.5387669694926083, "learning_rate": 9.85761037970534e-06, "loss": 0.4972, "step": 5721 }, { "epoch": 0.1685841816065878, "grad_norm": 1.7431929617674782, "learning_rate": 9.857488505422265e-06, "loss": 0.4622, "step": 5722 }, { "epoch": 0.16861364406405138, "grad_norm": 1.6892529652014245, "learning_rate": 9.857366579758155e-06, "loss": 0.4717, "step": 5723 }, { "epoch": 0.16864310652151496, "grad_norm": 1.7696944654102555, "learning_rate": 9.857244602714303e-06, "loss": 0.7215, "step": 5724 }, { "epoch": 0.16867256897897853, "grad_norm": 1.911014844884402, "learning_rate": 9.857122574291997e-06, "loss": 0.5936, "step": 5725 }, { "epoch": 0.1687020314364421, "grad_norm": 1.5381241162408026, "learning_rate": 9.85700049449253e-06, "loss": 0.4153, "step": 5726 }, { "epoch": 0.16873149389390568, "grad_norm": 1.7825837673842717, "learning_rate": 9.85687836331719e-06, "loss": 0.4995, "step": 5727 }, { "epoch": 0.16876095635136926, "grad_norm": 1.6330582654001586, "learning_rate": 9.856756180767273e-06, "loss": 0.4963, "step": 5728 }, { "epoch": 0.16879041880883283, "grad_norm": 1.8415676475057878, "learning_rate": 9.856633946844067e-06, "loss": 0.4832, "step": 5729 }, { "epoch": 0.16881988126629643, "grad_norm": 1.638537220914519, "learning_rate": 9.856511661548869e-06, "loss": 0.6431, "step": 5730 }, { "epoch": 0.16884934372376, "grad_norm": 1.6312134964571867, "learning_rate": 9.85638932488297e-06, "loss": 0.5823, "step": 5731 }, { "epoch": 0.16887880618122358, "grad_norm": 1.712188040169881, "learning_rate": 9.856266936847665e-06, "loss": 0.5265, "step": 5732 }, { "epoch": 0.16890826863868716, "grad_norm": 1.786321236587258, "learning_rate": 9.856144497444248e-06, "loss": 0.4502, "step": 5733 }, { "epoch": 0.16893773109615073, "grad_norm": 1.790238145817456, "learning_rate": 9.856022006674013e-06, "loss": 0.4404, "step": 5734 }, { "epoch": 0.1689671935536143, "grad_norm": 1.732587070376687, "learning_rate": 9.85589946453826e-06, "loss": 0.5957, "step": 5735 }, { "epoch": 0.16899665601107788, "grad_norm": 1.6188900623805822, "learning_rate": 9.85577687103828e-06, "loss": 0.5109, "step": 5736 }, { "epoch": 0.16902611846854146, "grad_norm": 1.94350163788028, "learning_rate": 9.855654226175373e-06, "loss": 0.5767, "step": 5737 }, { "epoch": 0.16905558092600503, "grad_norm": 1.6275213395022816, "learning_rate": 9.855531529950836e-06, "loss": 0.5463, "step": 5738 }, { "epoch": 0.1690850433834686, "grad_norm": 1.711072687414729, "learning_rate": 9.855408782365967e-06, "loss": 0.5174, "step": 5739 }, { "epoch": 0.16911450584093218, "grad_norm": 1.5886076629033072, "learning_rate": 9.855285983422063e-06, "loss": 0.5112, "step": 5740 }, { "epoch": 0.16914396829839576, "grad_norm": 1.5976299294903697, "learning_rate": 9.855163133120422e-06, "loss": 0.5082, "step": 5741 }, { "epoch": 0.16917343075585933, "grad_norm": 1.7006642612052993, "learning_rate": 9.855040231462347e-06, "loss": 0.4447, "step": 5742 }, { "epoch": 0.16920289321332294, "grad_norm": 1.958755106585026, "learning_rate": 9.854917278449135e-06, "loss": 0.5649, "step": 5743 }, { "epoch": 0.1692323556707865, "grad_norm": 1.663147854623347, "learning_rate": 9.854794274082087e-06, "loss": 0.4454, "step": 5744 }, { "epoch": 0.16926181812825009, "grad_norm": 1.7231650892342265, "learning_rate": 9.854671218362507e-06, "loss": 0.4642, "step": 5745 }, { "epoch": 0.16929128058571366, "grad_norm": 1.7612014350951515, "learning_rate": 9.854548111291691e-06, "loss": 0.4354, "step": 5746 }, { "epoch": 0.16932074304317724, "grad_norm": 1.5388498493352445, "learning_rate": 9.854424952870946e-06, "loss": 0.4614, "step": 5747 }, { "epoch": 0.1693502055006408, "grad_norm": 1.6455722309285015, "learning_rate": 9.854301743101575e-06, "loss": 0.5789, "step": 5748 }, { "epoch": 0.16937966795810439, "grad_norm": 1.7482601875855373, "learning_rate": 9.854178481984877e-06, "loss": 0.5065, "step": 5749 }, { "epoch": 0.16940913041556796, "grad_norm": 1.8002973777072544, "learning_rate": 9.854055169522162e-06, "loss": 0.4315, "step": 5750 }, { "epoch": 0.16943859287303153, "grad_norm": 1.7656723199311704, "learning_rate": 9.853931805714726e-06, "loss": 0.7211, "step": 5751 }, { "epoch": 0.1694680553304951, "grad_norm": 1.7319638885245223, "learning_rate": 9.853808390563882e-06, "loss": 0.4966, "step": 5752 }, { "epoch": 0.16949751778795868, "grad_norm": 1.788250198042961, "learning_rate": 9.85368492407093e-06, "loss": 0.6049, "step": 5753 }, { "epoch": 0.16952698024542226, "grad_norm": 1.6673318053551844, "learning_rate": 9.85356140623718e-06, "loss": 0.5244, "step": 5754 }, { "epoch": 0.16955644270288583, "grad_norm": 1.5444018606912895, "learning_rate": 9.853437837063934e-06, "loss": 0.5405, "step": 5755 }, { "epoch": 0.16958590516034944, "grad_norm": 1.5610824320283723, "learning_rate": 9.853314216552505e-06, "loss": 0.5062, "step": 5756 }, { "epoch": 0.169615367617813, "grad_norm": 1.8551695269285022, "learning_rate": 9.853190544704196e-06, "loss": 0.4809, "step": 5757 }, { "epoch": 0.1696448300752766, "grad_norm": 1.4881488688205924, "learning_rate": 9.853066821520316e-06, "loss": 0.4933, "step": 5758 }, { "epoch": 0.16967429253274016, "grad_norm": 1.8320881599196708, "learning_rate": 9.852943047002174e-06, "loss": 0.496, "step": 5759 }, { "epoch": 0.16970375499020374, "grad_norm": 1.7686167468591107, "learning_rate": 9.85281922115108e-06, "loss": 0.6248, "step": 5760 }, { "epoch": 0.1697332174476673, "grad_norm": 1.9096321355761674, "learning_rate": 9.852695343968344e-06, "loss": 0.6633, "step": 5761 }, { "epoch": 0.1697626799051309, "grad_norm": 1.5663238741741878, "learning_rate": 9.852571415455275e-06, "loss": 0.442, "step": 5762 }, { "epoch": 0.16979214236259446, "grad_norm": 1.814418184096727, "learning_rate": 9.852447435613184e-06, "loss": 0.4979, "step": 5763 }, { "epoch": 0.16982160482005804, "grad_norm": 1.7484891543817944, "learning_rate": 9.852323404443382e-06, "loss": 0.5532, "step": 5764 }, { "epoch": 0.1698510672775216, "grad_norm": 1.5674435915745462, "learning_rate": 9.852199321947182e-06, "loss": 0.5413, "step": 5765 }, { "epoch": 0.1698805297349852, "grad_norm": 1.6509327575547996, "learning_rate": 9.852075188125897e-06, "loss": 0.5177, "step": 5766 }, { "epoch": 0.16990999219244876, "grad_norm": 1.7518945063746494, "learning_rate": 9.85195100298084e-06, "loss": 0.665, "step": 5767 }, { "epoch": 0.16993945464991234, "grad_norm": 1.916396125199261, "learning_rate": 9.851826766513323e-06, "loss": 0.5602, "step": 5768 }, { "epoch": 0.16996891710737594, "grad_norm": 1.612626960154371, "learning_rate": 9.851702478724663e-06, "loss": 0.3821, "step": 5769 }, { "epoch": 0.1699983795648395, "grad_norm": 1.6104555833010883, "learning_rate": 9.85157813961617e-06, "loss": 0.4762, "step": 5770 }, { "epoch": 0.1700278420223031, "grad_norm": 1.706485811556243, "learning_rate": 9.851453749189163e-06, "loss": 0.4879, "step": 5771 }, { "epoch": 0.17005730447976666, "grad_norm": 1.572099012797493, "learning_rate": 9.851329307444958e-06, "loss": 0.4814, "step": 5772 }, { "epoch": 0.17008676693723024, "grad_norm": 1.615594434532865, "learning_rate": 9.85120481438487e-06, "loss": 0.6363, "step": 5773 }, { "epoch": 0.1701162293946938, "grad_norm": 1.7138864915202983, "learning_rate": 9.851080270010214e-06, "loss": 0.6509, "step": 5774 }, { "epoch": 0.1701456918521574, "grad_norm": 1.6072470622936053, "learning_rate": 9.85095567432231e-06, "loss": 0.3524, "step": 5775 }, { "epoch": 0.17017515430962096, "grad_norm": 1.7155986276836301, "learning_rate": 9.850831027322476e-06, "loss": 0.4786, "step": 5776 }, { "epoch": 0.17020461676708454, "grad_norm": 1.7491574210723169, "learning_rate": 9.85070632901203e-06, "loss": 0.4889, "step": 5777 }, { "epoch": 0.1702340792245481, "grad_norm": 1.5028625685145376, "learning_rate": 9.85058157939229e-06, "loss": 0.36, "step": 5778 }, { "epoch": 0.1702635416820117, "grad_norm": 1.4792396147223628, "learning_rate": 9.850456778464576e-06, "loss": 0.4196, "step": 5779 }, { "epoch": 0.17029300413947526, "grad_norm": 1.7151984791172687, "learning_rate": 9.850331926230209e-06, "loss": 0.5517, "step": 5780 }, { "epoch": 0.17032246659693884, "grad_norm": 1.6825263246433364, "learning_rate": 9.850207022690508e-06, "loss": 0.5361, "step": 5781 }, { "epoch": 0.17035192905440244, "grad_norm": 1.6615920878649593, "learning_rate": 9.850082067846799e-06, "loss": 0.4676, "step": 5782 }, { "epoch": 0.17038139151186601, "grad_norm": 1.6136624410961664, "learning_rate": 9.849957061700395e-06, "loss": 0.5319, "step": 5783 }, { "epoch": 0.1704108539693296, "grad_norm": 1.735826874499101, "learning_rate": 9.849832004252626e-06, "loss": 0.4415, "step": 5784 }, { "epoch": 0.17044031642679316, "grad_norm": 1.512528609749509, "learning_rate": 9.849706895504812e-06, "loss": 0.437, "step": 5785 }, { "epoch": 0.17046977888425674, "grad_norm": 1.6258238238154048, "learning_rate": 9.849581735458275e-06, "loss": 0.5008, "step": 5786 }, { "epoch": 0.17049924134172031, "grad_norm": 1.750661333462407, "learning_rate": 9.849456524114342e-06, "loss": 0.5291, "step": 5787 }, { "epoch": 0.1705287037991839, "grad_norm": 1.7411674321191195, "learning_rate": 9.849331261474335e-06, "loss": 0.5481, "step": 5788 }, { "epoch": 0.17055816625664746, "grad_norm": 1.603241576082196, "learning_rate": 9.849205947539579e-06, "loss": 0.3752, "step": 5789 }, { "epoch": 0.17058762871411104, "grad_norm": 1.8701565891244063, "learning_rate": 9.849080582311403e-06, "loss": 0.6257, "step": 5790 }, { "epoch": 0.17061709117157461, "grad_norm": 1.7915866775946458, "learning_rate": 9.848955165791128e-06, "loss": 0.5541, "step": 5791 }, { "epoch": 0.1706465536290382, "grad_norm": 1.6168611221641251, "learning_rate": 9.848829697980084e-06, "loss": 0.6244, "step": 5792 }, { "epoch": 0.17067601608650176, "grad_norm": 1.621782947552772, "learning_rate": 9.848704178879596e-06, "loss": 0.4975, "step": 5793 }, { "epoch": 0.17070547854396534, "grad_norm": 1.5495109350350749, "learning_rate": 9.848578608490996e-06, "loss": 0.4596, "step": 5794 }, { "epoch": 0.17073494100142894, "grad_norm": 1.5604798079103372, "learning_rate": 9.848452986815606e-06, "loss": 0.4595, "step": 5795 }, { "epoch": 0.17076440345889252, "grad_norm": 1.590767269806424, "learning_rate": 9.84832731385476e-06, "loss": 0.5589, "step": 5796 }, { "epoch": 0.1707938659163561, "grad_norm": 1.544746297938516, "learning_rate": 9.848201589609783e-06, "loss": 0.4586, "step": 5797 }, { "epoch": 0.17082332837381967, "grad_norm": 1.5663394614422, "learning_rate": 9.848075814082008e-06, "loss": 0.5069, "step": 5798 }, { "epoch": 0.17085279083128324, "grad_norm": 1.8554747823604798, "learning_rate": 9.847949987272765e-06, "loss": 0.7355, "step": 5799 }, { "epoch": 0.17088225328874682, "grad_norm": 1.702654018214681, "learning_rate": 9.847824109183384e-06, "loss": 0.5393, "step": 5800 }, { "epoch": 0.1709117157462104, "grad_norm": 2.0250169042725177, "learning_rate": 9.847698179815196e-06, "loss": 0.6456, "step": 5801 }, { "epoch": 0.17094117820367397, "grad_norm": 1.830281438053569, "learning_rate": 9.847572199169537e-06, "loss": 0.5597, "step": 5802 }, { "epoch": 0.17097064066113754, "grad_norm": 1.92858216757905, "learning_rate": 9.847446167247734e-06, "loss": 0.7316, "step": 5803 }, { "epoch": 0.17100010311860112, "grad_norm": 1.9439873990470131, "learning_rate": 9.847320084051125e-06, "loss": 0.6047, "step": 5804 }, { "epoch": 0.1710295655760647, "grad_norm": 1.7594870506110358, "learning_rate": 9.84719394958104e-06, "loss": 0.5242, "step": 5805 }, { "epoch": 0.17105902803352827, "grad_norm": 1.65659568465892, "learning_rate": 9.847067763838814e-06, "loss": 0.3735, "step": 5806 }, { "epoch": 0.17108849049099184, "grad_norm": 1.7151006742945474, "learning_rate": 9.846941526825783e-06, "loss": 0.5483, "step": 5807 }, { "epoch": 0.17111795294845544, "grad_norm": 1.806742366715117, "learning_rate": 9.846815238543282e-06, "loss": 0.4748, "step": 5808 }, { "epoch": 0.17114741540591902, "grad_norm": 1.587974263244384, "learning_rate": 9.846688898992648e-06, "loss": 0.4612, "step": 5809 }, { "epoch": 0.1711768778633826, "grad_norm": 1.7085473553247206, "learning_rate": 9.846562508175212e-06, "loss": 0.6345, "step": 5810 }, { "epoch": 0.17120634032084617, "grad_norm": 1.6523552114060263, "learning_rate": 9.846436066092317e-06, "loss": 0.4695, "step": 5811 }, { "epoch": 0.17123580277830974, "grad_norm": 1.6347518723519847, "learning_rate": 9.846309572745299e-06, "loss": 0.5663, "step": 5812 }, { "epoch": 0.17126526523577332, "grad_norm": 1.7110722922437198, "learning_rate": 9.846183028135494e-06, "loss": 0.4496, "step": 5813 }, { "epoch": 0.1712947276932369, "grad_norm": 1.645569808771757, "learning_rate": 9.846056432264244e-06, "loss": 0.5235, "step": 5814 }, { "epoch": 0.17132419015070047, "grad_norm": 1.766672084853239, "learning_rate": 9.845929785132883e-06, "loss": 0.6404, "step": 5815 }, { "epoch": 0.17135365260816404, "grad_norm": 1.4268370629104685, "learning_rate": 9.845803086742755e-06, "loss": 0.4782, "step": 5816 }, { "epoch": 0.17138311506562762, "grad_norm": 1.7266623558085143, "learning_rate": 9.845676337095199e-06, "loss": 0.5499, "step": 5817 }, { "epoch": 0.1714125775230912, "grad_norm": 1.8333346727361788, "learning_rate": 9.845549536191556e-06, "loss": 0.5834, "step": 5818 }, { "epoch": 0.17144203998055477, "grad_norm": 1.453419220690492, "learning_rate": 9.845422684033165e-06, "loss": 0.4431, "step": 5819 }, { "epoch": 0.17147150243801834, "grad_norm": 1.5745830761394788, "learning_rate": 9.845295780621371e-06, "loss": 0.5005, "step": 5820 }, { "epoch": 0.17150096489548194, "grad_norm": 1.6035878501219258, "learning_rate": 9.845168825957513e-06, "loss": 0.5668, "step": 5821 }, { "epoch": 0.17153042735294552, "grad_norm": 1.6957717823829328, "learning_rate": 9.845041820042939e-06, "loss": 0.5285, "step": 5822 }, { "epoch": 0.1715598898104091, "grad_norm": 1.598818277498341, "learning_rate": 9.844914762878987e-06, "loss": 0.5825, "step": 5823 }, { "epoch": 0.17158935226787267, "grad_norm": 1.8235735577537842, "learning_rate": 9.844787654467004e-06, "loss": 0.5498, "step": 5824 }, { "epoch": 0.17161881472533624, "grad_norm": 1.8005898110514778, "learning_rate": 9.844660494808333e-06, "loss": 0.6513, "step": 5825 }, { "epoch": 0.17164827718279982, "grad_norm": 1.6183223604855959, "learning_rate": 9.844533283904321e-06, "loss": 0.4597, "step": 5826 }, { "epoch": 0.1716777396402634, "grad_norm": 1.581518036646695, "learning_rate": 9.84440602175631e-06, "loss": 0.4457, "step": 5827 }, { "epoch": 0.17170720209772697, "grad_norm": 1.8203744543804254, "learning_rate": 9.84427870836565e-06, "loss": 0.4777, "step": 5828 }, { "epoch": 0.17173666455519054, "grad_norm": 1.6218831698452894, "learning_rate": 9.844151343733685e-06, "loss": 0.5398, "step": 5829 }, { "epoch": 0.17176612701265412, "grad_norm": 1.5517922391512466, "learning_rate": 9.844023927861765e-06, "loss": 0.4723, "step": 5830 }, { "epoch": 0.1717955894701177, "grad_norm": 1.7492900113978782, "learning_rate": 9.843896460751237e-06, "loss": 0.5129, "step": 5831 }, { "epoch": 0.17182505192758127, "grad_norm": 1.8300086423870792, "learning_rate": 9.843768942403446e-06, "loss": 0.5389, "step": 5832 }, { "epoch": 0.17185451438504484, "grad_norm": 1.752059998363607, "learning_rate": 9.843641372819743e-06, "loss": 0.5302, "step": 5833 }, { "epoch": 0.17188397684250845, "grad_norm": 1.6304677222139723, "learning_rate": 9.84351375200148e-06, "loss": 0.5821, "step": 5834 }, { "epoch": 0.17191343929997202, "grad_norm": 1.5497889126369742, "learning_rate": 9.843386079950003e-06, "loss": 0.501, "step": 5835 }, { "epoch": 0.1719429017574356, "grad_norm": 1.8238033403872607, "learning_rate": 9.843258356666664e-06, "loss": 0.7035, "step": 5836 }, { "epoch": 0.17197236421489917, "grad_norm": 1.5902948132538923, "learning_rate": 9.843130582152817e-06, "loss": 0.45, "step": 5837 }, { "epoch": 0.17200182667236275, "grad_norm": 1.5868342556464325, "learning_rate": 9.843002756409808e-06, "loss": 0.4342, "step": 5838 }, { "epoch": 0.17203128912982632, "grad_norm": 1.570731514608012, "learning_rate": 9.842874879438992e-06, "loss": 0.6064, "step": 5839 }, { "epoch": 0.1720607515872899, "grad_norm": 1.6325234541488147, "learning_rate": 9.842746951241722e-06, "loss": 0.4692, "step": 5840 }, { "epoch": 0.17209021404475347, "grad_norm": 1.6365340660411443, "learning_rate": 9.842618971819351e-06, "loss": 0.5493, "step": 5841 }, { "epoch": 0.17211967650221704, "grad_norm": 1.7633736086443923, "learning_rate": 9.842490941173231e-06, "loss": 0.715, "step": 5842 }, { "epoch": 0.17214913895968062, "grad_norm": 1.8266371845106912, "learning_rate": 9.84236285930472e-06, "loss": 0.5981, "step": 5843 }, { "epoch": 0.1721786014171442, "grad_norm": 1.5963474436247547, "learning_rate": 9.842234726215168e-06, "loss": 0.4728, "step": 5844 }, { "epoch": 0.17220806387460777, "grad_norm": 1.6791108292247632, "learning_rate": 9.842106541905933e-06, "loss": 0.437, "step": 5845 }, { "epoch": 0.17223752633207134, "grad_norm": 1.7512547693943932, "learning_rate": 9.841978306378373e-06, "loss": 0.5911, "step": 5846 }, { "epoch": 0.17226698878953495, "grad_norm": 1.6664607452101199, "learning_rate": 9.841850019633839e-06, "loss": 0.5534, "step": 5847 }, { "epoch": 0.17229645124699852, "grad_norm": 1.614302298720393, "learning_rate": 9.841721681673693e-06, "loss": 0.3654, "step": 5848 }, { "epoch": 0.1723259137044621, "grad_norm": 1.8094345836801282, "learning_rate": 9.84159329249929e-06, "loss": 0.62, "step": 5849 }, { "epoch": 0.17235537616192567, "grad_norm": 1.6180295199005552, "learning_rate": 9.841464852111988e-06, "loss": 0.4063, "step": 5850 }, { "epoch": 0.17238483861938925, "grad_norm": 1.5972756210878203, "learning_rate": 9.841336360513147e-06, "loss": 0.4617, "step": 5851 }, { "epoch": 0.17241430107685282, "grad_norm": 1.7641026180957542, "learning_rate": 9.841207817704126e-06, "loss": 0.644, "step": 5852 }, { "epoch": 0.1724437635343164, "grad_norm": 1.8366590239604887, "learning_rate": 9.841079223686284e-06, "loss": 0.5368, "step": 5853 }, { "epoch": 0.17247322599177997, "grad_norm": 1.4267631071839504, "learning_rate": 9.840950578460981e-06, "loss": 0.4951, "step": 5854 }, { "epoch": 0.17250268844924355, "grad_norm": 1.9163643160993107, "learning_rate": 9.840821882029579e-06, "loss": 0.6148, "step": 5855 }, { "epoch": 0.17253215090670712, "grad_norm": 1.6143631640206542, "learning_rate": 9.840693134393437e-06, "loss": 0.6094, "step": 5856 }, { "epoch": 0.1725616133641707, "grad_norm": 1.5388895665303808, "learning_rate": 9.840564335553918e-06, "loss": 0.4151, "step": 5857 }, { "epoch": 0.17259107582163427, "grad_norm": 1.7282773016548538, "learning_rate": 9.840435485512384e-06, "loss": 0.6305, "step": 5858 }, { "epoch": 0.17262053827909785, "grad_norm": 1.633261679163389, "learning_rate": 9.840306584270202e-06, "loss": 0.6311, "step": 5859 }, { "epoch": 0.17265000073656145, "grad_norm": 1.5855237164663376, "learning_rate": 9.84017763182873e-06, "loss": 0.4661, "step": 5860 }, { "epoch": 0.17267946319402502, "grad_norm": 1.82874166984729, "learning_rate": 9.840048628189334e-06, "loss": 0.7081, "step": 5861 }, { "epoch": 0.1727089256514886, "grad_norm": 1.9468617528785959, "learning_rate": 9.839919573353379e-06, "loss": 0.4998, "step": 5862 }, { "epoch": 0.17273838810895217, "grad_norm": 1.634971978175623, "learning_rate": 9.83979046732223e-06, "loss": 0.589, "step": 5863 }, { "epoch": 0.17276785056641575, "grad_norm": 1.9188345987663606, "learning_rate": 9.839661310097251e-06, "loss": 0.671, "step": 5864 }, { "epoch": 0.17279731302387932, "grad_norm": 1.5972193382525577, "learning_rate": 9.839532101679812e-06, "loss": 0.5754, "step": 5865 }, { "epoch": 0.1728267754813429, "grad_norm": 1.6426481007396148, "learning_rate": 9.839402842071275e-06, "loss": 0.4592, "step": 5866 }, { "epoch": 0.17285623793880647, "grad_norm": 1.7594194723683807, "learning_rate": 9.83927353127301e-06, "loss": 0.542, "step": 5867 }, { "epoch": 0.17288570039627005, "grad_norm": 1.7031351027694355, "learning_rate": 9.839144169286386e-06, "loss": 0.5044, "step": 5868 }, { "epoch": 0.17291516285373362, "grad_norm": 1.4982790423584917, "learning_rate": 9.839014756112768e-06, "loss": 0.4649, "step": 5869 }, { "epoch": 0.1729446253111972, "grad_norm": 1.651137410666226, "learning_rate": 9.838885291753528e-06, "loss": 0.5609, "step": 5870 }, { "epoch": 0.17297408776866077, "grad_norm": 1.7254625053032837, "learning_rate": 9.838755776210034e-06, "loss": 0.4418, "step": 5871 }, { "epoch": 0.17300355022612435, "grad_norm": 2.0417944281108085, "learning_rate": 9.838626209483656e-06, "loss": 0.5405, "step": 5872 }, { "epoch": 0.17303301268358795, "grad_norm": 1.6854392764617856, "learning_rate": 9.838496591575764e-06, "loss": 0.5808, "step": 5873 }, { "epoch": 0.17306247514105152, "grad_norm": 1.6806070015285577, "learning_rate": 9.838366922487729e-06, "loss": 0.5833, "step": 5874 }, { "epoch": 0.1730919375985151, "grad_norm": 1.6918623232800212, "learning_rate": 9.838237202220923e-06, "loss": 0.5937, "step": 5875 }, { "epoch": 0.17312140005597867, "grad_norm": 1.5871809736300644, "learning_rate": 9.83810743077672e-06, "loss": 0.4118, "step": 5876 }, { "epoch": 0.17315086251344225, "grad_norm": 1.5484107244299692, "learning_rate": 9.83797760815649e-06, "loss": 0.4447, "step": 5877 }, { "epoch": 0.17318032497090582, "grad_norm": 1.5940871017672693, "learning_rate": 9.837847734361607e-06, "loss": 0.4405, "step": 5878 }, { "epoch": 0.1732097874283694, "grad_norm": 1.4787206107098538, "learning_rate": 9.837717809393446e-06, "loss": 0.4119, "step": 5879 }, { "epoch": 0.17323924988583297, "grad_norm": 1.84285299455621, "learning_rate": 9.837587833253381e-06, "loss": 0.522, "step": 5880 }, { "epoch": 0.17326871234329655, "grad_norm": 1.5913633564083023, "learning_rate": 9.837457805942785e-06, "loss": 0.6638, "step": 5881 }, { "epoch": 0.17329817480076012, "grad_norm": 1.9109490332649914, "learning_rate": 9.837327727463036e-06, "loss": 0.5157, "step": 5882 }, { "epoch": 0.1733276372582237, "grad_norm": 1.7628264217480754, "learning_rate": 9.837197597815506e-06, "loss": 0.5169, "step": 5883 }, { "epoch": 0.17335709971568727, "grad_norm": 1.6420839251945092, "learning_rate": 9.837067417001576e-06, "loss": 0.6072, "step": 5884 }, { "epoch": 0.17338656217315085, "grad_norm": 1.6242144191613876, "learning_rate": 9.83693718502262e-06, "loss": 0.4861, "step": 5885 }, { "epoch": 0.17341602463061445, "grad_norm": 1.6153828944920272, "learning_rate": 9.836806901880018e-06, "loss": 0.4647, "step": 5886 }, { "epoch": 0.17344548708807803, "grad_norm": 1.8242099711118145, "learning_rate": 9.836676567575146e-06, "loss": 0.4038, "step": 5887 }, { "epoch": 0.1734749495455416, "grad_norm": 1.6457189430105168, "learning_rate": 9.83654618210938e-06, "loss": 0.5956, "step": 5888 }, { "epoch": 0.17350441200300518, "grad_norm": 1.75498890065781, "learning_rate": 9.836415745484108e-06, "loss": 0.494, "step": 5889 }, { "epoch": 0.17353387446046875, "grad_norm": 1.5733405338414619, "learning_rate": 9.8362852577007e-06, "loss": 0.4458, "step": 5890 }, { "epoch": 0.17356333691793233, "grad_norm": 1.561055642471373, "learning_rate": 9.836154718760543e-06, "loss": 0.4474, "step": 5891 }, { "epoch": 0.1735927993753959, "grad_norm": 1.9374465740771956, "learning_rate": 9.836024128665014e-06, "loss": 0.5438, "step": 5892 }, { "epoch": 0.17362226183285948, "grad_norm": 1.6017567212114665, "learning_rate": 9.835893487415495e-06, "loss": 0.3605, "step": 5893 }, { "epoch": 0.17365172429032305, "grad_norm": 1.6726299766820831, "learning_rate": 9.83576279501337e-06, "loss": 0.4935, "step": 5894 }, { "epoch": 0.17368118674778663, "grad_norm": 1.7513412561009585, "learning_rate": 9.83563205146002e-06, "loss": 0.4106, "step": 5895 }, { "epoch": 0.1737106492052502, "grad_norm": 1.877010899381854, "learning_rate": 9.835501256756827e-06, "loss": 0.6803, "step": 5896 }, { "epoch": 0.17374011166271378, "grad_norm": 1.6386885047272992, "learning_rate": 9.835370410905175e-06, "loss": 0.5522, "step": 5897 }, { "epoch": 0.17376957412017735, "grad_norm": 1.7529659432661995, "learning_rate": 9.83523951390645e-06, "loss": 0.5407, "step": 5898 }, { "epoch": 0.17379903657764095, "grad_norm": 1.5058304800576554, "learning_rate": 9.835108565762032e-06, "loss": 0.4894, "step": 5899 }, { "epoch": 0.17382849903510453, "grad_norm": 1.620375287309235, "learning_rate": 9.834977566473311e-06, "loss": 0.5783, "step": 5900 }, { "epoch": 0.1738579614925681, "grad_norm": 1.7854213803667733, "learning_rate": 9.834846516041672e-06, "loss": 0.5448, "step": 5901 }, { "epoch": 0.17388742395003168, "grad_norm": 1.9777041996544245, "learning_rate": 9.834715414468498e-06, "loss": 0.4796, "step": 5902 }, { "epoch": 0.17391688640749525, "grad_norm": 2.0234408629822096, "learning_rate": 9.834584261755178e-06, "loss": 0.7064, "step": 5903 }, { "epoch": 0.17394634886495883, "grad_norm": 2.0101630991297257, "learning_rate": 9.834453057903101e-06, "loss": 0.686, "step": 5904 }, { "epoch": 0.1739758113224224, "grad_norm": 1.6161187427544865, "learning_rate": 9.834321802913651e-06, "loss": 0.3892, "step": 5905 }, { "epoch": 0.17400527377988598, "grad_norm": 1.6667909902198117, "learning_rate": 9.83419049678822e-06, "loss": 0.5492, "step": 5906 }, { "epoch": 0.17403473623734955, "grad_norm": 1.641961388210742, "learning_rate": 9.834059139528194e-06, "loss": 0.6272, "step": 5907 }, { "epoch": 0.17406419869481313, "grad_norm": 1.6420103551459428, "learning_rate": 9.833927731134964e-06, "loss": 0.4631, "step": 5908 }, { "epoch": 0.1740936611522767, "grad_norm": 1.794045519182459, "learning_rate": 9.833796271609918e-06, "loss": 0.4955, "step": 5909 }, { "epoch": 0.17412312360974028, "grad_norm": 1.682801219237559, "learning_rate": 9.83366476095445e-06, "loss": 0.4564, "step": 5910 }, { "epoch": 0.17415258606720385, "grad_norm": 1.6977669312263688, "learning_rate": 9.833533199169948e-06, "loss": 0.4426, "step": 5911 }, { "epoch": 0.17418204852466745, "grad_norm": 1.8335423183364759, "learning_rate": 9.833401586257807e-06, "loss": 0.6659, "step": 5912 }, { "epoch": 0.17421151098213103, "grad_norm": 1.5799812423824466, "learning_rate": 9.833269922219415e-06, "loss": 0.4216, "step": 5913 }, { "epoch": 0.1742409734395946, "grad_norm": 1.6306278575611346, "learning_rate": 9.833138207056167e-06, "loss": 0.4468, "step": 5914 }, { "epoch": 0.17427043589705818, "grad_norm": 1.6742100464345844, "learning_rate": 9.833006440769458e-06, "loss": 0.452, "step": 5915 }, { "epoch": 0.17429989835452175, "grad_norm": 1.5660541219410657, "learning_rate": 9.832874623360678e-06, "loss": 0.4206, "step": 5916 }, { "epoch": 0.17432936081198533, "grad_norm": 1.6121059118942946, "learning_rate": 9.832742754831224e-06, "loss": 0.6325, "step": 5917 }, { "epoch": 0.1743588232694489, "grad_norm": 1.7642026616848459, "learning_rate": 9.832610835182487e-06, "loss": 0.5685, "step": 5918 }, { "epoch": 0.17438828572691248, "grad_norm": 1.7081176623939962, "learning_rate": 9.832478864415868e-06, "loss": 0.6452, "step": 5919 }, { "epoch": 0.17441774818437605, "grad_norm": 1.5945671216491757, "learning_rate": 9.832346842532761e-06, "loss": 0.4786, "step": 5920 }, { "epoch": 0.17444721064183963, "grad_norm": 1.7948995417863507, "learning_rate": 9.832214769534561e-06, "loss": 0.5406, "step": 5921 }, { "epoch": 0.1744766730993032, "grad_norm": 1.68816329569226, "learning_rate": 9.832082645422666e-06, "loss": 0.5448, "step": 5922 }, { "epoch": 0.17450613555676678, "grad_norm": 1.5803669318124316, "learning_rate": 9.831950470198472e-06, "loss": 0.494, "step": 5923 }, { "epoch": 0.17453559801423035, "grad_norm": 1.7208221806653303, "learning_rate": 9.83181824386338e-06, "loss": 0.7181, "step": 5924 }, { "epoch": 0.17456506047169396, "grad_norm": 1.8115076634361826, "learning_rate": 9.831685966418787e-06, "loss": 0.5947, "step": 5925 }, { "epoch": 0.17459452292915753, "grad_norm": 1.4654316457395704, "learning_rate": 9.831553637866093e-06, "loss": 0.3899, "step": 5926 }, { "epoch": 0.1746239853866211, "grad_norm": 1.5878141141231155, "learning_rate": 9.831421258206694e-06, "loss": 0.3583, "step": 5927 }, { "epoch": 0.17465344784408468, "grad_norm": 1.5398068490057495, "learning_rate": 9.831288827441997e-06, "loss": 0.5352, "step": 5928 }, { "epoch": 0.17468291030154826, "grad_norm": 1.5573842749560165, "learning_rate": 9.831156345573398e-06, "loss": 0.5439, "step": 5929 }, { "epoch": 0.17471237275901183, "grad_norm": 1.596569047314751, "learning_rate": 9.831023812602299e-06, "loss": 0.5558, "step": 5930 }, { "epoch": 0.1747418352164754, "grad_norm": 1.956586592017866, "learning_rate": 9.830891228530102e-06, "loss": 0.5622, "step": 5931 }, { "epoch": 0.17477129767393898, "grad_norm": 1.8342197844557144, "learning_rate": 9.830758593358212e-06, "loss": 0.5545, "step": 5932 }, { "epoch": 0.17480076013140255, "grad_norm": 1.6785006006727907, "learning_rate": 9.830625907088027e-06, "loss": 0.518, "step": 5933 }, { "epoch": 0.17483022258886613, "grad_norm": 1.9001102632792986, "learning_rate": 9.830493169720955e-06, "loss": 0.5017, "step": 5934 }, { "epoch": 0.1748596850463297, "grad_norm": 2.0868604551176415, "learning_rate": 9.830360381258399e-06, "loss": 0.5407, "step": 5935 }, { "epoch": 0.17488914750379328, "grad_norm": 1.528069948215494, "learning_rate": 9.830227541701761e-06, "loss": 0.4528, "step": 5936 }, { "epoch": 0.17491860996125685, "grad_norm": 1.8298771924354638, "learning_rate": 9.830094651052449e-06, "loss": 0.5934, "step": 5937 }, { "epoch": 0.17494807241872046, "grad_norm": 1.6920485731600499, "learning_rate": 9.829961709311869e-06, "loss": 0.3843, "step": 5938 }, { "epoch": 0.17497753487618403, "grad_norm": 1.5948184880626697, "learning_rate": 9.829828716481425e-06, "loss": 0.4262, "step": 5939 }, { "epoch": 0.1750069973336476, "grad_norm": 1.7820435419879825, "learning_rate": 9.829695672562524e-06, "loss": 0.5175, "step": 5940 }, { "epoch": 0.17503645979111118, "grad_norm": 1.7502551222624574, "learning_rate": 9.829562577556575e-06, "loss": 0.42, "step": 5941 }, { "epoch": 0.17506592224857476, "grad_norm": 1.5711920336312093, "learning_rate": 9.829429431464984e-06, "loss": 0.3408, "step": 5942 }, { "epoch": 0.17509538470603833, "grad_norm": 1.6454445192941904, "learning_rate": 9.82929623428916e-06, "loss": 0.4996, "step": 5943 }, { "epoch": 0.1751248471635019, "grad_norm": 1.8960969202522235, "learning_rate": 9.829162986030514e-06, "loss": 0.5708, "step": 5944 }, { "epoch": 0.17515430962096548, "grad_norm": 1.4883647651621907, "learning_rate": 9.829029686690454e-06, "loss": 0.2711, "step": 5945 }, { "epoch": 0.17518377207842906, "grad_norm": 1.6787347541389435, "learning_rate": 9.828896336270388e-06, "loss": 0.5214, "step": 5946 }, { "epoch": 0.17521323453589263, "grad_norm": 1.6752035156507994, "learning_rate": 9.82876293477173e-06, "loss": 0.4703, "step": 5947 }, { "epoch": 0.1752426969933562, "grad_norm": 1.5781771897116899, "learning_rate": 9.828629482195886e-06, "loss": 0.4777, "step": 5948 }, { "epoch": 0.17527215945081978, "grad_norm": 1.6562453047233963, "learning_rate": 9.828495978544274e-06, "loss": 0.6196, "step": 5949 }, { "epoch": 0.17530162190828338, "grad_norm": 1.6344523898764165, "learning_rate": 9.828362423818302e-06, "loss": 0.5401, "step": 5950 }, { "epoch": 0.17533108436574696, "grad_norm": 1.6548095481948655, "learning_rate": 9.828228818019384e-06, "loss": 0.6141, "step": 5951 }, { "epoch": 0.17536054682321053, "grad_norm": 1.5748361339180812, "learning_rate": 9.828095161148931e-06, "loss": 0.5618, "step": 5952 }, { "epoch": 0.1753900092806741, "grad_norm": 1.622188416711994, "learning_rate": 9.827961453208361e-06, "loss": 0.6313, "step": 5953 }, { "epoch": 0.17541947173813768, "grad_norm": 1.6083721971520926, "learning_rate": 9.827827694199086e-06, "loss": 0.6404, "step": 5954 }, { "epoch": 0.17544893419560126, "grad_norm": 1.5835621241709317, "learning_rate": 9.827693884122521e-06, "loss": 0.6032, "step": 5955 }, { "epoch": 0.17547839665306483, "grad_norm": 1.486557362387591, "learning_rate": 9.827560022980081e-06, "loss": 0.3458, "step": 5956 }, { "epoch": 0.1755078591105284, "grad_norm": 1.428822349599486, "learning_rate": 9.827426110773183e-06, "loss": 0.3697, "step": 5957 }, { "epoch": 0.17553732156799198, "grad_norm": 1.609467429630444, "learning_rate": 9.827292147503242e-06, "loss": 0.5976, "step": 5958 }, { "epoch": 0.17556678402545556, "grad_norm": 1.5258019355160861, "learning_rate": 9.827158133171675e-06, "loss": 0.566, "step": 5959 }, { "epoch": 0.17559624648291913, "grad_norm": 1.7337505763226648, "learning_rate": 9.827024067779902e-06, "loss": 0.6845, "step": 5960 }, { "epoch": 0.1756257089403827, "grad_norm": 1.6096882926988818, "learning_rate": 9.82688995132934e-06, "loss": 0.6241, "step": 5961 }, { "epoch": 0.17565517139784628, "grad_norm": 1.7793574549445792, "learning_rate": 9.826755783821405e-06, "loss": 0.4961, "step": 5962 }, { "epoch": 0.17568463385530989, "grad_norm": 1.6853670861851144, "learning_rate": 9.82662156525752e-06, "loss": 0.6199, "step": 5963 }, { "epoch": 0.17571409631277346, "grad_norm": 1.6248847600299827, "learning_rate": 9.826487295639102e-06, "loss": 0.518, "step": 5964 }, { "epoch": 0.17574355877023703, "grad_norm": 1.4488906970010151, "learning_rate": 9.826352974967572e-06, "loss": 0.4044, "step": 5965 }, { "epoch": 0.1757730212277006, "grad_norm": 1.6667757272836385, "learning_rate": 9.826218603244353e-06, "loss": 0.4417, "step": 5966 }, { "epoch": 0.17580248368516418, "grad_norm": 1.6264640370149104, "learning_rate": 9.826084180470863e-06, "loss": 0.6179, "step": 5967 }, { "epoch": 0.17583194614262776, "grad_norm": 1.7141302757277708, "learning_rate": 9.825949706648525e-06, "loss": 0.5072, "step": 5968 }, { "epoch": 0.17586140860009133, "grad_norm": 1.6418682576117551, "learning_rate": 9.825815181778764e-06, "loss": 0.3981, "step": 5969 }, { "epoch": 0.1758908710575549, "grad_norm": 1.7657305106689118, "learning_rate": 9.825680605863e-06, "loss": 0.594, "step": 5970 }, { "epoch": 0.17592033351501848, "grad_norm": 1.6023707963532454, "learning_rate": 9.825545978902656e-06, "loss": 0.6005, "step": 5971 }, { "epoch": 0.17594979597248206, "grad_norm": 2.1170087449616575, "learning_rate": 9.825411300899157e-06, "loss": 0.8127, "step": 5972 }, { "epoch": 0.17597925842994563, "grad_norm": 1.7258807756616608, "learning_rate": 9.825276571853928e-06, "loss": 0.4169, "step": 5973 }, { "epoch": 0.1760087208874092, "grad_norm": 1.5219023482019363, "learning_rate": 9.825141791768395e-06, "loss": 0.4128, "step": 5974 }, { "epoch": 0.17603818334487278, "grad_norm": 1.718606801124011, "learning_rate": 9.825006960643982e-06, "loss": 0.5211, "step": 5975 }, { "epoch": 0.1760676458023364, "grad_norm": 1.6620308569430249, "learning_rate": 9.824872078482115e-06, "loss": 0.4281, "step": 5976 }, { "epoch": 0.17609710825979996, "grad_norm": 1.6434533150431196, "learning_rate": 9.824737145284222e-06, "loss": 0.526, "step": 5977 }, { "epoch": 0.17612657071726354, "grad_norm": 1.7012582511487804, "learning_rate": 9.82460216105173e-06, "loss": 0.532, "step": 5978 }, { "epoch": 0.1761560331747271, "grad_norm": 1.6697587863528522, "learning_rate": 9.824467125786068e-06, "loss": 0.5718, "step": 5979 }, { "epoch": 0.1761854956321907, "grad_norm": 1.580003101509267, "learning_rate": 9.824332039488662e-06, "loss": 0.5017, "step": 5980 }, { "epoch": 0.17621495808965426, "grad_norm": 1.8299307757146315, "learning_rate": 9.824196902160942e-06, "loss": 0.4622, "step": 5981 }, { "epoch": 0.17624442054711784, "grad_norm": 1.4875222651615767, "learning_rate": 9.824061713804337e-06, "loss": 0.4779, "step": 5982 }, { "epoch": 0.1762738830045814, "grad_norm": 2.0251977652862454, "learning_rate": 9.823926474420278e-06, "loss": 0.7143, "step": 5983 }, { "epoch": 0.17630334546204499, "grad_norm": 1.7234791034124572, "learning_rate": 9.823791184010196e-06, "loss": 0.4938, "step": 5984 }, { "epoch": 0.17633280791950856, "grad_norm": 1.8348140305417957, "learning_rate": 9.823655842575518e-06, "loss": 0.6356, "step": 5985 }, { "epoch": 0.17636227037697214, "grad_norm": 1.5127197700180586, "learning_rate": 9.82352045011768e-06, "loss": 0.4792, "step": 5986 }, { "epoch": 0.1763917328344357, "grad_norm": 1.760379985780638, "learning_rate": 9.823385006638113e-06, "loss": 0.4683, "step": 5987 }, { "epoch": 0.17642119529189929, "grad_norm": 1.797587654161347, "learning_rate": 9.82324951213825e-06, "loss": 0.5112, "step": 5988 }, { "epoch": 0.1764506577493629, "grad_norm": 1.7661662356130143, "learning_rate": 9.823113966619523e-06, "loss": 0.4819, "step": 5989 }, { "epoch": 0.17648012020682646, "grad_norm": 1.7866715226324508, "learning_rate": 9.822978370083366e-06, "loss": 0.3863, "step": 5990 }, { "epoch": 0.17650958266429004, "grad_norm": 1.4239794161208605, "learning_rate": 9.822842722531213e-06, "loss": 0.4047, "step": 5991 }, { "epoch": 0.1765390451217536, "grad_norm": 1.7266465836425668, "learning_rate": 9.822707023964501e-06, "loss": 0.5863, "step": 5992 }, { "epoch": 0.1765685075792172, "grad_norm": 1.753485947788583, "learning_rate": 9.822571274384663e-06, "loss": 0.5802, "step": 5993 }, { "epoch": 0.17659797003668076, "grad_norm": 1.562711576967825, "learning_rate": 9.822435473793138e-06, "loss": 0.4808, "step": 5994 }, { "epoch": 0.17662743249414434, "grad_norm": 1.5445582537927742, "learning_rate": 9.822299622191356e-06, "loss": 0.5361, "step": 5995 }, { "epoch": 0.1766568949516079, "grad_norm": 1.6992742448905207, "learning_rate": 9.822163719580761e-06, "loss": 0.6165, "step": 5996 }, { "epoch": 0.1766863574090715, "grad_norm": 1.6106255677192791, "learning_rate": 9.822027765962788e-06, "loss": 0.5549, "step": 5997 }, { "epoch": 0.17671581986653506, "grad_norm": 1.5977409991972438, "learning_rate": 9.821891761338873e-06, "loss": 0.5353, "step": 5998 }, { "epoch": 0.17674528232399864, "grad_norm": 1.7083184561260247, "learning_rate": 9.821755705710458e-06, "loss": 0.6054, "step": 5999 }, { "epoch": 0.1767747447814622, "grad_norm": 1.6920659055077911, "learning_rate": 9.82161959907898e-06, "loss": 0.5116, "step": 6000 }, { "epoch": 0.1768042072389258, "grad_norm": 1.7979500605957366, "learning_rate": 9.82148344144588e-06, "loss": 0.5449, "step": 6001 }, { "epoch": 0.1768336696963894, "grad_norm": 1.7854867553209117, "learning_rate": 9.821347232812597e-06, "loss": 0.5236, "step": 6002 }, { "epoch": 0.17686313215385296, "grad_norm": 1.5412356405872951, "learning_rate": 9.821210973180572e-06, "loss": 0.4144, "step": 6003 }, { "epoch": 0.17689259461131654, "grad_norm": 1.590129159453018, "learning_rate": 9.821074662551247e-06, "loss": 0.6387, "step": 6004 }, { "epoch": 0.17692205706878011, "grad_norm": 1.7552948692382506, "learning_rate": 9.820938300926064e-06, "loss": 0.561, "step": 6005 }, { "epoch": 0.1769515195262437, "grad_norm": 1.610229358927239, "learning_rate": 9.820801888306464e-06, "loss": 0.3885, "step": 6006 }, { "epoch": 0.17698098198370726, "grad_norm": 1.471569594729919, "learning_rate": 9.820665424693892e-06, "loss": 0.4684, "step": 6007 }, { "epoch": 0.17701044444117084, "grad_norm": 1.9706750340898955, "learning_rate": 9.82052891008979e-06, "loss": 0.6247, "step": 6008 }, { "epoch": 0.1770399068986344, "grad_norm": 1.4917573154558401, "learning_rate": 9.820392344495601e-06, "loss": 0.4559, "step": 6009 }, { "epoch": 0.177069369356098, "grad_norm": 1.5631021454438438, "learning_rate": 9.820255727912771e-06, "loss": 0.5549, "step": 6010 }, { "epoch": 0.17709883181356156, "grad_norm": 1.661241162572709, "learning_rate": 9.820119060342745e-06, "loss": 0.4281, "step": 6011 }, { "epoch": 0.17712829427102514, "grad_norm": 1.6189696249207415, "learning_rate": 9.81998234178697e-06, "loss": 0.5123, "step": 6012 }, { "epoch": 0.1771577567284887, "grad_norm": 1.3275293567211144, "learning_rate": 9.81984557224689e-06, "loss": 0.3678, "step": 6013 }, { "epoch": 0.1771872191859523, "grad_norm": 1.8689742874162567, "learning_rate": 9.819708751723953e-06, "loss": 0.4425, "step": 6014 }, { "epoch": 0.1772166816434159, "grad_norm": 1.5823916261019633, "learning_rate": 9.819571880219606e-06, "loss": 0.4245, "step": 6015 }, { "epoch": 0.17724614410087947, "grad_norm": 1.6317100386505394, "learning_rate": 9.819434957735294e-06, "loss": 0.5694, "step": 6016 }, { "epoch": 0.17727560655834304, "grad_norm": 1.5307600799002357, "learning_rate": 9.819297984272469e-06, "loss": 0.4604, "step": 6017 }, { "epoch": 0.17730506901580662, "grad_norm": 1.7188219484490326, "learning_rate": 9.819160959832579e-06, "loss": 0.6256, "step": 6018 }, { "epoch": 0.1773345314732702, "grad_norm": 1.553618320050278, "learning_rate": 9.819023884417073e-06, "loss": 0.6135, "step": 6019 }, { "epoch": 0.17736399393073377, "grad_norm": 1.552813853647298, "learning_rate": 9.818886758027401e-06, "loss": 0.569, "step": 6020 }, { "epoch": 0.17739345638819734, "grad_norm": 1.5076953686707353, "learning_rate": 9.818749580665013e-06, "loss": 0.3993, "step": 6021 }, { "epoch": 0.17742291884566092, "grad_norm": 2.0629712983372617, "learning_rate": 9.81861235233136e-06, "loss": 0.5086, "step": 6022 }, { "epoch": 0.1774523813031245, "grad_norm": 1.6056603443861794, "learning_rate": 9.818475073027895e-06, "loss": 0.4447, "step": 6023 }, { "epoch": 0.17748184376058806, "grad_norm": 1.6044560326778308, "learning_rate": 9.81833774275607e-06, "loss": 0.4367, "step": 6024 }, { "epoch": 0.17751130621805164, "grad_norm": 2.0398064237615148, "learning_rate": 9.818200361517337e-06, "loss": 0.7235, "step": 6025 }, { "epoch": 0.17754076867551521, "grad_norm": 1.558568307607871, "learning_rate": 9.818062929313146e-06, "loss": 0.5731, "step": 6026 }, { "epoch": 0.1775702311329788, "grad_norm": 1.4668095711200873, "learning_rate": 9.817925446144955e-06, "loss": 0.4553, "step": 6027 }, { "epoch": 0.1775996935904424, "grad_norm": 1.5847944576147663, "learning_rate": 9.817787912014219e-06, "loss": 0.4893, "step": 6028 }, { "epoch": 0.17762915604790597, "grad_norm": 1.590304586475067, "learning_rate": 9.817650326922389e-06, "loss": 0.5467, "step": 6029 }, { "epoch": 0.17765861850536954, "grad_norm": 1.5480222546872378, "learning_rate": 9.817512690870922e-06, "loss": 0.474, "step": 6030 }, { "epoch": 0.17768808096283312, "grad_norm": 1.67705392127375, "learning_rate": 9.817375003861274e-06, "loss": 0.5516, "step": 6031 }, { "epoch": 0.1777175434202967, "grad_norm": 1.525248653668417, "learning_rate": 9.817237265894902e-06, "loss": 0.4118, "step": 6032 }, { "epoch": 0.17774700587776027, "grad_norm": 1.7053342790959187, "learning_rate": 9.817099476973262e-06, "loss": 0.5603, "step": 6033 }, { "epoch": 0.17777646833522384, "grad_norm": 1.502618285317058, "learning_rate": 9.816961637097811e-06, "loss": 0.5506, "step": 6034 }, { "epoch": 0.17780593079268742, "grad_norm": 1.7519259355668273, "learning_rate": 9.81682374627001e-06, "loss": 0.5054, "step": 6035 }, { "epoch": 0.177835393250151, "grad_norm": 1.772249662920498, "learning_rate": 9.816685804491313e-06, "loss": 0.5425, "step": 6036 }, { "epoch": 0.17786485570761457, "grad_norm": 1.6974633217747366, "learning_rate": 9.816547811763182e-06, "loss": 0.5446, "step": 6037 }, { "epoch": 0.17789431816507814, "grad_norm": 1.5771603984493296, "learning_rate": 9.816409768087075e-06, "loss": 0.6048, "step": 6038 }, { "epoch": 0.17792378062254172, "grad_norm": 1.6635686131840564, "learning_rate": 9.816271673464457e-06, "loss": 0.5297, "step": 6039 }, { "epoch": 0.1779532430800053, "grad_norm": 1.6791421283844135, "learning_rate": 9.816133527896784e-06, "loss": 0.4692, "step": 6040 }, { "epoch": 0.1779827055374689, "grad_norm": 1.6312145420663304, "learning_rate": 9.815995331385515e-06, "loss": 0.5427, "step": 6041 }, { "epoch": 0.17801216799493247, "grad_norm": 1.7418629612827734, "learning_rate": 9.815857083932118e-06, "loss": 0.7084, "step": 6042 }, { "epoch": 0.17804163045239604, "grad_norm": 1.6134693409786218, "learning_rate": 9.815718785538053e-06, "loss": 0.5278, "step": 6043 }, { "epoch": 0.17807109290985962, "grad_norm": 1.7069677897501658, "learning_rate": 9.81558043620478e-06, "loss": 0.5495, "step": 6044 }, { "epoch": 0.1781005553673232, "grad_norm": 1.6817752059964388, "learning_rate": 9.815442035933766e-06, "loss": 0.5625, "step": 6045 }, { "epoch": 0.17813001782478677, "grad_norm": 1.540437589424911, "learning_rate": 9.815303584726473e-06, "loss": 0.4799, "step": 6046 }, { "epoch": 0.17815948028225034, "grad_norm": 1.6362394357308787, "learning_rate": 9.815165082584367e-06, "loss": 0.6689, "step": 6047 }, { "epoch": 0.17818894273971392, "grad_norm": 1.8830244486910144, "learning_rate": 9.815026529508913e-06, "loss": 0.6553, "step": 6048 }, { "epoch": 0.1782184051971775, "grad_norm": 1.4554684314710227, "learning_rate": 9.814887925501572e-06, "loss": 0.4454, "step": 6049 }, { "epoch": 0.17824786765464107, "grad_norm": 1.704416201768859, "learning_rate": 9.814749270563818e-06, "loss": 0.6247, "step": 6050 }, { "epoch": 0.17827733011210464, "grad_norm": 1.8050086838899397, "learning_rate": 9.814610564697113e-06, "loss": 0.593, "step": 6051 }, { "epoch": 0.17830679256956822, "grad_norm": 1.7576645958508819, "learning_rate": 9.814471807902923e-06, "loss": 0.5975, "step": 6052 }, { "epoch": 0.1783362550270318, "grad_norm": 1.6384299224633498, "learning_rate": 9.814333000182719e-06, "loss": 0.4603, "step": 6053 }, { "epoch": 0.1783657174844954, "grad_norm": 1.4300345421620193, "learning_rate": 9.814194141537967e-06, "loss": 0.46, "step": 6054 }, { "epoch": 0.17839517994195897, "grad_norm": 1.6014786730643995, "learning_rate": 9.814055231970137e-06, "loss": 0.4843, "step": 6055 }, { "epoch": 0.17842464239942255, "grad_norm": 1.4610660409134293, "learning_rate": 9.813916271480697e-06, "loss": 0.4849, "step": 6056 }, { "epoch": 0.17845410485688612, "grad_norm": 1.6866576943884168, "learning_rate": 9.813777260071119e-06, "loss": 0.3868, "step": 6057 }, { "epoch": 0.1784835673143497, "grad_norm": 1.6631883754571257, "learning_rate": 9.813638197742872e-06, "loss": 0.5054, "step": 6058 }, { "epoch": 0.17851302977181327, "grad_norm": 1.782671120610801, "learning_rate": 9.813499084497426e-06, "loss": 0.6719, "step": 6059 }, { "epoch": 0.17854249222927684, "grad_norm": 1.704886907767669, "learning_rate": 9.813359920336254e-06, "loss": 0.5427, "step": 6060 }, { "epoch": 0.17857195468674042, "grad_norm": 2.0656686749642494, "learning_rate": 9.813220705260829e-06, "loss": 0.5896, "step": 6061 }, { "epoch": 0.178601417144204, "grad_norm": 1.8410559437829694, "learning_rate": 9.81308143927262e-06, "loss": 0.5791, "step": 6062 }, { "epoch": 0.17863087960166757, "grad_norm": 1.5880378554272165, "learning_rate": 9.812942122373103e-06, "loss": 0.6288, "step": 6063 }, { "epoch": 0.17866034205913114, "grad_norm": 1.824954321638085, "learning_rate": 9.812802754563752e-06, "loss": 0.6956, "step": 6064 }, { "epoch": 0.17868980451659472, "grad_norm": 1.596852853867056, "learning_rate": 9.81266333584604e-06, "loss": 0.4193, "step": 6065 }, { "epoch": 0.1787192669740583, "grad_norm": 1.6800150847991335, "learning_rate": 9.812523866221441e-06, "loss": 0.498, "step": 6066 }, { "epoch": 0.1787487294315219, "grad_norm": 1.560505584379079, "learning_rate": 9.812384345691433e-06, "loss": 0.5238, "step": 6067 }, { "epoch": 0.17877819188898547, "grad_norm": 1.6087786197326963, "learning_rate": 9.812244774257489e-06, "loss": 0.5232, "step": 6068 }, { "epoch": 0.17880765434644905, "grad_norm": 1.6398928164483757, "learning_rate": 9.812105151921086e-06, "loss": 0.5859, "step": 6069 }, { "epoch": 0.17883711680391262, "grad_norm": 1.6267527061172218, "learning_rate": 9.811965478683702e-06, "loss": 0.56, "step": 6070 }, { "epoch": 0.1788665792613762, "grad_norm": 1.6208338157601327, "learning_rate": 9.811825754546814e-06, "loss": 0.5881, "step": 6071 }, { "epoch": 0.17889604171883977, "grad_norm": 1.71466025401467, "learning_rate": 9.811685979511897e-06, "loss": 0.5847, "step": 6072 }, { "epoch": 0.17892550417630335, "grad_norm": 1.5386974329173146, "learning_rate": 9.811546153580436e-06, "loss": 0.3278, "step": 6073 }, { "epoch": 0.17895496663376692, "grad_norm": 1.7240548156173454, "learning_rate": 9.811406276753903e-06, "loss": 0.4705, "step": 6074 }, { "epoch": 0.1789844290912305, "grad_norm": 1.6710451850569226, "learning_rate": 9.811266349033782e-06, "loss": 0.5568, "step": 6075 }, { "epoch": 0.17901389154869407, "grad_norm": 1.5847072647864853, "learning_rate": 9.811126370421553e-06, "loss": 0.4101, "step": 6076 }, { "epoch": 0.17904335400615765, "grad_norm": 1.4735641293618496, "learning_rate": 9.810986340918694e-06, "loss": 0.4599, "step": 6077 }, { "epoch": 0.17907281646362122, "grad_norm": 1.8350708597484897, "learning_rate": 9.810846260526687e-06, "loss": 0.4808, "step": 6078 }, { "epoch": 0.1791022789210848, "grad_norm": 1.9096011619687336, "learning_rate": 9.810706129247016e-06, "loss": 0.5853, "step": 6079 }, { "epoch": 0.1791317413785484, "grad_norm": 1.4831651253749893, "learning_rate": 9.81056594708116e-06, "loss": 0.3156, "step": 6080 }, { "epoch": 0.17916120383601197, "grad_norm": 1.5758150393008539, "learning_rate": 9.810425714030604e-06, "loss": 0.5805, "step": 6081 }, { "epoch": 0.17919066629347555, "grad_norm": 1.3570868260563078, "learning_rate": 9.810285430096833e-06, "loss": 0.3132, "step": 6082 }, { "epoch": 0.17922012875093912, "grad_norm": 1.6346744945446865, "learning_rate": 9.810145095281328e-06, "loss": 0.5342, "step": 6083 }, { "epoch": 0.1792495912084027, "grad_norm": 1.5036567496558355, "learning_rate": 9.810004709585572e-06, "loss": 0.4986, "step": 6084 }, { "epoch": 0.17927905366586627, "grad_norm": 1.4437747269003416, "learning_rate": 9.809864273011052e-06, "loss": 0.4692, "step": 6085 }, { "epoch": 0.17930851612332985, "grad_norm": 1.692573684671607, "learning_rate": 9.809723785559255e-06, "loss": 0.5166, "step": 6086 }, { "epoch": 0.17933797858079342, "grad_norm": 1.5460123639828223, "learning_rate": 9.809583247231667e-06, "loss": 0.5038, "step": 6087 }, { "epoch": 0.179367441038257, "grad_norm": 1.5985555797609348, "learning_rate": 9.80944265802977e-06, "loss": 0.4586, "step": 6088 }, { "epoch": 0.17939690349572057, "grad_norm": 1.8203590143722332, "learning_rate": 9.809302017955055e-06, "loss": 0.6248, "step": 6089 }, { "epoch": 0.17942636595318415, "grad_norm": 1.6843597915339754, "learning_rate": 9.809161327009012e-06, "loss": 0.4739, "step": 6090 }, { "epoch": 0.17945582841064772, "grad_norm": 1.6108908966197653, "learning_rate": 9.809020585193124e-06, "loss": 0.5948, "step": 6091 }, { "epoch": 0.1794852908681113, "grad_norm": 1.6429091207231183, "learning_rate": 9.80887979250888e-06, "loss": 0.5455, "step": 6092 }, { "epoch": 0.1795147533255749, "grad_norm": 2.0559783693306377, "learning_rate": 9.808738948957774e-06, "loss": 0.6233, "step": 6093 }, { "epoch": 0.17954421578303847, "grad_norm": 1.7563612624873897, "learning_rate": 9.808598054541293e-06, "loss": 0.5504, "step": 6094 }, { "epoch": 0.17957367824050205, "grad_norm": 1.7681635003038532, "learning_rate": 9.808457109260925e-06, "loss": 0.6882, "step": 6095 }, { "epoch": 0.17960314069796562, "grad_norm": 1.600885015256687, "learning_rate": 9.808316113118163e-06, "loss": 0.46, "step": 6096 }, { "epoch": 0.1796326031554292, "grad_norm": 1.7673346534374714, "learning_rate": 9.8081750661145e-06, "loss": 0.5536, "step": 6097 }, { "epoch": 0.17966206561289277, "grad_norm": 1.4225901306107238, "learning_rate": 9.808033968251428e-06, "loss": 0.4711, "step": 6098 }, { "epoch": 0.17969152807035635, "grad_norm": 1.650556439852696, "learning_rate": 9.807892819530434e-06, "loss": 0.6245, "step": 6099 }, { "epoch": 0.17972099052781992, "grad_norm": 1.5996493247237118, "learning_rate": 9.807751619953018e-06, "loss": 0.5815, "step": 6100 }, { "epoch": 0.1797504529852835, "grad_norm": 1.6972720854422856, "learning_rate": 9.80761036952067e-06, "loss": 0.5599, "step": 6101 }, { "epoch": 0.17977991544274707, "grad_norm": 1.7099965719958583, "learning_rate": 9.807469068234885e-06, "loss": 0.6252, "step": 6102 }, { "epoch": 0.17980937790021065, "grad_norm": 1.5840306448237431, "learning_rate": 9.807327716097155e-06, "loss": 0.4666, "step": 6103 }, { "epoch": 0.17983884035767422, "grad_norm": 1.5143882807658116, "learning_rate": 9.80718631310898e-06, "loss": 0.5464, "step": 6104 }, { "epoch": 0.1798683028151378, "grad_norm": 1.8242174736444425, "learning_rate": 9.807044859271853e-06, "loss": 0.6618, "step": 6105 }, { "epoch": 0.1798977652726014, "grad_norm": 1.7024662591704558, "learning_rate": 9.80690335458727e-06, "loss": 0.5283, "step": 6106 }, { "epoch": 0.17992722773006498, "grad_norm": 1.5498473719604835, "learning_rate": 9.806761799056727e-06, "loss": 0.4818, "step": 6107 }, { "epoch": 0.17995669018752855, "grad_norm": 1.6046189344809727, "learning_rate": 9.806620192681723e-06, "loss": 0.5817, "step": 6108 }, { "epoch": 0.17998615264499213, "grad_norm": 1.869498825117646, "learning_rate": 9.806478535463756e-06, "loss": 0.4234, "step": 6109 }, { "epoch": 0.1800156151024557, "grad_norm": 1.7528275788789944, "learning_rate": 9.806336827404326e-06, "loss": 0.5195, "step": 6110 }, { "epoch": 0.18004507755991928, "grad_norm": 1.593182338513174, "learning_rate": 9.806195068504927e-06, "loss": 0.436, "step": 6111 }, { "epoch": 0.18007454001738285, "grad_norm": 1.6843722222796422, "learning_rate": 9.806053258767063e-06, "loss": 0.5119, "step": 6112 }, { "epoch": 0.18010400247484643, "grad_norm": 1.5751240504013218, "learning_rate": 9.805911398192232e-06, "loss": 0.3385, "step": 6113 }, { "epoch": 0.18013346493231, "grad_norm": 1.725802119498101, "learning_rate": 9.805769486781932e-06, "loss": 0.5444, "step": 6114 }, { "epoch": 0.18016292738977358, "grad_norm": 1.5806286936645007, "learning_rate": 9.80562752453767e-06, "loss": 0.5259, "step": 6115 }, { "epoch": 0.18019238984723715, "grad_norm": 1.7219380141136342, "learning_rate": 9.805485511460945e-06, "loss": 0.3905, "step": 6116 }, { "epoch": 0.18022185230470072, "grad_norm": 1.7154724472959186, "learning_rate": 9.805343447553258e-06, "loss": 0.4878, "step": 6117 }, { "epoch": 0.1802513147621643, "grad_norm": 1.8197364726470802, "learning_rate": 9.805201332816113e-06, "loss": 0.5589, "step": 6118 }, { "epoch": 0.1802807772196279, "grad_norm": 1.472646426963261, "learning_rate": 9.80505916725101e-06, "loss": 0.413, "step": 6119 }, { "epoch": 0.18031023967709148, "grad_norm": 1.4984903797283444, "learning_rate": 9.80491695085946e-06, "loss": 0.4917, "step": 6120 }, { "epoch": 0.18033970213455505, "grad_norm": 2.0784080553152573, "learning_rate": 9.804774683642958e-06, "loss": 0.7044, "step": 6121 }, { "epoch": 0.18036916459201863, "grad_norm": 1.6251707914572087, "learning_rate": 9.804632365603017e-06, "loss": 0.4082, "step": 6122 }, { "epoch": 0.1803986270494822, "grad_norm": 1.5851286831789695, "learning_rate": 9.804489996741137e-06, "loss": 0.5048, "step": 6123 }, { "epoch": 0.18042808950694578, "grad_norm": 1.7826868003652834, "learning_rate": 9.804347577058828e-06, "loss": 0.7039, "step": 6124 }, { "epoch": 0.18045755196440935, "grad_norm": 1.531987625760499, "learning_rate": 9.804205106557594e-06, "loss": 0.4173, "step": 6125 }, { "epoch": 0.18048701442187293, "grad_norm": 1.4167941405009805, "learning_rate": 9.804062585238942e-06, "loss": 0.3628, "step": 6126 }, { "epoch": 0.1805164768793365, "grad_norm": 1.8989919894858738, "learning_rate": 9.80392001310438e-06, "loss": 0.6009, "step": 6127 }, { "epoch": 0.18054593933680008, "grad_norm": 1.8680261268790082, "learning_rate": 9.803777390155418e-06, "loss": 0.6236, "step": 6128 }, { "epoch": 0.18057540179426365, "grad_norm": 1.6218145957550603, "learning_rate": 9.80363471639356e-06, "loss": 0.5149, "step": 6129 }, { "epoch": 0.18060486425172723, "grad_norm": 1.5720211293064597, "learning_rate": 9.80349199182032e-06, "loss": 0.4891, "step": 6130 }, { "epoch": 0.1806343267091908, "grad_norm": 1.926681952701315, "learning_rate": 9.803349216437203e-06, "loss": 0.3289, "step": 6131 }, { "epoch": 0.1806637891666544, "grad_norm": 1.7781189932836574, "learning_rate": 9.803206390245725e-06, "loss": 0.5447, "step": 6132 }, { "epoch": 0.18069325162411798, "grad_norm": 1.5428891345526685, "learning_rate": 9.803063513247392e-06, "loss": 0.4503, "step": 6133 }, { "epoch": 0.18072271408158155, "grad_norm": 1.5339465441754592, "learning_rate": 9.802920585443717e-06, "loss": 0.5289, "step": 6134 }, { "epoch": 0.18075217653904513, "grad_norm": 1.4651238900277475, "learning_rate": 9.802777606836212e-06, "loss": 0.5739, "step": 6135 }, { "epoch": 0.1807816389965087, "grad_norm": 1.6886881806609804, "learning_rate": 9.802634577426388e-06, "loss": 0.5829, "step": 6136 }, { "epoch": 0.18081110145397228, "grad_norm": 1.45814735630258, "learning_rate": 9.802491497215762e-06, "loss": 0.4216, "step": 6137 }, { "epoch": 0.18084056391143585, "grad_norm": 1.8429692081460578, "learning_rate": 9.802348366205842e-06, "loss": 0.3424, "step": 6138 }, { "epoch": 0.18087002636889943, "grad_norm": 1.6572500324262383, "learning_rate": 9.802205184398145e-06, "loss": 0.6449, "step": 6139 }, { "epoch": 0.180899488826363, "grad_norm": 1.6329698281367957, "learning_rate": 9.802061951794186e-06, "loss": 0.5265, "step": 6140 }, { "epoch": 0.18092895128382658, "grad_norm": 1.7109193715724855, "learning_rate": 9.801918668395478e-06, "loss": 0.5532, "step": 6141 }, { "epoch": 0.18095841374129015, "grad_norm": 1.5118387375126634, "learning_rate": 9.801775334203539e-06, "loss": 0.3701, "step": 6142 }, { "epoch": 0.18098787619875373, "grad_norm": 1.820225052712256, "learning_rate": 9.801631949219884e-06, "loss": 0.5218, "step": 6143 }, { "epoch": 0.1810173386562173, "grad_norm": 1.4907528764372993, "learning_rate": 9.801488513446027e-06, "loss": 0.3448, "step": 6144 }, { "epoch": 0.1810468011136809, "grad_norm": 1.7260756852643884, "learning_rate": 9.80134502688349e-06, "loss": 0.3879, "step": 6145 }, { "epoch": 0.18107626357114448, "grad_norm": 1.8169964146946191, "learning_rate": 9.801201489533788e-06, "loss": 0.5083, "step": 6146 }, { "epoch": 0.18110572602860806, "grad_norm": 1.6178207681124417, "learning_rate": 9.801057901398441e-06, "loss": 0.4875, "step": 6147 }, { "epoch": 0.18113518848607163, "grad_norm": 1.675901193901548, "learning_rate": 9.800914262478964e-06, "loss": 0.5177, "step": 6148 }, { "epoch": 0.1811646509435352, "grad_norm": 1.6014485326599646, "learning_rate": 9.800770572776881e-06, "loss": 0.3864, "step": 6149 }, { "epoch": 0.18119411340099878, "grad_norm": 1.6143135994044764, "learning_rate": 9.80062683229371e-06, "loss": 0.6111, "step": 6150 }, { "epoch": 0.18122357585846235, "grad_norm": 1.8470044592318962, "learning_rate": 9.800483041030971e-06, "loss": 0.5931, "step": 6151 }, { "epoch": 0.18125303831592593, "grad_norm": 1.753275459731552, "learning_rate": 9.800339198990186e-06, "loss": 0.7618, "step": 6152 }, { "epoch": 0.1812825007733895, "grad_norm": 1.5594381594661282, "learning_rate": 9.800195306172876e-06, "loss": 0.4279, "step": 6153 }, { "epoch": 0.18131196323085308, "grad_norm": 1.7095438677041093, "learning_rate": 9.800051362580561e-06, "loss": 0.4863, "step": 6154 }, { "epoch": 0.18134142568831665, "grad_norm": 1.8074647896204257, "learning_rate": 9.799907368214766e-06, "loss": 0.5297, "step": 6155 }, { "epoch": 0.18137088814578023, "grad_norm": 1.535762104082049, "learning_rate": 9.799763323077015e-06, "loss": 0.5738, "step": 6156 }, { "epoch": 0.1814003506032438, "grad_norm": 1.6113463511842487, "learning_rate": 9.799619227168831e-06, "loss": 0.549, "step": 6157 }, { "epoch": 0.1814298130607074, "grad_norm": 1.4065928880160972, "learning_rate": 9.799475080491737e-06, "loss": 0.489, "step": 6158 }, { "epoch": 0.18145927551817098, "grad_norm": 1.6056196764139143, "learning_rate": 9.799330883047258e-06, "loss": 0.4729, "step": 6159 }, { "epoch": 0.18148873797563456, "grad_norm": 1.8090162015710547, "learning_rate": 9.799186634836919e-06, "loss": 0.4339, "step": 6160 }, { "epoch": 0.18151820043309813, "grad_norm": 1.7376625850081366, "learning_rate": 9.799042335862245e-06, "loss": 0.4854, "step": 6161 }, { "epoch": 0.1815476628905617, "grad_norm": 1.8068140603576344, "learning_rate": 9.798897986124767e-06, "loss": 0.5185, "step": 6162 }, { "epoch": 0.18157712534802528, "grad_norm": 1.6708737111377219, "learning_rate": 9.798753585626006e-06, "loss": 0.562, "step": 6163 }, { "epoch": 0.18160658780548886, "grad_norm": 1.7168912518064412, "learning_rate": 9.798609134367494e-06, "loss": 0.6366, "step": 6164 }, { "epoch": 0.18163605026295243, "grad_norm": 1.8362938741165296, "learning_rate": 9.798464632350755e-06, "loss": 0.4806, "step": 6165 }, { "epoch": 0.181665512720416, "grad_norm": 1.803753908311926, "learning_rate": 9.798320079577321e-06, "loss": 0.5389, "step": 6166 }, { "epoch": 0.18169497517787958, "grad_norm": 1.7492520748923375, "learning_rate": 9.798175476048719e-06, "loss": 0.5794, "step": 6167 }, { "epoch": 0.18172443763534316, "grad_norm": 1.6961014244295796, "learning_rate": 9.79803082176648e-06, "loss": 0.4195, "step": 6168 }, { "epoch": 0.18175390009280673, "grad_norm": 1.612061788657328, "learning_rate": 9.797886116732131e-06, "loss": 0.6797, "step": 6169 }, { "epoch": 0.1817833625502703, "grad_norm": 1.6346426813364547, "learning_rate": 9.797741360947208e-06, "loss": 0.5302, "step": 6170 }, { "epoch": 0.1818128250077339, "grad_norm": 1.6119616610383685, "learning_rate": 9.797596554413237e-06, "loss": 0.4211, "step": 6171 }, { "epoch": 0.18184228746519748, "grad_norm": 1.6635450774724045, "learning_rate": 9.797451697131753e-06, "loss": 0.6047, "step": 6172 }, { "epoch": 0.18187174992266106, "grad_norm": 1.5362440328929974, "learning_rate": 9.797306789104284e-06, "loss": 0.5156, "step": 6173 }, { "epoch": 0.18190121238012463, "grad_norm": 1.8080782948157514, "learning_rate": 9.79716183033237e-06, "loss": 0.4213, "step": 6174 }, { "epoch": 0.1819306748375882, "grad_norm": 1.7796831131014494, "learning_rate": 9.797016820817538e-06, "loss": 0.643, "step": 6175 }, { "epoch": 0.18196013729505178, "grad_norm": 1.502524261596387, "learning_rate": 9.796871760561324e-06, "loss": 0.4451, "step": 6176 }, { "epoch": 0.18198959975251536, "grad_norm": 1.6194093118877644, "learning_rate": 9.796726649565263e-06, "loss": 0.4562, "step": 6177 }, { "epoch": 0.18201906220997893, "grad_norm": 1.5573428421958555, "learning_rate": 9.796581487830887e-06, "loss": 0.5756, "step": 6178 }, { "epoch": 0.1820485246674425, "grad_norm": 1.578257446285084, "learning_rate": 9.796436275359735e-06, "loss": 0.5524, "step": 6179 }, { "epoch": 0.18207798712490608, "grad_norm": 1.7594423038160243, "learning_rate": 9.796291012153344e-06, "loss": 0.4847, "step": 6180 }, { "epoch": 0.18210744958236966, "grad_norm": 1.444850760493952, "learning_rate": 9.796145698213247e-06, "loss": 0.4112, "step": 6181 }, { "epoch": 0.18213691203983323, "grad_norm": 1.678186942681465, "learning_rate": 9.796000333540982e-06, "loss": 0.4912, "step": 6182 }, { "epoch": 0.1821663744972968, "grad_norm": 1.6254964797826537, "learning_rate": 9.795854918138088e-06, "loss": 0.4423, "step": 6183 }, { "epoch": 0.1821958369547604, "grad_norm": 1.8700161931865953, "learning_rate": 9.795709452006101e-06, "loss": 0.5439, "step": 6184 }, { "epoch": 0.18222529941222398, "grad_norm": 1.4277922523041735, "learning_rate": 9.795563935146561e-06, "loss": 0.4821, "step": 6185 }, { "epoch": 0.18225476186968756, "grad_norm": 1.7254933673670898, "learning_rate": 9.79541836756101e-06, "loss": 0.6676, "step": 6186 }, { "epoch": 0.18228422432715113, "grad_norm": 1.5859187966704762, "learning_rate": 9.795272749250982e-06, "loss": 0.4538, "step": 6187 }, { "epoch": 0.1823136867846147, "grad_norm": 1.54187849287013, "learning_rate": 9.79512708021802e-06, "loss": 0.4876, "step": 6188 }, { "epoch": 0.18234314924207828, "grad_norm": 1.6284192020765327, "learning_rate": 9.794981360463666e-06, "loss": 0.6116, "step": 6189 }, { "epoch": 0.18237261169954186, "grad_norm": 1.9547728486420448, "learning_rate": 9.794835589989462e-06, "loss": 0.7079, "step": 6190 }, { "epoch": 0.18240207415700543, "grad_norm": 1.8258746632895397, "learning_rate": 9.794689768796947e-06, "loss": 0.4864, "step": 6191 }, { "epoch": 0.182431536614469, "grad_norm": 1.7583466594753243, "learning_rate": 9.794543896887665e-06, "loss": 0.4652, "step": 6192 }, { "epoch": 0.18246099907193258, "grad_norm": 1.648498044062666, "learning_rate": 9.794397974263158e-06, "loss": 0.6217, "step": 6193 }, { "epoch": 0.18249046152939616, "grad_norm": 1.7048001828040091, "learning_rate": 9.794252000924971e-06, "loss": 0.4879, "step": 6194 }, { "epoch": 0.18251992398685973, "grad_norm": 1.7879947341082942, "learning_rate": 9.794105976874648e-06, "loss": 0.5973, "step": 6195 }, { "epoch": 0.1825493864443233, "grad_norm": 1.5609126452407849, "learning_rate": 9.793959902113732e-06, "loss": 0.4956, "step": 6196 }, { "epoch": 0.1825788489017869, "grad_norm": 1.745223274840175, "learning_rate": 9.79381377664377e-06, "loss": 0.5269, "step": 6197 }, { "epoch": 0.18260831135925049, "grad_norm": 1.6060637261781934, "learning_rate": 9.793667600466307e-06, "loss": 0.5334, "step": 6198 }, { "epoch": 0.18263777381671406, "grad_norm": 1.5972993426833209, "learning_rate": 9.793521373582888e-06, "loss": 0.5731, "step": 6199 }, { "epoch": 0.18266723627417764, "grad_norm": 1.6978986080879912, "learning_rate": 9.793375095995062e-06, "loss": 0.5563, "step": 6200 }, { "epoch": 0.1826966987316412, "grad_norm": 1.5878782424897322, "learning_rate": 9.793228767704374e-06, "loss": 0.4999, "step": 6201 }, { "epoch": 0.18272616118910479, "grad_norm": 1.8760514835938313, "learning_rate": 9.793082388712375e-06, "loss": 0.3736, "step": 6202 }, { "epoch": 0.18275562364656836, "grad_norm": 1.4805303612102465, "learning_rate": 9.792935959020608e-06, "loss": 0.5135, "step": 6203 }, { "epoch": 0.18278508610403194, "grad_norm": 1.6899951187202333, "learning_rate": 9.792789478630628e-06, "loss": 0.4396, "step": 6204 }, { "epoch": 0.1828145485614955, "grad_norm": 1.584787700878659, "learning_rate": 9.79264294754398e-06, "loss": 0.665, "step": 6205 }, { "epoch": 0.18284401101895909, "grad_norm": 1.9192537086798112, "learning_rate": 9.792496365762217e-06, "loss": 0.7099, "step": 6206 }, { "epoch": 0.18287347347642266, "grad_norm": 1.541916449970908, "learning_rate": 9.792349733286885e-06, "loss": 0.4983, "step": 6207 }, { "epoch": 0.18290293593388623, "grad_norm": 1.5730655347643534, "learning_rate": 9.79220305011954e-06, "loss": 0.3985, "step": 6208 }, { "epoch": 0.1829323983913498, "grad_norm": 1.5103829679728051, "learning_rate": 9.792056316261732e-06, "loss": 0.4903, "step": 6209 }, { "epoch": 0.1829618608488134, "grad_norm": 1.7016253868581186, "learning_rate": 9.791909531715012e-06, "loss": 0.5817, "step": 6210 }, { "epoch": 0.182991323306277, "grad_norm": 1.6435950052694266, "learning_rate": 9.791762696480933e-06, "loss": 0.6027, "step": 6211 }, { "epoch": 0.18302078576374056, "grad_norm": 1.651828319435908, "learning_rate": 9.79161581056105e-06, "loss": 0.5426, "step": 6212 }, { "epoch": 0.18305024822120414, "grad_norm": 1.6170500227349254, "learning_rate": 9.791468873956913e-06, "loss": 0.5182, "step": 6213 }, { "epoch": 0.1830797106786677, "grad_norm": 1.6037802408805604, "learning_rate": 9.79132188667008e-06, "loss": 0.5737, "step": 6214 }, { "epoch": 0.1831091731361313, "grad_norm": 1.5053854334725087, "learning_rate": 9.791174848702105e-06, "loss": 0.503, "step": 6215 }, { "epoch": 0.18313863559359486, "grad_norm": 1.8882539710250803, "learning_rate": 9.791027760054541e-06, "loss": 0.6273, "step": 6216 }, { "epoch": 0.18316809805105844, "grad_norm": 1.546815969487821, "learning_rate": 9.790880620728946e-06, "loss": 0.4097, "step": 6217 }, { "epoch": 0.183197560508522, "grad_norm": 1.5518409291347772, "learning_rate": 9.790733430726877e-06, "loss": 0.5639, "step": 6218 }, { "epoch": 0.1832270229659856, "grad_norm": 1.738764696144097, "learning_rate": 9.790586190049888e-06, "loss": 0.4319, "step": 6219 }, { "epoch": 0.18325648542344916, "grad_norm": 1.5794065115636293, "learning_rate": 9.790438898699539e-06, "loss": 0.5685, "step": 6220 }, { "epoch": 0.18328594788091274, "grad_norm": 1.5994716037617125, "learning_rate": 9.790291556677387e-06, "loss": 0.4759, "step": 6221 }, { "epoch": 0.1833154103383763, "grad_norm": 1.6357720742237885, "learning_rate": 9.79014416398499e-06, "loss": 0.3892, "step": 6222 }, { "epoch": 0.1833448727958399, "grad_norm": 1.5161480988873899, "learning_rate": 9.78999672062391e-06, "loss": 0.3953, "step": 6223 }, { "epoch": 0.1833743352533035, "grad_norm": 1.5175988427172986, "learning_rate": 9.789849226595704e-06, "loss": 0.4825, "step": 6224 }, { "epoch": 0.18340379771076706, "grad_norm": 1.7416216400598923, "learning_rate": 9.789701681901933e-06, "loss": 0.4998, "step": 6225 }, { "epoch": 0.18343326016823064, "grad_norm": 1.6959419422543445, "learning_rate": 9.789554086544156e-06, "loss": 0.631, "step": 6226 }, { "epoch": 0.1834627226256942, "grad_norm": 1.4796618933212684, "learning_rate": 9.789406440523936e-06, "loss": 0.4851, "step": 6227 }, { "epoch": 0.1834921850831578, "grad_norm": 1.6273175527598858, "learning_rate": 9.789258743842833e-06, "loss": 0.4582, "step": 6228 }, { "epoch": 0.18352164754062136, "grad_norm": 1.5984050934210758, "learning_rate": 9.789110996502414e-06, "loss": 0.5074, "step": 6229 }, { "epoch": 0.18355110999808494, "grad_norm": 1.8565088762784234, "learning_rate": 9.788963198504236e-06, "loss": 0.6727, "step": 6230 }, { "epoch": 0.1835805724555485, "grad_norm": 1.919239042890794, "learning_rate": 9.788815349849864e-06, "loss": 0.627, "step": 6231 }, { "epoch": 0.1836100349130121, "grad_norm": 1.5406207431972425, "learning_rate": 9.788667450540863e-06, "loss": 0.4475, "step": 6232 }, { "epoch": 0.18363949737047566, "grad_norm": 1.5829054907608928, "learning_rate": 9.788519500578797e-06, "loss": 0.4668, "step": 6233 }, { "epoch": 0.18366895982793924, "grad_norm": 1.73929590041258, "learning_rate": 9.788371499965232e-06, "loss": 0.54, "step": 6234 }, { "epoch": 0.1836984222854028, "grad_norm": 1.532219282641597, "learning_rate": 9.788223448701733e-06, "loss": 0.5142, "step": 6235 }, { "epoch": 0.18372788474286642, "grad_norm": 1.5182193578982002, "learning_rate": 9.788075346789866e-06, "loss": 0.4583, "step": 6236 }, { "epoch": 0.18375734720033, "grad_norm": 1.540764880467133, "learning_rate": 9.787927194231196e-06, "loss": 0.4597, "step": 6237 }, { "epoch": 0.18378680965779357, "grad_norm": 1.581835455061316, "learning_rate": 9.78777899102729e-06, "loss": 0.5534, "step": 6238 }, { "epoch": 0.18381627211525714, "grad_norm": 1.6132617467901453, "learning_rate": 9.787630737179721e-06, "loss": 0.4961, "step": 6239 }, { "epoch": 0.18384573457272071, "grad_norm": 1.578571694825731, "learning_rate": 9.78748243269005e-06, "loss": 0.5395, "step": 6240 }, { "epoch": 0.1838751970301843, "grad_norm": 1.6158851872092581, "learning_rate": 9.787334077559849e-06, "loss": 0.584, "step": 6241 }, { "epoch": 0.18390465948764786, "grad_norm": 1.6535599497430784, "learning_rate": 9.787185671790688e-06, "loss": 0.3847, "step": 6242 }, { "epoch": 0.18393412194511144, "grad_norm": 1.6333966832209696, "learning_rate": 9.787037215384135e-06, "loss": 0.3761, "step": 6243 }, { "epoch": 0.18396358440257501, "grad_norm": 1.6557250339275127, "learning_rate": 9.786888708341761e-06, "loss": 0.4821, "step": 6244 }, { "epoch": 0.1839930468600386, "grad_norm": 1.6176652580200654, "learning_rate": 9.786740150665139e-06, "loss": 0.4743, "step": 6245 }, { "epoch": 0.18402250931750216, "grad_norm": 1.637267552311981, "learning_rate": 9.786591542355837e-06, "loss": 0.5346, "step": 6246 }, { "epoch": 0.18405197177496574, "grad_norm": 1.7000652733739552, "learning_rate": 9.78644288341543e-06, "loss": 0.4618, "step": 6247 }, { "epoch": 0.18408143423242931, "grad_norm": 1.6493564898169106, "learning_rate": 9.786294173845488e-06, "loss": 0.53, "step": 6248 }, { "epoch": 0.18411089668989292, "grad_norm": 1.7361810507955133, "learning_rate": 9.786145413647583e-06, "loss": 0.5815, "step": 6249 }, { "epoch": 0.1841403591473565, "grad_norm": 1.8579479392947071, "learning_rate": 9.785996602823292e-06, "loss": 0.5827, "step": 6250 }, { "epoch": 0.18416982160482007, "grad_norm": 1.626305860849348, "learning_rate": 9.785847741374188e-06, "loss": 0.3703, "step": 6251 }, { "epoch": 0.18419928406228364, "grad_norm": 1.695252415318405, "learning_rate": 9.785698829301844e-06, "loss": 0.5063, "step": 6252 }, { "epoch": 0.18422874651974722, "grad_norm": 1.6657343288624256, "learning_rate": 9.785549866607837e-06, "loss": 0.6036, "step": 6253 }, { "epoch": 0.1842582089772108, "grad_norm": 1.6466689987603091, "learning_rate": 9.785400853293743e-06, "loss": 0.6114, "step": 6254 }, { "epoch": 0.18428767143467437, "grad_norm": 1.7103906217994784, "learning_rate": 9.785251789361136e-06, "loss": 0.642, "step": 6255 }, { "epoch": 0.18431713389213794, "grad_norm": 1.8109692693055786, "learning_rate": 9.785102674811592e-06, "loss": 0.5998, "step": 6256 }, { "epoch": 0.18434659634960152, "grad_norm": 1.4537866975223777, "learning_rate": 9.784953509646694e-06, "loss": 0.333, "step": 6257 }, { "epoch": 0.1843760588070651, "grad_norm": 1.5523450972132402, "learning_rate": 9.784804293868014e-06, "loss": 0.6012, "step": 6258 }, { "epoch": 0.18440552126452867, "grad_norm": 1.5807666311812958, "learning_rate": 9.784655027477132e-06, "loss": 0.5093, "step": 6259 }, { "epoch": 0.18443498372199224, "grad_norm": 1.6636978317961466, "learning_rate": 9.784505710475628e-06, "loss": 0.499, "step": 6260 }, { "epoch": 0.18446444617945582, "grad_norm": 1.6506465795272351, "learning_rate": 9.784356342865082e-06, "loss": 0.5437, "step": 6261 }, { "epoch": 0.18449390863691942, "grad_norm": 1.817972091448421, "learning_rate": 9.784206924647071e-06, "loss": 0.622, "step": 6262 }, { "epoch": 0.184523371094383, "grad_norm": 1.6522885856839857, "learning_rate": 9.784057455823177e-06, "loss": 0.5493, "step": 6263 }, { "epoch": 0.18455283355184657, "grad_norm": 1.7910176587108266, "learning_rate": 9.783907936394981e-06, "loss": 0.606, "step": 6264 }, { "epoch": 0.18458229600931014, "grad_norm": 2.099690802354741, "learning_rate": 9.783758366364067e-06, "loss": 0.5229, "step": 6265 }, { "epoch": 0.18461175846677372, "grad_norm": 1.6720909992193804, "learning_rate": 9.783608745732012e-06, "loss": 0.5244, "step": 6266 }, { "epoch": 0.1846412209242373, "grad_norm": 1.7221741526290868, "learning_rate": 9.783459074500403e-06, "loss": 0.5974, "step": 6267 }, { "epoch": 0.18467068338170087, "grad_norm": 1.5940977998940704, "learning_rate": 9.783309352670822e-06, "loss": 0.5616, "step": 6268 }, { "epoch": 0.18470014583916444, "grad_norm": 1.6334657204604717, "learning_rate": 9.783159580244853e-06, "loss": 0.5099, "step": 6269 }, { "epoch": 0.18472960829662802, "grad_norm": 1.7664716611209064, "learning_rate": 9.783009757224077e-06, "loss": 0.6478, "step": 6270 }, { "epoch": 0.1847590707540916, "grad_norm": 1.4569054699858845, "learning_rate": 9.782859883610083e-06, "loss": 0.4195, "step": 6271 }, { "epoch": 0.18478853321155517, "grad_norm": 1.4809533861624045, "learning_rate": 9.782709959404456e-06, "loss": 0.4056, "step": 6272 }, { "epoch": 0.18481799566901874, "grad_norm": 1.6119945105930638, "learning_rate": 9.782559984608779e-06, "loss": 0.5757, "step": 6273 }, { "epoch": 0.18484745812648232, "grad_norm": 1.6453060411397336, "learning_rate": 9.78240995922464e-06, "loss": 0.6749, "step": 6274 }, { "epoch": 0.18487692058394592, "grad_norm": 1.5910744052110177, "learning_rate": 9.782259883253629e-06, "loss": 0.4256, "step": 6275 }, { "epoch": 0.1849063830414095, "grad_norm": 2.132072933199423, "learning_rate": 9.782109756697326e-06, "loss": 0.7136, "step": 6276 }, { "epoch": 0.18493584549887307, "grad_norm": 1.5656453258976044, "learning_rate": 9.781959579557327e-06, "loss": 0.5345, "step": 6277 }, { "epoch": 0.18496530795633664, "grad_norm": 1.5648511272720806, "learning_rate": 9.781809351835214e-06, "loss": 0.4791, "step": 6278 }, { "epoch": 0.18499477041380022, "grad_norm": 1.6235840144737532, "learning_rate": 9.781659073532581e-06, "loss": 0.4306, "step": 6279 }, { "epoch": 0.1850242328712638, "grad_norm": 1.7173607393739048, "learning_rate": 9.781508744651014e-06, "loss": 0.6131, "step": 6280 }, { "epoch": 0.18505369532872737, "grad_norm": 1.6590463008907044, "learning_rate": 9.781358365192107e-06, "loss": 0.552, "step": 6281 }, { "epoch": 0.18508315778619094, "grad_norm": 1.52971438813973, "learning_rate": 9.781207935157447e-06, "loss": 0.4423, "step": 6282 }, { "epoch": 0.18511262024365452, "grad_norm": 1.7036703340195425, "learning_rate": 9.781057454548626e-06, "loss": 0.5854, "step": 6283 }, { "epoch": 0.1851420827011181, "grad_norm": 1.5873003072062835, "learning_rate": 9.780906923367238e-06, "loss": 0.4881, "step": 6284 }, { "epoch": 0.18517154515858167, "grad_norm": 1.4889275291442312, "learning_rate": 9.780756341614872e-06, "loss": 0.3642, "step": 6285 }, { "epoch": 0.18520100761604524, "grad_norm": 1.5352944729414724, "learning_rate": 9.780605709293123e-06, "loss": 0.4666, "step": 6286 }, { "epoch": 0.18523047007350882, "grad_norm": 1.5506251304914391, "learning_rate": 9.780455026403583e-06, "loss": 0.4819, "step": 6287 }, { "epoch": 0.18525993253097242, "grad_norm": 1.5997781248641496, "learning_rate": 9.780304292947847e-06, "loss": 0.44, "step": 6288 }, { "epoch": 0.185289394988436, "grad_norm": 1.6224802326789955, "learning_rate": 9.78015350892751e-06, "loss": 0.5541, "step": 6289 }, { "epoch": 0.18531885744589957, "grad_norm": 1.6686198061024466, "learning_rate": 9.780002674344164e-06, "loss": 0.58, "step": 6290 }, { "epoch": 0.18534831990336315, "grad_norm": 1.4442196289551972, "learning_rate": 9.779851789199408e-06, "loss": 0.4356, "step": 6291 }, { "epoch": 0.18537778236082672, "grad_norm": 1.792042366095398, "learning_rate": 9.779700853494835e-06, "loss": 0.4826, "step": 6292 }, { "epoch": 0.1854072448182903, "grad_norm": 1.5545158875308809, "learning_rate": 9.779549867232046e-06, "loss": 0.3978, "step": 6293 }, { "epoch": 0.18543670727575387, "grad_norm": 1.5770999689946437, "learning_rate": 9.779398830412632e-06, "loss": 0.4734, "step": 6294 }, { "epoch": 0.18546616973321745, "grad_norm": 1.58640022359524, "learning_rate": 9.779247743038196e-06, "loss": 0.4236, "step": 6295 }, { "epoch": 0.18549563219068102, "grad_norm": 1.604480702282579, "learning_rate": 9.779096605110332e-06, "loss": 0.3518, "step": 6296 }, { "epoch": 0.1855250946481446, "grad_norm": 1.6005965209542985, "learning_rate": 9.778945416630643e-06, "loss": 0.5872, "step": 6297 }, { "epoch": 0.18555455710560817, "grad_norm": 1.5722284039591206, "learning_rate": 9.778794177600724e-06, "loss": 0.5359, "step": 6298 }, { "epoch": 0.18558401956307174, "grad_norm": 1.619128263111766, "learning_rate": 9.778642888022176e-06, "loss": 0.4783, "step": 6299 }, { "epoch": 0.18561348202053532, "grad_norm": 1.5716811691498056, "learning_rate": 9.778491547896601e-06, "loss": 0.5835, "step": 6300 }, { "epoch": 0.18564294447799892, "grad_norm": 1.712022141815272, "learning_rate": 9.778340157225599e-06, "loss": 0.4592, "step": 6301 }, { "epoch": 0.1856724069354625, "grad_norm": 1.7636076078784646, "learning_rate": 9.77818871601077e-06, "loss": 0.6877, "step": 6302 }, { "epoch": 0.18570186939292607, "grad_norm": 1.7339710306128198, "learning_rate": 9.778037224253717e-06, "loss": 0.5842, "step": 6303 }, { "epoch": 0.18573133185038965, "grad_norm": 1.6488289896115345, "learning_rate": 9.777885681956044e-06, "loss": 0.57, "step": 6304 }, { "epoch": 0.18576079430785322, "grad_norm": 1.9785940877124542, "learning_rate": 9.777734089119351e-06, "loss": 0.5769, "step": 6305 }, { "epoch": 0.1857902567653168, "grad_norm": 1.6924796076411517, "learning_rate": 9.777582445745244e-06, "loss": 0.5488, "step": 6306 }, { "epoch": 0.18581971922278037, "grad_norm": 1.6039827376017284, "learning_rate": 9.777430751835324e-06, "loss": 0.466, "step": 6307 }, { "epoch": 0.18584918168024395, "grad_norm": 1.6987283124707173, "learning_rate": 9.777279007391199e-06, "loss": 0.4847, "step": 6308 }, { "epoch": 0.18587864413770752, "grad_norm": 1.5866706153337276, "learning_rate": 9.777127212414473e-06, "loss": 0.5365, "step": 6309 }, { "epoch": 0.1859081065951711, "grad_norm": 2.0415302659302075, "learning_rate": 9.776975366906749e-06, "loss": 0.5247, "step": 6310 }, { "epoch": 0.18593756905263467, "grad_norm": 1.686804098580697, "learning_rate": 9.776823470869636e-06, "loss": 0.7008, "step": 6311 }, { "epoch": 0.18596703151009825, "grad_norm": 1.6428395646158729, "learning_rate": 9.77667152430474e-06, "loss": 0.5141, "step": 6312 }, { "epoch": 0.18599649396756182, "grad_norm": 1.7696340460669735, "learning_rate": 9.77651952721367e-06, "loss": 0.5911, "step": 6313 }, { "epoch": 0.18602595642502542, "grad_norm": 1.5361207572669218, "learning_rate": 9.776367479598032e-06, "loss": 0.3499, "step": 6314 }, { "epoch": 0.186055418882489, "grad_norm": 1.6180692773340868, "learning_rate": 9.776215381459434e-06, "loss": 0.4248, "step": 6315 }, { "epoch": 0.18608488133995257, "grad_norm": 1.7178705979084976, "learning_rate": 9.776063232799486e-06, "loss": 0.5308, "step": 6316 }, { "epoch": 0.18611434379741615, "grad_norm": 1.6706067864851633, "learning_rate": 9.775911033619796e-06, "loss": 0.4651, "step": 6317 }, { "epoch": 0.18614380625487972, "grad_norm": 1.7112860133087358, "learning_rate": 9.775758783921973e-06, "loss": 0.4599, "step": 6318 }, { "epoch": 0.1861732687123433, "grad_norm": 1.6484036964021695, "learning_rate": 9.77560648370763e-06, "loss": 0.7299, "step": 6319 }, { "epoch": 0.18620273116980687, "grad_norm": 1.6323150061258536, "learning_rate": 9.775454132978378e-06, "loss": 0.4863, "step": 6320 }, { "epoch": 0.18623219362727045, "grad_norm": 1.5070108152479995, "learning_rate": 9.775301731735828e-06, "loss": 0.3624, "step": 6321 }, { "epoch": 0.18626165608473402, "grad_norm": 1.711366198184163, "learning_rate": 9.775149279981589e-06, "loss": 0.4718, "step": 6322 }, { "epoch": 0.1862911185421976, "grad_norm": 1.7042919248355346, "learning_rate": 9.774996777717278e-06, "loss": 0.4678, "step": 6323 }, { "epoch": 0.18632058099966117, "grad_norm": 1.645345441816525, "learning_rate": 9.774844224944506e-06, "loss": 0.6047, "step": 6324 }, { "epoch": 0.18635004345712475, "grad_norm": 1.6878741686572718, "learning_rate": 9.774691621664887e-06, "loss": 0.3294, "step": 6325 }, { "epoch": 0.18637950591458832, "grad_norm": 1.6584194242167691, "learning_rate": 9.774538967880036e-06, "loss": 0.4866, "step": 6326 }, { "epoch": 0.18640896837205193, "grad_norm": 1.6314977466688767, "learning_rate": 9.774386263591565e-06, "loss": 0.5356, "step": 6327 }, { "epoch": 0.1864384308295155, "grad_norm": 1.5114934225550156, "learning_rate": 9.774233508801093e-06, "loss": 0.4209, "step": 6328 }, { "epoch": 0.18646789328697908, "grad_norm": 1.794463117252513, "learning_rate": 9.774080703510233e-06, "loss": 0.4783, "step": 6329 }, { "epoch": 0.18649735574444265, "grad_norm": 1.7265266661154246, "learning_rate": 9.773927847720603e-06, "loss": 0.4167, "step": 6330 }, { "epoch": 0.18652681820190622, "grad_norm": 1.7722457089937227, "learning_rate": 9.773774941433817e-06, "loss": 0.6214, "step": 6331 }, { "epoch": 0.1865562806593698, "grad_norm": 1.918942188830298, "learning_rate": 9.773621984651496e-06, "loss": 0.7779, "step": 6332 }, { "epoch": 0.18658574311683337, "grad_norm": 1.754503994729996, "learning_rate": 9.773468977375258e-06, "loss": 0.511, "step": 6333 }, { "epoch": 0.18661520557429695, "grad_norm": 1.8056195018969081, "learning_rate": 9.773315919606718e-06, "loss": 0.4404, "step": 6334 }, { "epoch": 0.18664466803176052, "grad_norm": 1.611212365443912, "learning_rate": 9.773162811347498e-06, "loss": 0.6123, "step": 6335 }, { "epoch": 0.1866741304892241, "grad_norm": 1.6282763574881607, "learning_rate": 9.773009652599215e-06, "loss": 0.5884, "step": 6336 }, { "epoch": 0.18670359294668767, "grad_norm": 1.92995921261005, "learning_rate": 9.772856443363491e-06, "loss": 0.5078, "step": 6337 }, { "epoch": 0.18673305540415125, "grad_norm": 1.6997935014312564, "learning_rate": 9.772703183641947e-06, "loss": 0.5203, "step": 6338 }, { "epoch": 0.18676251786161482, "grad_norm": 1.7551697722543544, "learning_rate": 9.772549873436203e-06, "loss": 0.5988, "step": 6339 }, { "epoch": 0.18679198031907843, "grad_norm": 1.6737376910450834, "learning_rate": 9.77239651274788e-06, "loss": 0.4563, "step": 6340 }, { "epoch": 0.186821442776542, "grad_norm": 1.7583460203816823, "learning_rate": 9.772243101578602e-06, "loss": 0.5489, "step": 6341 }, { "epoch": 0.18685090523400558, "grad_norm": 1.5379582528407292, "learning_rate": 9.77208963992999e-06, "loss": 0.4761, "step": 6342 }, { "epoch": 0.18688036769146915, "grad_norm": 2.1213404672825624, "learning_rate": 9.771936127803669e-06, "loss": 0.5083, "step": 6343 }, { "epoch": 0.18690983014893273, "grad_norm": 1.694031048584376, "learning_rate": 9.771782565201261e-06, "loss": 0.5248, "step": 6344 }, { "epoch": 0.1869392926063963, "grad_norm": 1.8767012645091004, "learning_rate": 9.771628952124392e-06, "loss": 0.741, "step": 6345 }, { "epoch": 0.18696875506385988, "grad_norm": 1.5478362790486964, "learning_rate": 9.771475288574687e-06, "loss": 0.4827, "step": 6346 }, { "epoch": 0.18699821752132345, "grad_norm": 1.7637105118214473, "learning_rate": 9.77132157455377e-06, "loss": 0.5812, "step": 6347 }, { "epoch": 0.18702767997878703, "grad_norm": 1.646265482130673, "learning_rate": 9.771167810063266e-06, "loss": 0.6652, "step": 6348 }, { "epoch": 0.1870571424362506, "grad_norm": 1.4805337242222234, "learning_rate": 9.771013995104805e-06, "loss": 0.4763, "step": 6349 }, { "epoch": 0.18708660489371418, "grad_norm": 1.9979211598366988, "learning_rate": 9.770860129680011e-06, "loss": 0.6062, "step": 6350 }, { "epoch": 0.18711606735117775, "grad_norm": 1.6114549776061382, "learning_rate": 9.770706213790514e-06, "loss": 0.5121, "step": 6351 }, { "epoch": 0.18714552980864133, "grad_norm": 1.403550144358939, "learning_rate": 9.770552247437937e-06, "loss": 0.4765, "step": 6352 }, { "epoch": 0.18717499226610493, "grad_norm": 1.4017209845961567, "learning_rate": 9.770398230623915e-06, "loss": 0.3892, "step": 6353 }, { "epoch": 0.1872044547235685, "grad_norm": 1.496137243030983, "learning_rate": 9.770244163350074e-06, "loss": 0.3861, "step": 6354 }, { "epoch": 0.18723391718103208, "grad_norm": 1.6068560822185258, "learning_rate": 9.770090045618044e-06, "loss": 0.5834, "step": 6355 }, { "epoch": 0.18726337963849565, "grad_norm": 1.6264994102014187, "learning_rate": 9.769935877429455e-06, "loss": 0.5892, "step": 6356 }, { "epoch": 0.18729284209595923, "grad_norm": 1.7786282883517537, "learning_rate": 9.769781658785937e-06, "loss": 0.5205, "step": 6357 }, { "epoch": 0.1873223045534228, "grad_norm": 1.7566424490930643, "learning_rate": 9.769627389689123e-06, "loss": 0.7383, "step": 6358 }, { "epoch": 0.18735176701088638, "grad_norm": 1.6019767163091545, "learning_rate": 9.769473070140645e-06, "loss": 0.4162, "step": 6359 }, { "epoch": 0.18738122946834995, "grad_norm": 1.5273095341168457, "learning_rate": 9.769318700142134e-06, "loss": 0.5192, "step": 6360 }, { "epoch": 0.18741069192581353, "grad_norm": 1.8034160153709817, "learning_rate": 9.769164279695222e-06, "loss": 0.5081, "step": 6361 }, { "epoch": 0.1874401543832771, "grad_norm": 1.7256338118073673, "learning_rate": 9.769009808801546e-06, "loss": 0.4842, "step": 6362 }, { "epoch": 0.18746961684074068, "grad_norm": 1.4974976496802628, "learning_rate": 9.768855287462736e-06, "loss": 0.5142, "step": 6363 }, { "epoch": 0.18749907929820425, "grad_norm": 1.4135082689273912, "learning_rate": 9.768700715680428e-06, "loss": 0.4145, "step": 6364 }, { "epoch": 0.18752854175566783, "grad_norm": 1.5379013777981392, "learning_rate": 9.768546093456258e-06, "loss": 0.4844, "step": 6365 }, { "epoch": 0.18755800421313143, "grad_norm": 1.7354893937542273, "learning_rate": 9.76839142079186e-06, "loss": 0.5915, "step": 6366 }, { "epoch": 0.187587466670595, "grad_norm": 1.6235889741830907, "learning_rate": 9.768236697688871e-06, "loss": 0.4764, "step": 6367 }, { "epoch": 0.18761692912805858, "grad_norm": 1.4469164938391044, "learning_rate": 9.768081924148926e-06, "loss": 0.3943, "step": 6368 }, { "epoch": 0.18764639158552215, "grad_norm": 1.5630521044063728, "learning_rate": 9.767927100173666e-06, "loss": 0.5259, "step": 6369 }, { "epoch": 0.18767585404298573, "grad_norm": 1.505391840496138, "learning_rate": 9.767772225764724e-06, "loss": 0.443, "step": 6370 }, { "epoch": 0.1877053165004493, "grad_norm": 1.6737481042976483, "learning_rate": 9.767617300923743e-06, "loss": 0.573, "step": 6371 }, { "epoch": 0.18773477895791288, "grad_norm": 1.497888666794108, "learning_rate": 9.767462325652357e-06, "loss": 0.5291, "step": 6372 }, { "epoch": 0.18776424141537645, "grad_norm": 1.5803383938311886, "learning_rate": 9.767307299952208e-06, "loss": 0.4745, "step": 6373 }, { "epoch": 0.18779370387284003, "grad_norm": 1.5584600477415569, "learning_rate": 9.767152223824937e-06, "loss": 0.5149, "step": 6374 }, { "epoch": 0.1878231663303036, "grad_norm": 1.8409363026623091, "learning_rate": 9.76699709727218e-06, "loss": 0.6529, "step": 6375 }, { "epoch": 0.18785262878776718, "grad_norm": 1.5613522009247784, "learning_rate": 9.766841920295583e-06, "loss": 0.5395, "step": 6376 }, { "epoch": 0.18788209124523075, "grad_norm": 1.6649169058340554, "learning_rate": 9.766686692896782e-06, "loss": 0.4797, "step": 6377 }, { "epoch": 0.18791155370269433, "grad_norm": 1.5263997016040851, "learning_rate": 9.766531415077422e-06, "loss": 0.4784, "step": 6378 }, { "epoch": 0.18794101616015793, "grad_norm": 1.5087483261433177, "learning_rate": 9.766376086839149e-06, "loss": 0.449, "step": 6379 }, { "epoch": 0.1879704786176215, "grad_norm": 1.6015565688492608, "learning_rate": 9.7662207081836e-06, "loss": 0.4263, "step": 6380 }, { "epoch": 0.18799994107508508, "grad_norm": 1.5635095289781362, "learning_rate": 9.76606527911242e-06, "loss": 0.5129, "step": 6381 }, { "epoch": 0.18802940353254866, "grad_norm": 1.3822224012440718, "learning_rate": 9.765909799627256e-06, "loss": 0.4244, "step": 6382 }, { "epoch": 0.18805886599001223, "grad_norm": 1.405754394623296, "learning_rate": 9.765754269729747e-06, "loss": 0.36, "step": 6383 }, { "epoch": 0.1880883284474758, "grad_norm": 1.4686202964950588, "learning_rate": 9.765598689421545e-06, "loss": 0.5495, "step": 6384 }, { "epoch": 0.18811779090493938, "grad_norm": 1.6046554604027885, "learning_rate": 9.765443058704292e-06, "loss": 0.6378, "step": 6385 }, { "epoch": 0.18814725336240296, "grad_norm": 1.6941622427893417, "learning_rate": 9.765287377579635e-06, "loss": 0.6474, "step": 6386 }, { "epoch": 0.18817671581986653, "grad_norm": 1.5831761061282625, "learning_rate": 9.765131646049219e-06, "loss": 0.3493, "step": 6387 }, { "epoch": 0.1882061782773301, "grad_norm": 1.6553263612961193, "learning_rate": 9.764975864114694e-06, "loss": 0.4809, "step": 6388 }, { "epoch": 0.18823564073479368, "grad_norm": 1.699316650362435, "learning_rate": 9.764820031777705e-06, "loss": 0.5057, "step": 6389 }, { "epoch": 0.18826510319225725, "grad_norm": 1.6825442124435148, "learning_rate": 9.764664149039902e-06, "loss": 0.6549, "step": 6390 }, { "epoch": 0.18829456564972083, "grad_norm": 1.515444832389561, "learning_rate": 9.764508215902935e-06, "loss": 0.5032, "step": 6391 }, { "epoch": 0.18832402810718443, "grad_norm": 1.6471375531520098, "learning_rate": 9.764352232368452e-06, "loss": 0.5255, "step": 6392 }, { "epoch": 0.188353490564648, "grad_norm": 1.8030370501165338, "learning_rate": 9.764196198438103e-06, "loss": 0.5184, "step": 6393 }, { "epoch": 0.18838295302211158, "grad_norm": 1.7239922732498707, "learning_rate": 9.764040114113538e-06, "loss": 0.3954, "step": 6394 }, { "epoch": 0.18841241547957516, "grad_norm": 1.648354858895402, "learning_rate": 9.763883979396407e-06, "loss": 0.4904, "step": 6395 }, { "epoch": 0.18844187793703873, "grad_norm": 1.5290988199541662, "learning_rate": 9.763727794288366e-06, "loss": 0.5328, "step": 6396 }, { "epoch": 0.1884713403945023, "grad_norm": 1.527462179338811, "learning_rate": 9.763571558791062e-06, "loss": 0.5468, "step": 6397 }, { "epoch": 0.18850080285196588, "grad_norm": 1.6601730626463647, "learning_rate": 9.763415272906152e-06, "loss": 0.324, "step": 6398 }, { "epoch": 0.18853026530942946, "grad_norm": 1.7079006812176785, "learning_rate": 9.763258936635284e-06, "loss": 0.5389, "step": 6399 }, { "epoch": 0.18855972776689303, "grad_norm": 1.6340095686804978, "learning_rate": 9.763102549980117e-06, "loss": 0.5523, "step": 6400 }, { "epoch": 0.1885891902243566, "grad_norm": 1.6543726333782414, "learning_rate": 9.762946112942303e-06, "loss": 0.6017, "step": 6401 }, { "epoch": 0.18861865268182018, "grad_norm": 1.6512979962487317, "learning_rate": 9.762789625523495e-06, "loss": 0.4627, "step": 6402 }, { "epoch": 0.18864811513928376, "grad_norm": 1.9803802459807887, "learning_rate": 9.762633087725351e-06, "loss": 0.6269, "step": 6403 }, { "epoch": 0.18867757759674733, "grad_norm": 1.7191457422833112, "learning_rate": 9.762476499549525e-06, "loss": 0.4882, "step": 6404 }, { "epoch": 0.18870704005421093, "grad_norm": 1.5060067894737783, "learning_rate": 9.762319860997675e-06, "loss": 0.4319, "step": 6405 }, { "epoch": 0.1887365025116745, "grad_norm": 1.771371596627016, "learning_rate": 9.762163172071455e-06, "loss": 0.5553, "step": 6406 }, { "epoch": 0.18876596496913808, "grad_norm": 1.6109955196748005, "learning_rate": 9.762006432772527e-06, "loss": 0.5071, "step": 6407 }, { "epoch": 0.18879542742660166, "grad_norm": 1.9653008115613415, "learning_rate": 9.761849643102543e-06, "loss": 0.5419, "step": 6408 }, { "epoch": 0.18882488988406523, "grad_norm": 1.8061631589390053, "learning_rate": 9.761692803063167e-06, "loss": 0.4823, "step": 6409 }, { "epoch": 0.1888543523415288, "grad_norm": 1.48824535770901, "learning_rate": 9.761535912656054e-06, "loss": 0.446, "step": 6410 }, { "epoch": 0.18888381479899238, "grad_norm": 1.5873245051412692, "learning_rate": 9.761378971882867e-06, "loss": 0.4622, "step": 6411 }, { "epoch": 0.18891327725645596, "grad_norm": 1.5701735147206604, "learning_rate": 9.761221980745263e-06, "loss": 0.491, "step": 6412 }, { "epoch": 0.18894273971391953, "grad_norm": 1.61305135893666, "learning_rate": 9.761064939244905e-06, "loss": 0.4412, "step": 6413 }, { "epoch": 0.1889722021713831, "grad_norm": 1.6955901911893951, "learning_rate": 9.760907847383454e-06, "loss": 0.5193, "step": 6414 }, { "epoch": 0.18900166462884668, "grad_norm": 1.5675233681755631, "learning_rate": 9.760750705162567e-06, "loss": 0.6032, "step": 6415 }, { "epoch": 0.18903112708631026, "grad_norm": 1.6780480866535745, "learning_rate": 9.760593512583913e-06, "loss": 0.4913, "step": 6416 }, { "epoch": 0.18906058954377383, "grad_norm": 1.4785988337709643, "learning_rate": 9.760436269649152e-06, "loss": 0.4077, "step": 6417 }, { "epoch": 0.18909005200123744, "grad_norm": 1.7589733602159496, "learning_rate": 9.760278976359945e-06, "loss": 0.7243, "step": 6418 }, { "epoch": 0.189119514458701, "grad_norm": 1.6108958232463688, "learning_rate": 9.760121632717957e-06, "loss": 0.596, "step": 6419 }, { "epoch": 0.18914897691616459, "grad_norm": 1.5724949199667855, "learning_rate": 9.759964238724855e-06, "loss": 0.4443, "step": 6420 }, { "epoch": 0.18917843937362816, "grad_norm": 1.6788494141123027, "learning_rate": 9.759806794382303e-06, "loss": 0.4162, "step": 6421 }, { "epoch": 0.18920790183109173, "grad_norm": 1.896879782921572, "learning_rate": 9.759649299691962e-06, "loss": 0.6433, "step": 6422 }, { "epoch": 0.1892373642885553, "grad_norm": 1.596676768127979, "learning_rate": 9.759491754655504e-06, "loss": 0.516, "step": 6423 }, { "epoch": 0.18926682674601888, "grad_norm": 1.6492068794725143, "learning_rate": 9.759334159274591e-06, "loss": 0.6413, "step": 6424 }, { "epoch": 0.18929628920348246, "grad_norm": 1.7423692795014298, "learning_rate": 9.759176513550894e-06, "loss": 0.3547, "step": 6425 }, { "epoch": 0.18932575166094603, "grad_norm": 1.5991964075875655, "learning_rate": 9.759018817486077e-06, "loss": 0.6023, "step": 6426 }, { "epoch": 0.1893552141184096, "grad_norm": 1.6593691574659248, "learning_rate": 9.75886107108181e-06, "loss": 0.5644, "step": 6427 }, { "epoch": 0.18938467657587318, "grad_norm": 1.5579184925467378, "learning_rate": 9.758703274339759e-06, "loss": 0.5556, "step": 6428 }, { "epoch": 0.18941413903333676, "grad_norm": 1.4681627100524326, "learning_rate": 9.758545427261596e-06, "loss": 0.5494, "step": 6429 }, { "epoch": 0.18944360149080033, "grad_norm": 1.9235747919892332, "learning_rate": 9.75838752984899e-06, "loss": 0.4847, "step": 6430 }, { "epoch": 0.18947306394826394, "grad_norm": 1.7406222615044729, "learning_rate": 9.758229582103608e-06, "loss": 0.4755, "step": 6431 }, { "epoch": 0.1895025264057275, "grad_norm": 1.7959427295204216, "learning_rate": 9.758071584027128e-06, "loss": 0.5951, "step": 6432 }, { "epoch": 0.1895319888631911, "grad_norm": 1.5676829309363225, "learning_rate": 9.757913535621216e-06, "loss": 0.5583, "step": 6433 }, { "epoch": 0.18956145132065466, "grad_norm": 1.4119010879401197, "learning_rate": 9.757755436887541e-06, "loss": 0.3922, "step": 6434 }, { "epoch": 0.18959091377811824, "grad_norm": 1.524035811443321, "learning_rate": 9.757597287827782e-06, "loss": 0.499, "step": 6435 }, { "epoch": 0.1896203762355818, "grad_norm": 1.5315324566998225, "learning_rate": 9.75743908844361e-06, "loss": 0.4676, "step": 6436 }, { "epoch": 0.18964983869304539, "grad_norm": 1.5336342335797497, "learning_rate": 9.757280838736694e-06, "loss": 0.4288, "step": 6437 }, { "epoch": 0.18967930115050896, "grad_norm": 1.7609381161711162, "learning_rate": 9.757122538708715e-06, "loss": 0.4812, "step": 6438 }, { "epoch": 0.18970876360797254, "grad_norm": 1.4723723411232337, "learning_rate": 9.75696418836134e-06, "loss": 0.4705, "step": 6439 }, { "epoch": 0.1897382260654361, "grad_norm": 1.7620474514751445, "learning_rate": 9.756805787696248e-06, "loss": 0.544, "step": 6440 }, { "epoch": 0.18976768852289969, "grad_norm": 1.3608008232405666, "learning_rate": 9.756647336715115e-06, "loss": 0.4143, "step": 6441 }, { "epoch": 0.18979715098036326, "grad_norm": 1.739087680623689, "learning_rate": 9.756488835419617e-06, "loss": 0.593, "step": 6442 }, { "epoch": 0.18982661343782684, "grad_norm": 1.721090571583614, "learning_rate": 9.756330283811429e-06, "loss": 0.5855, "step": 6443 }, { "epoch": 0.18985607589529044, "grad_norm": 1.5294763436557814, "learning_rate": 9.756171681892228e-06, "loss": 0.5205, "step": 6444 }, { "epoch": 0.189885538352754, "grad_norm": 1.5717603732144767, "learning_rate": 9.756013029663694e-06, "loss": 0.5889, "step": 6445 }, { "epoch": 0.1899150008102176, "grad_norm": 1.7253434113895347, "learning_rate": 9.7558543271275e-06, "loss": 0.4472, "step": 6446 }, { "epoch": 0.18994446326768116, "grad_norm": 1.6949117745972917, "learning_rate": 9.755695574285331e-06, "loss": 0.5959, "step": 6447 }, { "epoch": 0.18997392572514474, "grad_norm": 1.5808879876061412, "learning_rate": 9.755536771138865e-06, "loss": 0.4506, "step": 6448 }, { "epoch": 0.1900033881826083, "grad_norm": 1.5984974028405983, "learning_rate": 9.75537791768978e-06, "loss": 0.4935, "step": 6449 }, { "epoch": 0.1900328506400719, "grad_norm": 1.6471941943924107, "learning_rate": 9.755219013939755e-06, "loss": 0.5305, "step": 6450 }, { "epoch": 0.19006231309753546, "grad_norm": 1.5301894248945078, "learning_rate": 9.75506005989047e-06, "loss": 0.4342, "step": 6451 }, { "epoch": 0.19009177555499904, "grad_norm": 1.938577922229204, "learning_rate": 9.754901055543613e-06, "loss": 0.6405, "step": 6452 }, { "epoch": 0.1901212380124626, "grad_norm": 2.008020879924788, "learning_rate": 9.75474200090086e-06, "loss": 0.5475, "step": 6453 }, { "epoch": 0.1901507004699262, "grad_norm": 1.5141373450414022, "learning_rate": 9.754582895963896e-06, "loss": 0.46, "step": 6454 }, { "epoch": 0.19018016292738976, "grad_norm": 1.6932051632421838, "learning_rate": 9.754423740734404e-06, "loss": 0.6149, "step": 6455 }, { "epoch": 0.19020962538485334, "grad_norm": 1.4544451845404995, "learning_rate": 9.754264535214064e-06, "loss": 0.5269, "step": 6456 }, { "epoch": 0.19023908784231694, "grad_norm": 1.7209629861715072, "learning_rate": 9.754105279404564e-06, "loss": 0.5626, "step": 6457 }, { "epoch": 0.19026855029978051, "grad_norm": 1.506893209804969, "learning_rate": 9.753945973307589e-06, "loss": 0.4957, "step": 6458 }, { "epoch": 0.1902980127572441, "grad_norm": 1.7124551897350286, "learning_rate": 9.753786616924819e-06, "loss": 0.6121, "step": 6459 }, { "epoch": 0.19032747521470766, "grad_norm": 1.6633469418941824, "learning_rate": 9.753627210257945e-06, "loss": 0.521, "step": 6460 }, { "epoch": 0.19035693767217124, "grad_norm": 1.6685695397422216, "learning_rate": 9.75346775330865e-06, "loss": 0.5133, "step": 6461 }, { "epoch": 0.19038640012963481, "grad_norm": 1.6176929088974137, "learning_rate": 9.753308246078623e-06, "loss": 0.4122, "step": 6462 }, { "epoch": 0.1904158625870984, "grad_norm": 1.4378140546944957, "learning_rate": 9.753148688569549e-06, "loss": 0.4154, "step": 6463 }, { "epoch": 0.19044532504456196, "grad_norm": 1.6358288480452283, "learning_rate": 9.752989080783117e-06, "loss": 0.4604, "step": 6464 }, { "epoch": 0.19047478750202554, "grad_norm": 1.680590196503885, "learning_rate": 9.752829422721015e-06, "loss": 0.5915, "step": 6465 }, { "epoch": 0.1905042499594891, "grad_norm": 1.4713646247752528, "learning_rate": 9.752669714384932e-06, "loss": 0.4753, "step": 6466 }, { "epoch": 0.1905337124169527, "grad_norm": 1.597737696767592, "learning_rate": 9.752509955776556e-06, "loss": 0.415, "step": 6467 }, { "epoch": 0.19056317487441626, "grad_norm": 1.7010314853330082, "learning_rate": 9.75235014689758e-06, "loss": 0.7008, "step": 6468 }, { "epoch": 0.19059263733187984, "grad_norm": 1.583781286635615, "learning_rate": 9.752190287749692e-06, "loss": 0.4338, "step": 6469 }, { "epoch": 0.19062209978934344, "grad_norm": 1.9850266017667688, "learning_rate": 9.752030378334582e-06, "loss": 0.5893, "step": 6470 }, { "epoch": 0.19065156224680702, "grad_norm": 1.54367619190646, "learning_rate": 9.751870418653944e-06, "loss": 0.663, "step": 6471 }, { "epoch": 0.1906810247042706, "grad_norm": 1.4525997049650607, "learning_rate": 9.75171040870947e-06, "loss": 0.4307, "step": 6472 }, { "epoch": 0.19071048716173417, "grad_norm": 1.7736075167448433, "learning_rate": 9.75155034850285e-06, "loss": 0.5732, "step": 6473 }, { "epoch": 0.19073994961919774, "grad_norm": 1.6917043801460445, "learning_rate": 9.751390238035778e-06, "loss": 0.586, "step": 6474 }, { "epoch": 0.19076941207666132, "grad_norm": 1.6093462290094163, "learning_rate": 9.75123007730995e-06, "loss": 0.4622, "step": 6475 }, { "epoch": 0.1907988745341249, "grad_norm": 1.5001951034233447, "learning_rate": 9.751069866327056e-06, "loss": 0.5069, "step": 6476 }, { "epoch": 0.19082833699158847, "grad_norm": 1.7008157029170388, "learning_rate": 9.750909605088794e-06, "loss": 0.5607, "step": 6477 }, { "epoch": 0.19085779944905204, "grad_norm": 1.7307607827621052, "learning_rate": 9.75074929359686e-06, "loss": 0.511, "step": 6478 }, { "epoch": 0.19088726190651562, "grad_norm": 1.5941233605504974, "learning_rate": 9.750588931852944e-06, "loss": 0.5117, "step": 6479 }, { "epoch": 0.1909167243639792, "grad_norm": 1.6394993181429065, "learning_rate": 9.750428519858749e-06, "loss": 0.4953, "step": 6480 }, { "epoch": 0.19094618682144276, "grad_norm": 1.8431500695275826, "learning_rate": 9.750268057615968e-06, "loss": 0.4546, "step": 6481 }, { "epoch": 0.19097564927890634, "grad_norm": 1.7599917588613665, "learning_rate": 9.750107545126297e-06, "loss": 0.4896, "step": 6482 }, { "epoch": 0.19100511173636994, "grad_norm": 1.490688812717949, "learning_rate": 9.74994698239144e-06, "loss": 0.5282, "step": 6483 }, { "epoch": 0.19103457419383352, "grad_norm": 1.5899386415261805, "learning_rate": 9.749786369413089e-06, "loss": 0.5537, "step": 6484 }, { "epoch": 0.1910640366512971, "grad_norm": 1.7596831309906793, "learning_rate": 9.749625706192946e-06, "loss": 0.4917, "step": 6485 }, { "epoch": 0.19109349910876067, "grad_norm": 1.5178163366724742, "learning_rate": 9.74946499273271e-06, "loss": 0.5497, "step": 6486 }, { "epoch": 0.19112296156622424, "grad_norm": 1.6445966830466354, "learning_rate": 9.74930422903408e-06, "loss": 0.3713, "step": 6487 }, { "epoch": 0.19115242402368782, "grad_norm": 1.716513165367898, "learning_rate": 9.749143415098756e-06, "loss": 0.5482, "step": 6488 }, { "epoch": 0.1911818864811514, "grad_norm": 1.6333169587540826, "learning_rate": 9.748982550928444e-06, "loss": 0.5484, "step": 6489 }, { "epoch": 0.19121134893861497, "grad_norm": 1.8217276739071036, "learning_rate": 9.74882163652484e-06, "loss": 0.5578, "step": 6490 }, { "epoch": 0.19124081139607854, "grad_norm": 1.7282799092910208, "learning_rate": 9.748660671889645e-06, "loss": 0.6417, "step": 6491 }, { "epoch": 0.19127027385354212, "grad_norm": 1.6916761832497855, "learning_rate": 9.748499657024569e-06, "loss": 0.5629, "step": 6492 }, { "epoch": 0.1912997363110057, "grad_norm": 1.6939575426443017, "learning_rate": 9.748338591931308e-06, "loss": 0.6238, "step": 6493 }, { "epoch": 0.19132919876846927, "grad_norm": 1.6966967739794352, "learning_rate": 9.74817747661157e-06, "loss": 0.4737, "step": 6494 }, { "epoch": 0.19135866122593284, "grad_norm": 1.6302363492325749, "learning_rate": 9.748016311067057e-06, "loss": 0.5402, "step": 6495 }, { "epoch": 0.19138812368339644, "grad_norm": 1.81654093060296, "learning_rate": 9.747855095299474e-06, "loss": 0.6087, "step": 6496 }, { "epoch": 0.19141758614086002, "grad_norm": 1.655699846768678, "learning_rate": 9.747693829310528e-06, "loss": 0.3883, "step": 6497 }, { "epoch": 0.1914470485983236, "grad_norm": 1.6711339466166555, "learning_rate": 9.747532513101923e-06, "loss": 0.537, "step": 6498 }, { "epoch": 0.19147651105578717, "grad_norm": 1.6908564584788592, "learning_rate": 9.747371146675366e-06, "loss": 0.5618, "step": 6499 }, { "epoch": 0.19150597351325074, "grad_norm": 1.753477037511017, "learning_rate": 9.747209730032564e-06, "loss": 0.5095, "step": 6500 }, { "epoch": 0.19153543597071432, "grad_norm": 1.502997517915703, "learning_rate": 9.747048263175225e-06, "loss": 0.4541, "step": 6501 }, { "epoch": 0.1915648984281779, "grad_norm": 1.5747914403840115, "learning_rate": 9.746886746105055e-06, "loss": 0.4923, "step": 6502 }, { "epoch": 0.19159436088564147, "grad_norm": 1.6225234307817502, "learning_rate": 9.746725178823764e-06, "loss": 0.5845, "step": 6503 }, { "epoch": 0.19162382334310504, "grad_norm": 1.5341814959674007, "learning_rate": 9.74656356133306e-06, "loss": 0.3766, "step": 6504 }, { "epoch": 0.19165328580056862, "grad_norm": 1.5364401812266943, "learning_rate": 9.746401893634653e-06, "loss": 0.4043, "step": 6505 }, { "epoch": 0.1916827482580322, "grad_norm": 1.5782451564765683, "learning_rate": 9.746240175730256e-06, "loss": 0.4352, "step": 6506 }, { "epoch": 0.19171221071549577, "grad_norm": 1.8326087202158818, "learning_rate": 9.746078407621573e-06, "loss": 0.4747, "step": 6507 }, { "epoch": 0.19174167317295937, "grad_norm": 1.5106744061272552, "learning_rate": 9.745916589310323e-06, "loss": 0.5131, "step": 6508 }, { "epoch": 0.19177113563042295, "grad_norm": 1.7155299339735193, "learning_rate": 9.74575472079821e-06, "loss": 0.555, "step": 6509 }, { "epoch": 0.19180059808788652, "grad_norm": 1.6761720961524194, "learning_rate": 9.745592802086952e-06, "loss": 0.6116, "step": 6510 }, { "epoch": 0.1918300605453501, "grad_norm": 1.7842828220248612, "learning_rate": 9.74543083317826e-06, "loss": 0.6594, "step": 6511 }, { "epoch": 0.19185952300281367, "grad_norm": 1.5697549803286284, "learning_rate": 9.745268814073845e-06, "loss": 0.4289, "step": 6512 }, { "epoch": 0.19188898546027724, "grad_norm": 1.602317921274868, "learning_rate": 9.745106744775424e-06, "loss": 0.6127, "step": 6513 }, { "epoch": 0.19191844791774082, "grad_norm": 1.6851080126369538, "learning_rate": 9.74494462528471e-06, "loss": 0.4132, "step": 6514 }, { "epoch": 0.1919479103752044, "grad_norm": 1.829193266243941, "learning_rate": 9.744782455603416e-06, "loss": 0.6083, "step": 6515 }, { "epoch": 0.19197737283266797, "grad_norm": 1.6798903794872646, "learning_rate": 9.744620235733262e-06, "loss": 0.6666, "step": 6516 }, { "epoch": 0.19200683529013154, "grad_norm": 1.7809648145957775, "learning_rate": 9.74445796567596e-06, "loss": 0.4691, "step": 6517 }, { "epoch": 0.19203629774759512, "grad_norm": 1.603510149426777, "learning_rate": 9.744295645433226e-06, "loss": 0.5714, "step": 6518 }, { "epoch": 0.1920657602050587, "grad_norm": 1.920547080840905, "learning_rate": 9.744133275006779e-06, "loss": 0.6381, "step": 6519 }, { "epoch": 0.19209522266252227, "grad_norm": 1.555089218061633, "learning_rate": 9.743970854398339e-06, "loss": 0.5368, "step": 6520 }, { "epoch": 0.19212468511998587, "grad_norm": 1.6265294043752265, "learning_rate": 9.74380838360962e-06, "loss": 0.3558, "step": 6521 }, { "epoch": 0.19215414757744945, "grad_norm": 1.728795716831799, "learning_rate": 9.743645862642339e-06, "loss": 0.542, "step": 6522 }, { "epoch": 0.19218361003491302, "grad_norm": 1.6501925151949963, "learning_rate": 9.74348329149822e-06, "loss": 0.5702, "step": 6523 }, { "epoch": 0.1922130724923766, "grad_norm": 1.4577016435373824, "learning_rate": 9.74332067017898e-06, "loss": 0.5171, "step": 6524 }, { "epoch": 0.19224253494984017, "grad_norm": 1.6231523357337958, "learning_rate": 9.743157998686339e-06, "loss": 0.5087, "step": 6525 }, { "epoch": 0.19227199740730375, "grad_norm": 1.6295728760099473, "learning_rate": 9.742995277022018e-06, "loss": 0.4567, "step": 6526 }, { "epoch": 0.19230145986476732, "grad_norm": 1.5891546797172778, "learning_rate": 9.742832505187738e-06, "loss": 0.541, "step": 6527 }, { "epoch": 0.1923309223222309, "grad_norm": 1.7167536242120247, "learning_rate": 9.742669683185223e-06, "loss": 0.4528, "step": 6528 }, { "epoch": 0.19236038477969447, "grad_norm": 1.6753846839563078, "learning_rate": 9.742506811016192e-06, "loss": 0.5377, "step": 6529 }, { "epoch": 0.19238984723715805, "grad_norm": 1.5845140105378193, "learning_rate": 9.742343888682371e-06, "loss": 0.4606, "step": 6530 }, { "epoch": 0.19241930969462162, "grad_norm": 1.4477601252634733, "learning_rate": 9.74218091618548e-06, "loss": 0.5731, "step": 6531 }, { "epoch": 0.1924487721520852, "grad_norm": 1.8492258305268323, "learning_rate": 9.742017893527245e-06, "loss": 0.5559, "step": 6532 }, { "epoch": 0.19247823460954877, "grad_norm": 1.5731464417513903, "learning_rate": 9.741854820709389e-06, "loss": 0.4646, "step": 6533 }, { "epoch": 0.19250769706701237, "grad_norm": 1.5430326308004478, "learning_rate": 9.741691697733638e-06, "loss": 0.4135, "step": 6534 }, { "epoch": 0.19253715952447595, "grad_norm": 1.6623127151362491, "learning_rate": 9.741528524601719e-06, "loss": 0.599, "step": 6535 }, { "epoch": 0.19256662198193952, "grad_norm": 1.5782245487540854, "learning_rate": 9.741365301315356e-06, "loss": 0.5268, "step": 6536 }, { "epoch": 0.1925960844394031, "grad_norm": 1.4484789136627065, "learning_rate": 9.741202027876272e-06, "loss": 0.4491, "step": 6537 }, { "epoch": 0.19262554689686667, "grad_norm": 1.2968437542159306, "learning_rate": 9.741038704286202e-06, "loss": 0.3396, "step": 6538 }, { "epoch": 0.19265500935433025, "grad_norm": 1.6905485711769646, "learning_rate": 9.74087533054687e-06, "loss": 0.5968, "step": 6539 }, { "epoch": 0.19268447181179382, "grad_norm": 1.5489851887711852, "learning_rate": 9.74071190666e-06, "loss": 0.5745, "step": 6540 }, { "epoch": 0.1927139342692574, "grad_norm": 1.5117326082829925, "learning_rate": 9.740548432627328e-06, "loss": 0.4779, "step": 6541 }, { "epoch": 0.19274339672672097, "grad_norm": 1.52259432153111, "learning_rate": 9.740384908450578e-06, "loss": 0.3743, "step": 6542 }, { "epoch": 0.19277285918418455, "grad_norm": 1.4086229283374423, "learning_rate": 9.74022133413148e-06, "loss": 0.3365, "step": 6543 }, { "epoch": 0.19280232164164812, "grad_norm": 1.7932700835321826, "learning_rate": 9.740057709671767e-06, "loss": 0.5816, "step": 6544 }, { "epoch": 0.1928317840991117, "grad_norm": 1.6290669399846682, "learning_rate": 9.739894035073166e-06, "loss": 0.5462, "step": 6545 }, { "epoch": 0.19286124655657527, "grad_norm": 1.7734320035083795, "learning_rate": 9.739730310337411e-06, "loss": 0.5399, "step": 6546 }, { "epoch": 0.19289070901403887, "grad_norm": 1.4297003098237262, "learning_rate": 9.739566535466234e-06, "loss": 0.4782, "step": 6547 }, { "epoch": 0.19292017147150245, "grad_norm": 1.4830729857831582, "learning_rate": 9.739402710461367e-06, "loss": 0.444, "step": 6548 }, { "epoch": 0.19294963392896602, "grad_norm": 1.7792852583987904, "learning_rate": 9.739238835324543e-06, "loss": 0.6347, "step": 6549 }, { "epoch": 0.1929790963864296, "grad_norm": 1.6303597104553145, "learning_rate": 9.739074910057493e-06, "loss": 0.4948, "step": 6550 }, { "epoch": 0.19300855884389317, "grad_norm": 1.8127353000394508, "learning_rate": 9.738910934661953e-06, "loss": 0.6613, "step": 6551 }, { "epoch": 0.19303802130135675, "grad_norm": 1.5869617740333, "learning_rate": 9.73874690913966e-06, "loss": 0.5532, "step": 6552 }, { "epoch": 0.19306748375882032, "grad_norm": 1.6016942664467064, "learning_rate": 9.738582833492344e-06, "loss": 0.5581, "step": 6553 }, { "epoch": 0.1930969462162839, "grad_norm": 1.7047903219423537, "learning_rate": 9.738418707721744e-06, "loss": 0.4624, "step": 6554 }, { "epoch": 0.19312640867374747, "grad_norm": 1.8717131241203182, "learning_rate": 9.738254531829594e-06, "loss": 0.5751, "step": 6555 }, { "epoch": 0.19315587113121105, "grad_norm": 1.4806403267784882, "learning_rate": 9.738090305817632e-06, "loss": 0.3478, "step": 6556 }, { "epoch": 0.19318533358867462, "grad_norm": 1.6895221067596786, "learning_rate": 9.737926029687595e-06, "loss": 0.5257, "step": 6557 }, { "epoch": 0.1932147960461382, "grad_norm": 1.6955440953570895, "learning_rate": 9.73776170344122e-06, "loss": 0.5751, "step": 6558 }, { "epoch": 0.19324425850360177, "grad_norm": 1.823070931052678, "learning_rate": 9.737597327080246e-06, "loss": 0.5457, "step": 6559 }, { "epoch": 0.19327372096106538, "grad_norm": 1.5513956563914852, "learning_rate": 9.737432900606411e-06, "loss": 0.4718, "step": 6560 }, { "epoch": 0.19330318341852895, "grad_norm": 1.600877110943095, "learning_rate": 9.737268424021456e-06, "loss": 0.512, "step": 6561 }, { "epoch": 0.19333264587599253, "grad_norm": 1.8699556338441388, "learning_rate": 9.737103897327116e-06, "loss": 0.5449, "step": 6562 }, { "epoch": 0.1933621083334561, "grad_norm": 1.6170176938579666, "learning_rate": 9.736939320525138e-06, "loss": 0.6294, "step": 6563 }, { "epoch": 0.19339157079091968, "grad_norm": 1.4762860663602924, "learning_rate": 9.736774693617258e-06, "loss": 0.4878, "step": 6564 }, { "epoch": 0.19342103324838325, "grad_norm": 1.6600201276206503, "learning_rate": 9.736610016605219e-06, "loss": 0.5263, "step": 6565 }, { "epoch": 0.19345049570584683, "grad_norm": 1.7462402776268047, "learning_rate": 9.736445289490762e-06, "loss": 0.4754, "step": 6566 }, { "epoch": 0.1934799581633104, "grad_norm": 1.4758417077518653, "learning_rate": 9.73628051227563e-06, "loss": 0.4998, "step": 6567 }, { "epoch": 0.19350942062077398, "grad_norm": 1.653525987449944, "learning_rate": 9.736115684961569e-06, "loss": 0.4764, "step": 6568 }, { "epoch": 0.19353888307823755, "grad_norm": 1.4850738853234666, "learning_rate": 9.735950807550317e-06, "loss": 0.4301, "step": 6569 }, { "epoch": 0.19356834553570113, "grad_norm": 1.822708549303704, "learning_rate": 9.735785880043622e-06, "loss": 0.4963, "step": 6570 }, { "epoch": 0.1935978079931647, "grad_norm": 1.763070292820383, "learning_rate": 9.735620902443225e-06, "loss": 0.5402, "step": 6571 }, { "epoch": 0.19362727045062827, "grad_norm": 1.5395167158165828, "learning_rate": 9.735455874750876e-06, "loss": 0.4213, "step": 6572 }, { "epoch": 0.19365673290809188, "grad_norm": 1.5916011492515723, "learning_rate": 9.735290796968317e-06, "loss": 0.4346, "step": 6573 }, { "epoch": 0.19368619536555545, "grad_norm": 1.3899736002832663, "learning_rate": 9.735125669097296e-06, "loss": 0.4462, "step": 6574 }, { "epoch": 0.19371565782301903, "grad_norm": 1.7692151426617644, "learning_rate": 9.734960491139557e-06, "loss": 0.6415, "step": 6575 }, { "epoch": 0.1937451202804826, "grad_norm": 1.5964817159376976, "learning_rate": 9.73479526309685e-06, "loss": 0.4871, "step": 6576 }, { "epoch": 0.19377458273794618, "grad_norm": 2.0108046264866606, "learning_rate": 9.734629984970922e-06, "loss": 0.5483, "step": 6577 }, { "epoch": 0.19380404519540975, "grad_norm": 1.6115315719655459, "learning_rate": 9.73446465676352e-06, "loss": 0.5355, "step": 6578 }, { "epoch": 0.19383350765287333, "grad_norm": 1.7708204763919484, "learning_rate": 9.734299278476393e-06, "loss": 0.5742, "step": 6579 }, { "epoch": 0.1938629701103369, "grad_norm": 1.539174581014644, "learning_rate": 9.734133850111293e-06, "loss": 0.4819, "step": 6580 }, { "epoch": 0.19389243256780048, "grad_norm": 1.9314304964635591, "learning_rate": 9.733968371669968e-06, "loss": 0.5657, "step": 6581 }, { "epoch": 0.19392189502526405, "grad_norm": 1.6393292406832465, "learning_rate": 9.733802843154167e-06, "loss": 0.3915, "step": 6582 }, { "epoch": 0.19395135748272763, "grad_norm": 1.6236256119223895, "learning_rate": 9.733637264565643e-06, "loss": 0.3851, "step": 6583 }, { "epoch": 0.1939808199401912, "grad_norm": 1.724529304815544, "learning_rate": 9.733471635906145e-06, "loss": 0.5279, "step": 6584 }, { "epoch": 0.19401028239765478, "grad_norm": 1.6155359167300476, "learning_rate": 9.733305957177428e-06, "loss": 0.5448, "step": 6585 }, { "epoch": 0.19403974485511838, "grad_norm": 1.547915557512825, "learning_rate": 9.733140228381244e-06, "loss": 0.4989, "step": 6586 }, { "epoch": 0.19406920731258195, "grad_norm": 1.6187325077958359, "learning_rate": 9.732974449519344e-06, "loss": 0.5379, "step": 6587 }, { "epoch": 0.19409866977004553, "grad_norm": 1.6324454123745389, "learning_rate": 9.732808620593483e-06, "loss": 0.6835, "step": 6588 }, { "epoch": 0.1941281322275091, "grad_norm": 1.6513279495717146, "learning_rate": 9.732642741605413e-06, "loss": 0.5608, "step": 6589 }, { "epoch": 0.19415759468497268, "grad_norm": 1.9762521026201092, "learning_rate": 9.732476812556892e-06, "loss": 0.6512, "step": 6590 }, { "epoch": 0.19418705714243625, "grad_norm": 1.744146678400455, "learning_rate": 9.732310833449673e-06, "loss": 0.5021, "step": 6591 }, { "epoch": 0.19421651959989983, "grad_norm": 1.8957173813704735, "learning_rate": 9.732144804285513e-06, "loss": 0.7056, "step": 6592 }, { "epoch": 0.1942459820573634, "grad_norm": 1.7464188633663347, "learning_rate": 9.731978725066168e-06, "loss": 0.6804, "step": 6593 }, { "epoch": 0.19427544451482698, "grad_norm": 1.602082609069408, "learning_rate": 9.731812595793394e-06, "loss": 0.5034, "step": 6594 }, { "epoch": 0.19430490697229055, "grad_norm": 1.7237953100660783, "learning_rate": 9.731646416468947e-06, "loss": 0.4426, "step": 6595 }, { "epoch": 0.19433436942975413, "grad_norm": 1.5735940839463216, "learning_rate": 9.731480187094586e-06, "loss": 0.4874, "step": 6596 }, { "epoch": 0.1943638318872177, "grad_norm": 1.6395586833179971, "learning_rate": 9.731313907672072e-06, "loss": 0.4837, "step": 6597 }, { "epoch": 0.19439329434468128, "grad_norm": 1.9598837206469382, "learning_rate": 9.73114757820316e-06, "loss": 0.5099, "step": 6598 }, { "epoch": 0.19442275680214488, "grad_norm": 1.687377053611067, "learning_rate": 9.730981198689611e-06, "loss": 0.7628, "step": 6599 }, { "epoch": 0.19445221925960846, "grad_norm": 1.5416931155102662, "learning_rate": 9.730814769133184e-06, "loss": 0.5325, "step": 6600 }, { "epoch": 0.19448168171707203, "grad_norm": 1.5485816868199773, "learning_rate": 9.73064828953564e-06, "loss": 0.4756, "step": 6601 }, { "epoch": 0.1945111441745356, "grad_norm": 1.657534193470153, "learning_rate": 9.73048175989874e-06, "loss": 0.514, "step": 6602 }, { "epoch": 0.19454060663199918, "grad_norm": 1.5638369714324403, "learning_rate": 9.730315180224247e-06, "loss": 0.5325, "step": 6603 }, { "epoch": 0.19457006908946275, "grad_norm": 1.9166128087698622, "learning_rate": 9.73014855051392e-06, "loss": 0.4808, "step": 6604 }, { "epoch": 0.19459953154692633, "grad_norm": 1.5606122483684801, "learning_rate": 9.729981870769524e-06, "loss": 0.5064, "step": 6605 }, { "epoch": 0.1946289940043899, "grad_norm": 1.7045734182951535, "learning_rate": 9.729815140992822e-06, "loss": 0.3955, "step": 6606 }, { "epoch": 0.19465845646185348, "grad_norm": 1.5346468029793885, "learning_rate": 9.729648361185576e-06, "loss": 0.5409, "step": 6607 }, { "epoch": 0.19468791891931705, "grad_norm": 1.5411340346358087, "learning_rate": 9.729481531349552e-06, "loss": 0.4847, "step": 6608 }, { "epoch": 0.19471738137678063, "grad_norm": 1.681824268270203, "learning_rate": 9.729314651486513e-06, "loss": 0.5871, "step": 6609 }, { "epoch": 0.1947468438342442, "grad_norm": 1.6931992356109662, "learning_rate": 9.729147721598225e-06, "loss": 0.5999, "step": 6610 }, { "epoch": 0.19477630629170778, "grad_norm": 1.6923625785983354, "learning_rate": 9.728980741686453e-06, "loss": 0.6031, "step": 6611 }, { "epoch": 0.19480576874917138, "grad_norm": 1.5632948302513365, "learning_rate": 9.728813711752964e-06, "loss": 0.5268, "step": 6612 }, { "epoch": 0.19483523120663496, "grad_norm": 1.5579797514613933, "learning_rate": 9.728646631799525e-06, "loss": 0.5102, "step": 6613 }, { "epoch": 0.19486469366409853, "grad_norm": 1.5831184880350428, "learning_rate": 9.728479501827903e-06, "loss": 0.5831, "step": 6614 }, { "epoch": 0.1948941561215621, "grad_norm": 1.5821039968576627, "learning_rate": 9.728312321839866e-06, "loss": 0.5855, "step": 6615 }, { "epoch": 0.19492361857902568, "grad_norm": 1.58659202723747, "learning_rate": 9.728145091837183e-06, "loss": 0.4799, "step": 6616 }, { "epoch": 0.19495308103648926, "grad_norm": 1.5924784100326201, "learning_rate": 9.727977811821621e-06, "loss": 0.3908, "step": 6617 }, { "epoch": 0.19498254349395283, "grad_norm": 1.7710797767892137, "learning_rate": 9.727810481794952e-06, "loss": 0.5845, "step": 6618 }, { "epoch": 0.1950120059514164, "grad_norm": 1.8501195890982518, "learning_rate": 9.727643101758944e-06, "loss": 0.6797, "step": 6619 }, { "epoch": 0.19504146840887998, "grad_norm": 1.7517172707942903, "learning_rate": 9.727475671715367e-06, "loss": 0.5303, "step": 6620 }, { "epoch": 0.19507093086634356, "grad_norm": 1.5825252127122893, "learning_rate": 9.727308191665995e-06, "loss": 0.4344, "step": 6621 }, { "epoch": 0.19510039332380713, "grad_norm": 1.525952162541973, "learning_rate": 9.727140661612595e-06, "loss": 0.4861, "step": 6622 }, { "epoch": 0.1951298557812707, "grad_norm": 1.9378287514046433, "learning_rate": 9.726973081556945e-06, "loss": 0.6572, "step": 6623 }, { "epoch": 0.19515931823873428, "grad_norm": 1.6539775481224468, "learning_rate": 9.726805451500811e-06, "loss": 0.5935, "step": 6624 }, { "epoch": 0.19518878069619788, "grad_norm": 1.5130016675685554, "learning_rate": 9.726637771445972e-06, "loss": 0.5418, "step": 6625 }, { "epoch": 0.19521824315366146, "grad_norm": 1.7556524217988612, "learning_rate": 9.726470041394199e-06, "loss": 0.5427, "step": 6626 }, { "epoch": 0.19524770561112503, "grad_norm": 1.4211249499289784, "learning_rate": 9.726302261347265e-06, "loss": 0.4617, "step": 6627 }, { "epoch": 0.1952771680685886, "grad_norm": 1.9135707281256362, "learning_rate": 9.726134431306947e-06, "loss": 0.4841, "step": 6628 }, { "epoch": 0.19530663052605218, "grad_norm": 1.578468531053877, "learning_rate": 9.725966551275017e-06, "loss": 0.4851, "step": 6629 }, { "epoch": 0.19533609298351576, "grad_norm": 1.6696577146408278, "learning_rate": 9.725798621253254e-06, "loss": 0.5835, "step": 6630 }, { "epoch": 0.19536555544097933, "grad_norm": 1.6406940861991748, "learning_rate": 9.725630641243436e-06, "loss": 0.5995, "step": 6631 }, { "epoch": 0.1953950178984429, "grad_norm": 1.667086953130881, "learning_rate": 9.725462611247336e-06, "loss": 0.6001, "step": 6632 }, { "epoch": 0.19542448035590648, "grad_norm": 1.5246967273455312, "learning_rate": 9.725294531266732e-06, "loss": 0.5335, "step": 6633 }, { "epoch": 0.19545394281337006, "grad_norm": 1.8040960589184725, "learning_rate": 9.725126401303403e-06, "loss": 0.5223, "step": 6634 }, { "epoch": 0.19548340527083363, "grad_norm": 1.5077203635801022, "learning_rate": 9.724958221359126e-06, "loss": 0.4908, "step": 6635 }, { "epoch": 0.1955128677282972, "grad_norm": 1.6202362654900242, "learning_rate": 9.724789991435682e-06, "loss": 0.41, "step": 6636 }, { "epoch": 0.19554233018576078, "grad_norm": 1.6037450252824657, "learning_rate": 9.72462171153485e-06, "loss": 0.5198, "step": 6637 }, { "epoch": 0.19557179264322438, "grad_norm": 1.6402132066357507, "learning_rate": 9.724453381658407e-06, "loss": 0.4963, "step": 6638 }, { "epoch": 0.19560125510068796, "grad_norm": 1.4057841978410837, "learning_rate": 9.724285001808138e-06, "loss": 0.2972, "step": 6639 }, { "epoch": 0.19563071755815153, "grad_norm": 1.661728044767853, "learning_rate": 9.724116571985823e-06, "loss": 0.5315, "step": 6640 }, { "epoch": 0.1956601800156151, "grad_norm": 1.616223378408852, "learning_rate": 9.723948092193241e-06, "loss": 0.514, "step": 6641 }, { "epoch": 0.19568964247307868, "grad_norm": 1.5212038181715812, "learning_rate": 9.723779562432175e-06, "loss": 0.4252, "step": 6642 }, { "epoch": 0.19571910493054226, "grad_norm": 1.74008330114765, "learning_rate": 9.72361098270441e-06, "loss": 0.5688, "step": 6643 }, { "epoch": 0.19574856738800583, "grad_norm": 1.8223307262821657, "learning_rate": 9.723442353011727e-06, "loss": 0.4099, "step": 6644 }, { "epoch": 0.1957780298454694, "grad_norm": 1.6467385797011187, "learning_rate": 9.723273673355909e-06, "loss": 0.439, "step": 6645 }, { "epoch": 0.19580749230293298, "grad_norm": 1.8904795903392522, "learning_rate": 9.723104943738744e-06, "loss": 0.4557, "step": 6646 }, { "epoch": 0.19583695476039656, "grad_norm": 1.572850642894994, "learning_rate": 9.722936164162012e-06, "loss": 0.5179, "step": 6647 }, { "epoch": 0.19586641721786013, "grad_norm": 1.3305313456072485, "learning_rate": 9.722767334627502e-06, "loss": 0.3585, "step": 6648 }, { "epoch": 0.1958958796753237, "grad_norm": 1.8382669059795904, "learning_rate": 9.722598455136998e-06, "loss": 0.6068, "step": 6649 }, { "epoch": 0.19592534213278728, "grad_norm": 1.816514054204965, "learning_rate": 9.722429525692287e-06, "loss": 0.6238, "step": 6650 }, { "epoch": 0.1959548045902509, "grad_norm": 1.6048619907232191, "learning_rate": 9.722260546295155e-06, "loss": 0.568, "step": 6651 }, { "epoch": 0.19598426704771446, "grad_norm": 1.809070055621272, "learning_rate": 9.722091516947393e-06, "loss": 0.5486, "step": 6652 }, { "epoch": 0.19601372950517804, "grad_norm": 1.6491723344746312, "learning_rate": 9.721922437650781e-06, "loss": 0.4733, "step": 6653 }, { "epoch": 0.1960431919626416, "grad_norm": 1.6331580453153152, "learning_rate": 9.721753308407116e-06, "loss": 0.4852, "step": 6654 }, { "epoch": 0.19607265442010519, "grad_norm": 1.7352769826419996, "learning_rate": 9.721584129218183e-06, "loss": 0.6102, "step": 6655 }, { "epoch": 0.19610211687756876, "grad_norm": 1.5846121684977523, "learning_rate": 9.721414900085771e-06, "loss": 0.5094, "step": 6656 }, { "epoch": 0.19613157933503234, "grad_norm": 1.536775262178847, "learning_rate": 9.721245621011673e-06, "loss": 0.386, "step": 6657 }, { "epoch": 0.1961610417924959, "grad_norm": 1.7310070117505953, "learning_rate": 9.721076291997675e-06, "loss": 0.4305, "step": 6658 }, { "epoch": 0.19619050424995949, "grad_norm": 1.6451705843267597, "learning_rate": 9.720906913045574e-06, "loss": 0.451, "step": 6659 }, { "epoch": 0.19621996670742306, "grad_norm": 1.7933943681371012, "learning_rate": 9.720737484157156e-06, "loss": 0.5812, "step": 6660 }, { "epoch": 0.19624942916488664, "grad_norm": 1.6294143317426215, "learning_rate": 9.720568005334216e-06, "loss": 0.4491, "step": 6661 }, { "epoch": 0.1962788916223502, "grad_norm": 1.6111487620498066, "learning_rate": 9.720398476578546e-06, "loss": 0.4581, "step": 6662 }, { "epoch": 0.19630835407981378, "grad_norm": 1.8794854511348644, "learning_rate": 9.720228897891942e-06, "loss": 0.472, "step": 6663 }, { "epoch": 0.1963378165372774, "grad_norm": 1.5827664473902365, "learning_rate": 9.720059269276192e-06, "loss": 0.4551, "step": 6664 }, { "epoch": 0.19636727899474096, "grad_norm": 1.686993912543704, "learning_rate": 9.719889590733095e-06, "loss": 0.5404, "step": 6665 }, { "epoch": 0.19639674145220454, "grad_norm": 1.4370229453438126, "learning_rate": 9.719719862264444e-06, "loss": 0.4347, "step": 6666 }, { "epoch": 0.1964262039096681, "grad_norm": 1.5575221412691191, "learning_rate": 9.719550083872036e-06, "loss": 0.4673, "step": 6667 }, { "epoch": 0.1964556663671317, "grad_norm": 1.619280428801088, "learning_rate": 9.719380255557662e-06, "loss": 0.4819, "step": 6668 }, { "epoch": 0.19648512882459526, "grad_norm": 1.6829587045570558, "learning_rate": 9.719210377323124e-06, "loss": 0.6837, "step": 6669 }, { "epoch": 0.19651459128205884, "grad_norm": 1.7470794264555882, "learning_rate": 9.719040449170219e-06, "loss": 0.4865, "step": 6670 }, { "epoch": 0.1965440537395224, "grad_norm": 1.5785602411186572, "learning_rate": 9.71887047110074e-06, "loss": 0.5793, "step": 6671 }, { "epoch": 0.196573516196986, "grad_norm": 1.6582660349090708, "learning_rate": 9.718700443116488e-06, "loss": 0.4612, "step": 6672 }, { "epoch": 0.19660297865444956, "grad_norm": 1.737935946188807, "learning_rate": 9.71853036521926e-06, "loss": 0.3709, "step": 6673 }, { "epoch": 0.19663244111191314, "grad_norm": 1.8662868537539334, "learning_rate": 9.718360237410857e-06, "loss": 0.7645, "step": 6674 }, { "epoch": 0.1966619035693767, "grad_norm": 1.7376701349158394, "learning_rate": 9.718190059693077e-06, "loss": 0.5578, "step": 6675 }, { "epoch": 0.1966913660268403, "grad_norm": 1.6286501478522046, "learning_rate": 9.71801983206772e-06, "loss": 0.4092, "step": 6676 }, { "epoch": 0.1967208284843039, "grad_norm": 1.370421963414147, "learning_rate": 9.717849554536586e-06, "loss": 0.4077, "step": 6677 }, { "epoch": 0.19675029094176746, "grad_norm": 1.811690433158738, "learning_rate": 9.717679227101479e-06, "loss": 0.5454, "step": 6678 }, { "epoch": 0.19677975339923104, "grad_norm": 1.672363450950249, "learning_rate": 9.717508849764198e-06, "loss": 0.4718, "step": 6679 }, { "epoch": 0.1968092158566946, "grad_norm": 1.7951945434401742, "learning_rate": 9.717338422526548e-06, "loss": 0.5442, "step": 6680 }, { "epoch": 0.1968386783141582, "grad_norm": 1.7133666864053234, "learning_rate": 9.717167945390329e-06, "loss": 0.4555, "step": 6681 }, { "epoch": 0.19686814077162176, "grad_norm": 1.5526708593468541, "learning_rate": 9.716997418357344e-06, "loss": 0.5415, "step": 6682 }, { "epoch": 0.19689760322908534, "grad_norm": 1.4563924904076764, "learning_rate": 9.716826841429398e-06, "loss": 0.4851, "step": 6683 }, { "epoch": 0.1969270656865489, "grad_norm": 1.5220282798247904, "learning_rate": 9.716656214608295e-06, "loss": 0.5432, "step": 6684 }, { "epoch": 0.1969565281440125, "grad_norm": 1.4479423320548963, "learning_rate": 9.716485537895842e-06, "loss": 0.434, "step": 6685 }, { "epoch": 0.19698599060147606, "grad_norm": 1.57879241180069, "learning_rate": 9.71631481129384e-06, "loss": 0.5864, "step": 6686 }, { "epoch": 0.19701545305893964, "grad_norm": 2.0561045611273747, "learning_rate": 9.716144034804098e-06, "loss": 0.4948, "step": 6687 }, { "epoch": 0.1970449155164032, "grad_norm": 1.6911989434950183, "learning_rate": 9.715973208428423e-06, "loss": 0.5319, "step": 6688 }, { "epoch": 0.1970743779738668, "grad_norm": 1.6688090428782407, "learning_rate": 9.715802332168619e-06, "loss": 0.5316, "step": 6689 }, { "epoch": 0.1971038404313304, "grad_norm": 1.7134521169327042, "learning_rate": 9.715631406026494e-06, "loss": 0.7327, "step": 6690 }, { "epoch": 0.19713330288879397, "grad_norm": 1.5610735031272467, "learning_rate": 9.715460430003861e-06, "loss": 0.5285, "step": 6691 }, { "epoch": 0.19716276534625754, "grad_norm": 1.5650160957840518, "learning_rate": 9.71528940410252e-06, "loss": 0.4868, "step": 6692 }, { "epoch": 0.19719222780372112, "grad_norm": 1.6659425228376314, "learning_rate": 9.715118328324287e-06, "loss": 0.582, "step": 6693 }, { "epoch": 0.1972216902611847, "grad_norm": 1.5692177626558352, "learning_rate": 9.71494720267097e-06, "loss": 0.4769, "step": 6694 }, { "epoch": 0.19725115271864826, "grad_norm": 1.430338388488715, "learning_rate": 9.714776027144379e-06, "loss": 0.4995, "step": 6695 }, { "epoch": 0.19728061517611184, "grad_norm": 1.7885387827698391, "learning_rate": 9.714604801746322e-06, "loss": 0.5476, "step": 6696 }, { "epoch": 0.19731007763357541, "grad_norm": 2.032787995043583, "learning_rate": 9.714433526478612e-06, "loss": 0.4265, "step": 6697 }, { "epoch": 0.197339540091039, "grad_norm": 1.602115867287088, "learning_rate": 9.714262201343063e-06, "loss": 0.5501, "step": 6698 }, { "epoch": 0.19736900254850256, "grad_norm": 1.5797424670198572, "learning_rate": 9.714090826341484e-06, "loss": 0.5906, "step": 6699 }, { "epoch": 0.19739846500596614, "grad_norm": 1.8115574181317533, "learning_rate": 9.71391940147569e-06, "loss": 0.5222, "step": 6700 }, { "epoch": 0.19742792746342971, "grad_norm": 1.5552342642499979, "learning_rate": 9.71374792674749e-06, "loss": 0.368, "step": 6701 }, { "epoch": 0.1974573899208933, "grad_norm": 1.7287686145791648, "learning_rate": 9.713576402158705e-06, "loss": 0.5756, "step": 6702 }, { "epoch": 0.1974868523783569, "grad_norm": 1.8742445960576672, "learning_rate": 9.713404827711143e-06, "loss": 0.6141, "step": 6703 }, { "epoch": 0.19751631483582047, "grad_norm": 1.6965265633300863, "learning_rate": 9.713233203406623e-06, "loss": 0.6439, "step": 6704 }, { "epoch": 0.19754577729328404, "grad_norm": 1.4824209555148127, "learning_rate": 9.713061529246957e-06, "loss": 0.5852, "step": 6705 }, { "epoch": 0.19757523975074762, "grad_norm": 1.496558518679548, "learning_rate": 9.712889805233961e-06, "loss": 0.5174, "step": 6706 }, { "epoch": 0.1976047022082112, "grad_norm": 1.4493445637356828, "learning_rate": 9.712718031369456e-06, "loss": 0.4487, "step": 6707 }, { "epoch": 0.19763416466567477, "grad_norm": 1.5030655635471326, "learning_rate": 9.712546207655253e-06, "loss": 0.3849, "step": 6708 }, { "epoch": 0.19766362712313834, "grad_norm": 1.7534140918051335, "learning_rate": 9.712374334093175e-06, "loss": 0.5753, "step": 6709 }, { "epoch": 0.19769308958060192, "grad_norm": 1.6402409628570496, "learning_rate": 9.712202410685036e-06, "loss": 0.6095, "step": 6710 }, { "epoch": 0.1977225520380655, "grad_norm": 1.6257271715888215, "learning_rate": 9.712030437432657e-06, "loss": 0.4823, "step": 6711 }, { "epoch": 0.19775201449552907, "grad_norm": 1.8042802713357668, "learning_rate": 9.711858414337856e-06, "loss": 0.4992, "step": 6712 }, { "epoch": 0.19778147695299264, "grad_norm": 1.8187376094839975, "learning_rate": 9.711686341402452e-06, "loss": 0.6216, "step": 6713 }, { "epoch": 0.19781093941045622, "grad_norm": 1.723784199114971, "learning_rate": 9.711514218628267e-06, "loss": 0.6404, "step": 6714 }, { "epoch": 0.1978404018679198, "grad_norm": 1.811935208085766, "learning_rate": 9.711342046017118e-06, "loss": 0.4476, "step": 6715 }, { "epoch": 0.1978698643253834, "grad_norm": 1.8451098124945027, "learning_rate": 9.71116982357083e-06, "loss": 0.5111, "step": 6716 }, { "epoch": 0.19789932678284697, "grad_norm": 1.5902570606566162, "learning_rate": 9.710997551291223e-06, "loss": 0.5583, "step": 6717 }, { "epoch": 0.19792878924031054, "grad_norm": 1.846291727702589, "learning_rate": 9.710825229180121e-06, "loss": 0.6842, "step": 6718 }, { "epoch": 0.19795825169777412, "grad_norm": 1.7367891388281729, "learning_rate": 9.710652857239344e-06, "loss": 0.6163, "step": 6719 }, { "epoch": 0.1979877141552377, "grad_norm": 1.4292208584894446, "learning_rate": 9.710480435470719e-06, "loss": 0.4124, "step": 6720 }, { "epoch": 0.19801717661270127, "grad_norm": 1.5282487563871932, "learning_rate": 9.710307963876065e-06, "loss": 0.45, "step": 6721 }, { "epoch": 0.19804663907016484, "grad_norm": 1.7771143470918922, "learning_rate": 9.710135442457209e-06, "loss": 0.7122, "step": 6722 }, { "epoch": 0.19807610152762842, "grad_norm": 1.6079059545005938, "learning_rate": 9.709962871215976e-06, "loss": 0.5958, "step": 6723 }, { "epoch": 0.198105563985092, "grad_norm": 1.7088438524008889, "learning_rate": 9.709790250154194e-06, "loss": 0.3719, "step": 6724 }, { "epoch": 0.19813502644255557, "grad_norm": 1.7146929967888764, "learning_rate": 9.709617579273682e-06, "loss": 0.5921, "step": 6725 }, { "epoch": 0.19816448890001914, "grad_norm": 1.8071501615619718, "learning_rate": 9.709444858576274e-06, "loss": 0.7389, "step": 6726 }, { "epoch": 0.19819395135748272, "grad_norm": 1.6992648926943101, "learning_rate": 9.70927208806379e-06, "loss": 0.6477, "step": 6727 }, { "epoch": 0.1982234138149463, "grad_norm": 1.446455445352414, "learning_rate": 9.709099267738064e-06, "loss": 0.4332, "step": 6728 }, { "epoch": 0.1982528762724099, "grad_norm": 1.4524773185553546, "learning_rate": 9.70892639760092e-06, "loss": 0.4661, "step": 6729 }, { "epoch": 0.19828233872987347, "grad_norm": 1.6537203591894092, "learning_rate": 9.708753477654187e-06, "loss": 0.4872, "step": 6730 }, { "epoch": 0.19831180118733704, "grad_norm": 1.7590844731198234, "learning_rate": 9.708580507899696e-06, "loss": 0.5426, "step": 6731 }, { "epoch": 0.19834126364480062, "grad_norm": 1.491505997120277, "learning_rate": 9.708407488339272e-06, "loss": 0.4373, "step": 6732 }, { "epoch": 0.1983707261022642, "grad_norm": 1.5630643113747684, "learning_rate": 9.708234418974753e-06, "loss": 0.5194, "step": 6733 }, { "epoch": 0.19840018855972777, "grad_norm": 1.257424631341204, "learning_rate": 9.708061299807963e-06, "loss": 0.3073, "step": 6734 }, { "epoch": 0.19842965101719134, "grad_norm": 1.693691360793431, "learning_rate": 9.707888130840737e-06, "loss": 0.6283, "step": 6735 }, { "epoch": 0.19845911347465492, "grad_norm": 1.642157873684288, "learning_rate": 9.707714912074903e-06, "loss": 0.4611, "step": 6736 }, { "epoch": 0.1984885759321185, "grad_norm": 1.5435257231851822, "learning_rate": 9.707541643512298e-06, "loss": 0.4784, "step": 6737 }, { "epoch": 0.19851803838958207, "grad_norm": 1.5889377278440495, "learning_rate": 9.70736832515475e-06, "loss": 0.458, "step": 6738 }, { "epoch": 0.19854750084704564, "grad_norm": 1.627991712230878, "learning_rate": 9.707194957004093e-06, "loss": 0.6015, "step": 6739 }, { "epoch": 0.19857696330450922, "grad_norm": 1.7050261078774644, "learning_rate": 9.707021539062165e-06, "loss": 0.5137, "step": 6740 }, { "epoch": 0.1986064257619728, "grad_norm": 1.8520286346226476, "learning_rate": 9.706848071330797e-06, "loss": 0.6291, "step": 6741 }, { "epoch": 0.1986358882194364, "grad_norm": 1.5328616141640443, "learning_rate": 9.706674553811825e-06, "loss": 0.5183, "step": 6742 }, { "epoch": 0.19866535067689997, "grad_norm": 1.7841142100958403, "learning_rate": 9.706500986507084e-06, "loss": 0.4981, "step": 6743 }, { "epoch": 0.19869481313436355, "grad_norm": 1.8277712288940122, "learning_rate": 9.706327369418408e-06, "loss": 0.5823, "step": 6744 }, { "epoch": 0.19872427559182712, "grad_norm": 1.7095123821084, "learning_rate": 9.706153702547638e-06, "loss": 0.3848, "step": 6745 }, { "epoch": 0.1987537380492907, "grad_norm": 1.5470933251240777, "learning_rate": 9.705979985896607e-06, "loss": 0.4817, "step": 6746 }, { "epoch": 0.19878320050675427, "grad_norm": 1.5772699111376376, "learning_rate": 9.705806219467152e-06, "loss": 0.5589, "step": 6747 }, { "epoch": 0.19881266296421785, "grad_norm": 1.6513370062890917, "learning_rate": 9.705632403261115e-06, "loss": 0.488, "step": 6748 }, { "epoch": 0.19884212542168142, "grad_norm": 1.6790456812594978, "learning_rate": 9.705458537280334e-06, "loss": 0.5997, "step": 6749 }, { "epoch": 0.198871587879145, "grad_norm": 1.8263794544521994, "learning_rate": 9.705284621526645e-06, "loss": 0.5075, "step": 6750 }, { "epoch": 0.19890105033660857, "grad_norm": 1.5893091463812141, "learning_rate": 9.70511065600189e-06, "loss": 0.5202, "step": 6751 }, { "epoch": 0.19893051279407215, "grad_norm": 1.5782188729733995, "learning_rate": 9.704936640707908e-06, "loss": 0.4333, "step": 6752 }, { "epoch": 0.19895997525153572, "grad_norm": 1.7137019832741027, "learning_rate": 9.704762575646539e-06, "loss": 0.4588, "step": 6753 }, { "epoch": 0.1989894377089993, "grad_norm": 1.5621012783571333, "learning_rate": 9.704588460819626e-06, "loss": 0.5328, "step": 6754 }, { "epoch": 0.1990189001664629, "grad_norm": 1.6386872242767883, "learning_rate": 9.70441429622901e-06, "loss": 0.5617, "step": 6755 }, { "epoch": 0.19904836262392647, "grad_norm": 1.7853451667622486, "learning_rate": 9.704240081876534e-06, "loss": 0.6194, "step": 6756 }, { "epoch": 0.19907782508139005, "grad_norm": 1.5661981090454393, "learning_rate": 9.704065817764039e-06, "loss": 0.4461, "step": 6757 }, { "epoch": 0.19910728753885362, "grad_norm": 1.7060857042362105, "learning_rate": 9.70389150389337e-06, "loss": 0.5006, "step": 6758 }, { "epoch": 0.1991367499963172, "grad_norm": 1.7623894061085612, "learning_rate": 9.70371714026637e-06, "loss": 0.5959, "step": 6759 }, { "epoch": 0.19916621245378077, "grad_norm": 1.6994457603715054, "learning_rate": 9.703542726884884e-06, "loss": 0.6636, "step": 6760 }, { "epoch": 0.19919567491124435, "grad_norm": 1.6324023178125535, "learning_rate": 9.703368263750756e-06, "loss": 0.4511, "step": 6761 }, { "epoch": 0.19922513736870792, "grad_norm": 1.7252748620178429, "learning_rate": 9.703193750865834e-06, "loss": 0.4514, "step": 6762 }, { "epoch": 0.1992545998261715, "grad_norm": 1.4778774606154208, "learning_rate": 9.70301918823196e-06, "loss": 0.5571, "step": 6763 }, { "epoch": 0.19928406228363507, "grad_norm": 1.5793252640597026, "learning_rate": 9.702844575850982e-06, "loss": 0.6202, "step": 6764 }, { "epoch": 0.19931352474109865, "grad_norm": 1.9181610935643871, "learning_rate": 9.702669913724747e-06, "loss": 0.5409, "step": 6765 }, { "epoch": 0.19934298719856222, "grad_norm": 1.5803350279771053, "learning_rate": 9.702495201855104e-06, "loss": 0.3687, "step": 6766 }, { "epoch": 0.1993724496560258, "grad_norm": 1.6876660162261128, "learning_rate": 9.7023204402439e-06, "loss": 0.4477, "step": 6767 }, { "epoch": 0.1994019121134894, "grad_norm": 1.5494970827665304, "learning_rate": 9.702145628892983e-06, "loss": 0.426, "step": 6768 }, { "epoch": 0.19943137457095297, "grad_norm": 1.5457888133226774, "learning_rate": 9.701970767804202e-06, "loss": 0.3914, "step": 6769 }, { "epoch": 0.19946083702841655, "grad_norm": 1.6397550330980746, "learning_rate": 9.701795856979408e-06, "loss": 0.5452, "step": 6770 }, { "epoch": 0.19949029948588012, "grad_norm": 1.6639074824385367, "learning_rate": 9.70162089642045e-06, "loss": 0.4678, "step": 6771 }, { "epoch": 0.1995197619433437, "grad_norm": 1.7934993667784624, "learning_rate": 9.701445886129179e-06, "loss": 0.5202, "step": 6772 }, { "epoch": 0.19954922440080727, "grad_norm": 1.6171086270682138, "learning_rate": 9.701270826107445e-06, "loss": 0.5786, "step": 6773 }, { "epoch": 0.19957868685827085, "grad_norm": 1.3850553753072066, "learning_rate": 9.701095716357103e-06, "loss": 0.3567, "step": 6774 }, { "epoch": 0.19960814931573442, "grad_norm": 1.902644498207867, "learning_rate": 9.700920556880002e-06, "loss": 0.5782, "step": 6775 }, { "epoch": 0.199637611773198, "grad_norm": 1.6079785499647505, "learning_rate": 9.700745347677997e-06, "loss": 0.5238, "step": 6776 }, { "epoch": 0.19966707423066157, "grad_norm": 1.5637847582581286, "learning_rate": 9.700570088752938e-06, "loss": 0.4204, "step": 6777 }, { "epoch": 0.19969653668812515, "grad_norm": 1.7242591511250076, "learning_rate": 9.700394780106685e-06, "loss": 0.5946, "step": 6778 }, { "epoch": 0.19972599914558872, "grad_norm": 1.6134936659749624, "learning_rate": 9.700219421741087e-06, "loss": 0.5612, "step": 6779 }, { "epoch": 0.1997554616030523, "grad_norm": 1.5522318958202022, "learning_rate": 9.700044013657999e-06, "loss": 0.4588, "step": 6780 }, { "epoch": 0.1997849240605159, "grad_norm": 1.8442424287170454, "learning_rate": 9.699868555859277e-06, "loss": 0.5437, "step": 6781 }, { "epoch": 0.19981438651797948, "grad_norm": 1.396849875469252, "learning_rate": 9.699693048346781e-06, "loss": 0.3773, "step": 6782 }, { "epoch": 0.19984384897544305, "grad_norm": 1.6297801860981882, "learning_rate": 9.699517491122362e-06, "loss": 0.4677, "step": 6783 }, { "epoch": 0.19987331143290663, "grad_norm": 1.8731824651581874, "learning_rate": 9.699341884187879e-06, "loss": 0.5948, "step": 6784 }, { "epoch": 0.1999027738903702, "grad_norm": 1.8068083577232252, "learning_rate": 9.699166227545192e-06, "loss": 0.4907, "step": 6785 }, { "epoch": 0.19993223634783377, "grad_norm": 1.9805946358327107, "learning_rate": 9.698990521196155e-06, "loss": 0.6074, "step": 6786 }, { "epoch": 0.19996169880529735, "grad_norm": 1.6202875601174467, "learning_rate": 9.698814765142628e-06, "loss": 0.5786, "step": 6787 }, { "epoch": 0.19999116126276092, "grad_norm": 1.4309517892835821, "learning_rate": 9.69863895938647e-06, "loss": 0.4201, "step": 6788 }, { "epoch": 0.2000206237202245, "grad_norm": 1.6580299171277342, "learning_rate": 9.698463103929542e-06, "loss": 0.5318, "step": 6789 }, { "epoch": 0.20005008617768807, "grad_norm": 2.0610445665108372, "learning_rate": 9.698287198773704e-06, "loss": 0.5634, "step": 6790 }, { "epoch": 0.20007954863515165, "grad_norm": 1.5876333899095612, "learning_rate": 9.698111243920816e-06, "loss": 0.5342, "step": 6791 }, { "epoch": 0.20010901109261522, "grad_norm": 1.4341694535605245, "learning_rate": 9.697935239372737e-06, "loss": 0.4234, "step": 6792 }, { "epoch": 0.2001384735500788, "grad_norm": 1.537901641806976, "learning_rate": 9.697759185131333e-06, "loss": 0.4462, "step": 6793 }, { "epoch": 0.2001679360075424, "grad_norm": 1.7729055526866704, "learning_rate": 9.697583081198462e-06, "loss": 0.5422, "step": 6794 }, { "epoch": 0.20019739846500598, "grad_norm": 1.6785503281050187, "learning_rate": 9.69740692757599e-06, "loss": 0.479, "step": 6795 }, { "epoch": 0.20022686092246955, "grad_norm": 1.677658025843811, "learning_rate": 9.697230724265779e-06, "loss": 0.4583, "step": 6796 }, { "epoch": 0.20025632337993313, "grad_norm": 1.508036920886116, "learning_rate": 9.697054471269692e-06, "loss": 0.5487, "step": 6797 }, { "epoch": 0.2002857858373967, "grad_norm": 1.5710914724013192, "learning_rate": 9.696878168589596e-06, "loss": 0.5079, "step": 6798 }, { "epoch": 0.20031524829486028, "grad_norm": 1.5672955352413203, "learning_rate": 9.696701816227353e-06, "loss": 0.4397, "step": 6799 }, { "epoch": 0.20034471075232385, "grad_norm": 1.634165244540472, "learning_rate": 9.69652541418483e-06, "loss": 0.5087, "step": 6800 }, { "epoch": 0.20037417320978743, "grad_norm": 1.5675793909705247, "learning_rate": 9.69634896246389e-06, "loss": 0.5182, "step": 6801 }, { "epoch": 0.200403635667251, "grad_norm": 1.455142879318653, "learning_rate": 9.696172461066407e-06, "loss": 0.4464, "step": 6802 }, { "epoch": 0.20043309812471458, "grad_norm": 1.5391568664945996, "learning_rate": 9.695995909994237e-06, "loss": 0.5537, "step": 6803 }, { "epoch": 0.20046256058217815, "grad_norm": 1.4359254979802099, "learning_rate": 9.695819309249257e-06, "loss": 0.4153, "step": 6804 }, { "epoch": 0.20049202303964173, "grad_norm": 1.5728173389716504, "learning_rate": 9.695642658833333e-06, "loss": 0.5412, "step": 6805 }, { "epoch": 0.2005214854971053, "grad_norm": 1.8224872090213697, "learning_rate": 9.69546595874833e-06, "loss": 0.545, "step": 6806 }, { "epoch": 0.2005509479545689, "grad_norm": 1.4706123564434617, "learning_rate": 9.695289208996118e-06, "loss": 0.3521, "step": 6807 }, { "epoch": 0.20058041041203248, "grad_norm": 1.8359708827591104, "learning_rate": 9.69511240957857e-06, "loss": 0.676, "step": 6808 }, { "epoch": 0.20060987286949605, "grad_norm": 1.6074222319973088, "learning_rate": 9.694935560497551e-06, "loss": 0.3927, "step": 6809 }, { "epoch": 0.20063933532695963, "grad_norm": 1.6511357628238084, "learning_rate": 9.694758661754937e-06, "loss": 0.3655, "step": 6810 }, { "epoch": 0.2006687977844232, "grad_norm": 1.8348768885704068, "learning_rate": 9.694581713352595e-06, "loss": 0.539, "step": 6811 }, { "epoch": 0.20069826024188678, "grad_norm": 1.728117194421797, "learning_rate": 9.694404715292398e-06, "loss": 0.4926, "step": 6812 }, { "epoch": 0.20072772269935035, "grad_norm": 1.4419429510464943, "learning_rate": 9.69422766757622e-06, "loss": 0.4329, "step": 6813 }, { "epoch": 0.20075718515681393, "grad_norm": 1.4301912059606974, "learning_rate": 9.694050570205933e-06, "loss": 0.4459, "step": 6814 }, { "epoch": 0.2007866476142775, "grad_norm": 1.7331466284111223, "learning_rate": 9.693873423183408e-06, "loss": 0.5298, "step": 6815 }, { "epoch": 0.20081611007174108, "grad_norm": 1.6483941618200808, "learning_rate": 9.693696226510521e-06, "loss": 0.5811, "step": 6816 }, { "epoch": 0.20084557252920465, "grad_norm": 1.6011909032753155, "learning_rate": 9.693518980189144e-06, "loss": 0.656, "step": 6817 }, { "epoch": 0.20087503498666823, "grad_norm": 1.7384683947272281, "learning_rate": 9.693341684221158e-06, "loss": 0.5284, "step": 6818 }, { "epoch": 0.2009044974441318, "grad_norm": 1.7172339301181867, "learning_rate": 9.693164338608431e-06, "loss": 0.6734, "step": 6819 }, { "epoch": 0.2009339599015954, "grad_norm": 1.649134216369231, "learning_rate": 9.692986943352842e-06, "loss": 0.5765, "step": 6820 }, { "epoch": 0.20096342235905898, "grad_norm": 1.6746301958256193, "learning_rate": 9.692809498456266e-06, "loss": 0.4656, "step": 6821 }, { "epoch": 0.20099288481652255, "grad_norm": 1.6969887649060538, "learning_rate": 9.692632003920584e-06, "loss": 0.5602, "step": 6822 }, { "epoch": 0.20102234727398613, "grad_norm": 1.497233793843071, "learning_rate": 9.69245445974767e-06, "loss": 0.5037, "step": 6823 }, { "epoch": 0.2010518097314497, "grad_norm": 1.7429580352457406, "learning_rate": 9.692276865939403e-06, "loss": 0.6787, "step": 6824 }, { "epoch": 0.20108127218891328, "grad_norm": 1.963833635606686, "learning_rate": 9.69209922249766e-06, "loss": 0.6177, "step": 6825 }, { "epoch": 0.20111073464637685, "grad_norm": 1.608768600092607, "learning_rate": 9.691921529424322e-06, "loss": 0.6144, "step": 6826 }, { "epoch": 0.20114019710384043, "grad_norm": 1.7237621439377098, "learning_rate": 9.691743786721269e-06, "loss": 0.6103, "step": 6827 }, { "epoch": 0.201169659561304, "grad_norm": 1.6851849062040154, "learning_rate": 9.69156599439038e-06, "loss": 0.5355, "step": 6828 }, { "epoch": 0.20119912201876758, "grad_norm": 1.8644988229611612, "learning_rate": 9.691388152433534e-06, "loss": 0.5431, "step": 6829 }, { "epoch": 0.20122858447623115, "grad_norm": 1.8216886518435693, "learning_rate": 9.691210260852614e-06, "loss": 0.5687, "step": 6830 }, { "epoch": 0.20125804693369473, "grad_norm": 1.593436873082469, "learning_rate": 9.691032319649502e-06, "loss": 0.5892, "step": 6831 }, { "epoch": 0.2012875093911583, "grad_norm": 1.374816662920826, "learning_rate": 9.690854328826081e-06, "loss": 0.4321, "step": 6832 }, { "epoch": 0.2013169718486219, "grad_norm": 1.5086868109964067, "learning_rate": 9.690676288384232e-06, "loss": 0.5481, "step": 6833 }, { "epoch": 0.20134643430608548, "grad_norm": 1.5315867504604497, "learning_rate": 9.69049819832584e-06, "loss": 0.4737, "step": 6834 }, { "epoch": 0.20137589676354906, "grad_norm": 1.3595164466717673, "learning_rate": 9.690320058652785e-06, "loss": 0.3962, "step": 6835 }, { "epoch": 0.20140535922101263, "grad_norm": 1.508443338502973, "learning_rate": 9.690141869366954e-06, "loss": 0.4436, "step": 6836 }, { "epoch": 0.2014348216784762, "grad_norm": 1.5938021747614803, "learning_rate": 9.689963630470232e-06, "loss": 0.3647, "step": 6837 }, { "epoch": 0.20146428413593978, "grad_norm": 1.7174193438061085, "learning_rate": 9.689785341964506e-06, "loss": 0.535, "step": 6838 }, { "epoch": 0.20149374659340336, "grad_norm": 1.6704681109075277, "learning_rate": 9.689607003851657e-06, "loss": 0.5349, "step": 6839 }, { "epoch": 0.20152320905086693, "grad_norm": 1.567134532375281, "learning_rate": 9.689428616133576e-06, "loss": 0.5131, "step": 6840 }, { "epoch": 0.2015526715083305, "grad_norm": 1.6366243029024456, "learning_rate": 9.689250178812149e-06, "loss": 0.5368, "step": 6841 }, { "epoch": 0.20158213396579408, "grad_norm": 1.7048949172361012, "learning_rate": 9.689071691889262e-06, "loss": 0.6121, "step": 6842 }, { "epoch": 0.20161159642325766, "grad_norm": 1.5320875720590295, "learning_rate": 9.688893155366804e-06, "loss": 0.4847, "step": 6843 }, { "epoch": 0.20164105888072123, "grad_norm": 1.6060774360620196, "learning_rate": 9.688714569246662e-06, "loss": 0.6433, "step": 6844 }, { "epoch": 0.2016705213381848, "grad_norm": 1.9563914620624592, "learning_rate": 9.688535933530727e-06, "loss": 0.5997, "step": 6845 }, { "epoch": 0.2016999837956484, "grad_norm": 1.669903825793503, "learning_rate": 9.688357248220887e-06, "loss": 0.5789, "step": 6846 }, { "epoch": 0.20172944625311198, "grad_norm": 1.7201173662187816, "learning_rate": 9.688178513319033e-06, "loss": 0.6207, "step": 6847 }, { "epoch": 0.20175890871057556, "grad_norm": 1.421788810515102, "learning_rate": 9.687999728827055e-06, "loss": 0.372, "step": 6848 }, { "epoch": 0.20178837116803913, "grad_norm": 1.5053433367660638, "learning_rate": 9.687820894746847e-06, "loss": 0.4707, "step": 6849 }, { "epoch": 0.2018178336255027, "grad_norm": 1.6958259343417428, "learning_rate": 9.687642011080296e-06, "loss": 0.5785, "step": 6850 }, { "epoch": 0.20184729608296628, "grad_norm": 1.8627613999178911, "learning_rate": 9.687463077829298e-06, "loss": 0.563, "step": 6851 }, { "epoch": 0.20187675854042986, "grad_norm": 2.002505283638979, "learning_rate": 9.687284094995742e-06, "loss": 0.4375, "step": 6852 }, { "epoch": 0.20190622099789343, "grad_norm": 1.7664684351868816, "learning_rate": 9.687105062581526e-06, "loss": 0.3193, "step": 6853 }, { "epoch": 0.201935683455357, "grad_norm": 1.564793288969294, "learning_rate": 9.686925980588539e-06, "loss": 0.5885, "step": 6854 }, { "epoch": 0.20196514591282058, "grad_norm": 1.8223996039784016, "learning_rate": 9.686746849018678e-06, "loss": 0.4218, "step": 6855 }, { "epoch": 0.20199460837028416, "grad_norm": 1.5602023448895308, "learning_rate": 9.686567667873837e-06, "loss": 0.5845, "step": 6856 }, { "epoch": 0.20202407082774773, "grad_norm": 1.6461574826825534, "learning_rate": 9.686388437155911e-06, "loss": 0.5822, "step": 6857 }, { "epoch": 0.2020535332852113, "grad_norm": 1.7327354037402134, "learning_rate": 9.686209156866797e-06, "loss": 0.4617, "step": 6858 }, { "epoch": 0.2020829957426749, "grad_norm": 1.753044033627774, "learning_rate": 9.686029827008392e-06, "loss": 0.5035, "step": 6859 }, { "epoch": 0.20211245820013848, "grad_norm": 1.669551639857669, "learning_rate": 9.68585044758259e-06, "loss": 0.4886, "step": 6860 }, { "epoch": 0.20214192065760206, "grad_norm": 1.3505981188494285, "learning_rate": 9.685671018591289e-06, "loss": 0.3999, "step": 6861 }, { "epoch": 0.20217138311506563, "grad_norm": 1.5204567451316298, "learning_rate": 9.685491540036389e-06, "loss": 0.5112, "step": 6862 }, { "epoch": 0.2022008455725292, "grad_norm": 1.6190676648314495, "learning_rate": 9.685312011919788e-06, "loss": 0.5243, "step": 6863 }, { "epoch": 0.20223030802999278, "grad_norm": 1.6250812713333918, "learning_rate": 9.685132434243384e-06, "loss": 0.4678, "step": 6864 }, { "epoch": 0.20225977048745636, "grad_norm": 1.5576847487340955, "learning_rate": 9.684952807009079e-06, "loss": 0.5788, "step": 6865 }, { "epoch": 0.20228923294491993, "grad_norm": 1.682049057102255, "learning_rate": 9.684773130218768e-06, "loss": 0.5732, "step": 6866 }, { "epoch": 0.2023186954023835, "grad_norm": 1.6903757320380783, "learning_rate": 9.684593403874357e-06, "loss": 0.479, "step": 6867 }, { "epoch": 0.20234815785984708, "grad_norm": 1.599070864042291, "learning_rate": 9.684413627977742e-06, "loss": 0.5293, "step": 6868 }, { "epoch": 0.20237762031731066, "grad_norm": 1.6250003326440443, "learning_rate": 9.684233802530829e-06, "loss": 0.457, "step": 6869 }, { "epoch": 0.20240708277477423, "grad_norm": 1.7475017811894318, "learning_rate": 9.684053927535517e-06, "loss": 0.4382, "step": 6870 }, { "epoch": 0.2024365452322378, "grad_norm": 1.5484779705959615, "learning_rate": 9.683874002993712e-06, "loss": 0.5598, "step": 6871 }, { "epoch": 0.2024660076897014, "grad_norm": 1.6902069157686266, "learning_rate": 9.683694028907314e-06, "loss": 0.625, "step": 6872 }, { "epoch": 0.20249547014716499, "grad_norm": 1.5476981898445747, "learning_rate": 9.683514005278228e-06, "loss": 0.4998, "step": 6873 }, { "epoch": 0.20252493260462856, "grad_norm": 1.966740639781816, "learning_rate": 9.683333932108358e-06, "loss": 0.6029, "step": 6874 }, { "epoch": 0.20255439506209214, "grad_norm": 1.6319538212959424, "learning_rate": 9.683153809399611e-06, "loss": 0.4663, "step": 6875 }, { "epoch": 0.2025838575195557, "grad_norm": 1.7321661347016322, "learning_rate": 9.682973637153887e-06, "loss": 0.8052, "step": 6876 }, { "epoch": 0.20261331997701928, "grad_norm": 1.6543056549243416, "learning_rate": 9.682793415373098e-06, "loss": 0.6038, "step": 6877 }, { "epoch": 0.20264278243448286, "grad_norm": 1.6043640928091627, "learning_rate": 9.682613144059145e-06, "loss": 0.5237, "step": 6878 }, { "epoch": 0.20267224489194643, "grad_norm": 1.325292954273308, "learning_rate": 9.682432823213938e-06, "loss": 0.3472, "step": 6879 }, { "epoch": 0.20270170734941, "grad_norm": 1.5734291123215112, "learning_rate": 9.682252452839385e-06, "loss": 0.5707, "step": 6880 }, { "epoch": 0.20273116980687358, "grad_norm": 1.488613334762808, "learning_rate": 9.682072032937391e-06, "loss": 0.4707, "step": 6881 }, { "epoch": 0.20276063226433716, "grad_norm": 1.483737144556487, "learning_rate": 9.681891563509866e-06, "loss": 0.3685, "step": 6882 }, { "epoch": 0.20279009472180073, "grad_norm": 1.6930843945268363, "learning_rate": 9.68171104455872e-06, "loss": 0.6483, "step": 6883 }, { "epoch": 0.2028195571792643, "grad_norm": 1.4765950059046038, "learning_rate": 9.681530476085862e-06, "loss": 0.5, "step": 6884 }, { "epoch": 0.2028490196367279, "grad_norm": 1.6614828425776926, "learning_rate": 9.6813498580932e-06, "loss": 0.5109, "step": 6885 }, { "epoch": 0.2028784820941915, "grad_norm": 1.7579665771324533, "learning_rate": 9.681169190582645e-06, "loss": 0.4709, "step": 6886 }, { "epoch": 0.20290794455165506, "grad_norm": 1.7554291077300108, "learning_rate": 9.680988473556109e-06, "loss": 0.42, "step": 6887 }, { "epoch": 0.20293740700911864, "grad_norm": 1.7374977939587029, "learning_rate": 9.680807707015506e-06, "loss": 0.6234, "step": 6888 }, { "epoch": 0.2029668694665822, "grad_norm": 1.5422194753078273, "learning_rate": 9.680626890962742e-06, "loss": 0.4131, "step": 6889 }, { "epoch": 0.2029963319240458, "grad_norm": 1.6609352142413298, "learning_rate": 9.680446025399735e-06, "loss": 0.4804, "step": 6890 }, { "epoch": 0.20302579438150936, "grad_norm": 1.77316587445707, "learning_rate": 9.680265110328397e-06, "loss": 0.5211, "step": 6891 }, { "epoch": 0.20305525683897294, "grad_norm": 1.6702775002358086, "learning_rate": 9.680084145750642e-06, "loss": 0.4525, "step": 6892 }, { "epoch": 0.2030847192964365, "grad_norm": 1.8300401524958305, "learning_rate": 9.67990313166838e-06, "loss": 0.5413, "step": 6893 }, { "epoch": 0.20311418175390009, "grad_norm": 1.622082247951333, "learning_rate": 9.67972206808353e-06, "loss": 0.4988, "step": 6894 }, { "epoch": 0.20314364421136366, "grad_norm": 1.5629128967163908, "learning_rate": 9.679540954998008e-06, "loss": 0.4049, "step": 6895 }, { "epoch": 0.20317310666882724, "grad_norm": 1.6544739411093772, "learning_rate": 9.679359792413727e-06, "loss": 0.625, "step": 6896 }, { "epoch": 0.2032025691262908, "grad_norm": 1.8931517383163226, "learning_rate": 9.679178580332603e-06, "loss": 0.5964, "step": 6897 }, { "epoch": 0.2032320315837544, "grad_norm": 1.5662415037092308, "learning_rate": 9.678997318756555e-06, "loss": 0.4506, "step": 6898 }, { "epoch": 0.203261494041218, "grad_norm": 1.5934342584024976, "learning_rate": 9.6788160076875e-06, "loss": 0.4863, "step": 6899 }, { "epoch": 0.20329095649868156, "grad_norm": 1.627952564591483, "learning_rate": 9.678634647127355e-06, "loss": 0.4883, "step": 6900 }, { "epoch": 0.20332041895614514, "grad_norm": 1.6376147789417548, "learning_rate": 9.678453237078037e-06, "loss": 0.3556, "step": 6901 }, { "epoch": 0.2033498814136087, "grad_norm": 1.957280915962051, "learning_rate": 9.678271777541467e-06, "loss": 0.8067, "step": 6902 }, { "epoch": 0.2033793438710723, "grad_norm": 1.642347746423555, "learning_rate": 9.678090268519564e-06, "loss": 0.5109, "step": 6903 }, { "epoch": 0.20340880632853586, "grad_norm": 1.462608146529719, "learning_rate": 9.67790871001425e-06, "loss": 0.4489, "step": 6904 }, { "epoch": 0.20343826878599944, "grad_norm": 1.5109660978312565, "learning_rate": 9.67772710202744e-06, "loss": 0.3619, "step": 6905 }, { "epoch": 0.203467731243463, "grad_norm": 1.6929968875983283, "learning_rate": 9.677545444561061e-06, "loss": 0.4891, "step": 6906 }, { "epoch": 0.2034971937009266, "grad_norm": 1.6290085272250332, "learning_rate": 9.67736373761703e-06, "loss": 0.4074, "step": 6907 }, { "epoch": 0.20352665615839016, "grad_norm": 1.925474364797425, "learning_rate": 9.677181981197271e-06, "loss": 0.6353, "step": 6908 }, { "epoch": 0.20355611861585374, "grad_norm": 1.5181932993719034, "learning_rate": 9.677000175303708e-06, "loss": 0.451, "step": 6909 }, { "epoch": 0.2035855810733173, "grad_norm": 1.6621269948494457, "learning_rate": 9.676818319938262e-06, "loss": 0.6598, "step": 6910 }, { "epoch": 0.20361504353078091, "grad_norm": 1.6140159769836444, "learning_rate": 9.676636415102856e-06, "loss": 0.4964, "step": 6911 }, { "epoch": 0.2036445059882445, "grad_norm": 1.8903368390213804, "learning_rate": 9.676454460799415e-06, "loss": 0.6341, "step": 6912 }, { "epoch": 0.20367396844570806, "grad_norm": 1.6013867098673558, "learning_rate": 9.676272457029865e-06, "loss": 0.4063, "step": 6913 }, { "epoch": 0.20370343090317164, "grad_norm": 1.795400228644186, "learning_rate": 9.67609040379613e-06, "loss": 0.4626, "step": 6914 }, { "epoch": 0.20373289336063521, "grad_norm": 1.6322263816680795, "learning_rate": 9.675908301100135e-06, "loss": 0.668, "step": 6915 }, { "epoch": 0.2037623558180988, "grad_norm": 1.5896119907224746, "learning_rate": 9.675726148943807e-06, "loss": 0.5218, "step": 6916 }, { "epoch": 0.20379181827556236, "grad_norm": 1.660527640388032, "learning_rate": 9.675543947329073e-06, "loss": 0.6924, "step": 6917 }, { "epoch": 0.20382128073302594, "grad_norm": 1.56766851039621, "learning_rate": 9.675361696257861e-06, "loss": 0.3428, "step": 6918 }, { "epoch": 0.20385074319048951, "grad_norm": 1.9322289752627821, "learning_rate": 9.675179395732097e-06, "loss": 0.6027, "step": 6919 }, { "epoch": 0.2038802056479531, "grad_norm": 2.0443824513929756, "learning_rate": 9.674997045753711e-06, "loss": 0.6361, "step": 6920 }, { "epoch": 0.20390966810541666, "grad_norm": 1.5793745572552662, "learning_rate": 9.67481464632463e-06, "loss": 0.4013, "step": 6921 }, { "epoch": 0.20393913056288024, "grad_norm": 1.5211549384196108, "learning_rate": 9.674632197446785e-06, "loss": 0.5332, "step": 6922 }, { "epoch": 0.2039685930203438, "grad_norm": 1.5100913777968639, "learning_rate": 9.674449699122107e-06, "loss": 0.3984, "step": 6923 }, { "epoch": 0.20399805547780742, "grad_norm": 1.630694910545192, "learning_rate": 9.67426715135252e-06, "loss": 0.5755, "step": 6924 }, { "epoch": 0.204027517935271, "grad_norm": 1.5255648048021644, "learning_rate": 9.674084554139964e-06, "loss": 0.4988, "step": 6925 }, { "epoch": 0.20405698039273457, "grad_norm": 1.5231516171200765, "learning_rate": 9.673901907486363e-06, "loss": 0.4604, "step": 6926 }, { "epoch": 0.20408644285019814, "grad_norm": 1.9103552070036036, "learning_rate": 9.673719211393654e-06, "loss": 0.7138, "step": 6927 }, { "epoch": 0.20411590530766172, "grad_norm": 1.5985223991888087, "learning_rate": 9.673536465863767e-06, "loss": 0.3735, "step": 6928 }, { "epoch": 0.2041453677651253, "grad_norm": 1.5177730588652472, "learning_rate": 9.673353670898635e-06, "loss": 0.4695, "step": 6929 }, { "epoch": 0.20417483022258887, "grad_norm": 1.5508835711931248, "learning_rate": 9.673170826500193e-06, "loss": 0.5268, "step": 6930 }, { "epoch": 0.20420429268005244, "grad_norm": 1.6365726627585748, "learning_rate": 9.672987932670375e-06, "loss": 0.5199, "step": 6931 }, { "epoch": 0.20423375513751602, "grad_norm": 1.555325464841531, "learning_rate": 9.672804989411113e-06, "loss": 0.4048, "step": 6932 }, { "epoch": 0.2042632175949796, "grad_norm": 1.5566729647051256, "learning_rate": 9.672621996724344e-06, "loss": 0.4509, "step": 6933 }, { "epoch": 0.20429268005244317, "grad_norm": 1.8180936595960526, "learning_rate": 9.672438954612005e-06, "loss": 0.6032, "step": 6934 }, { "epoch": 0.20432214250990674, "grad_norm": 1.4902832856272963, "learning_rate": 9.672255863076028e-06, "loss": 0.573, "step": 6935 }, { "epoch": 0.20435160496737031, "grad_norm": 1.5753982507567705, "learning_rate": 9.672072722118356e-06, "loss": 0.5599, "step": 6936 }, { "epoch": 0.20438106742483392, "grad_norm": 1.5899937298569682, "learning_rate": 9.67188953174092e-06, "loss": 0.4839, "step": 6937 }, { "epoch": 0.2044105298822975, "grad_norm": 1.3849399343674424, "learning_rate": 9.671706291945662e-06, "loss": 0.3921, "step": 6938 }, { "epoch": 0.20443999233976107, "grad_norm": 1.8368654735653565, "learning_rate": 9.671523002734517e-06, "loss": 0.6921, "step": 6939 }, { "epoch": 0.20446945479722464, "grad_norm": 1.4102970299562652, "learning_rate": 9.671339664109426e-06, "loss": 0.4082, "step": 6940 }, { "epoch": 0.20449891725468822, "grad_norm": 1.6205531496122605, "learning_rate": 9.671156276072328e-06, "loss": 0.5088, "step": 6941 }, { "epoch": 0.2045283797121518, "grad_norm": 1.5612909705284457, "learning_rate": 9.67097283862516e-06, "loss": 0.5628, "step": 6942 }, { "epoch": 0.20455784216961537, "grad_norm": 1.6103260251427145, "learning_rate": 9.67078935176987e-06, "loss": 0.4996, "step": 6943 }, { "epoch": 0.20458730462707894, "grad_norm": 1.7253741732580852, "learning_rate": 9.670605815508389e-06, "loss": 0.5726, "step": 6944 }, { "epoch": 0.20461676708454252, "grad_norm": 1.6405239668672706, "learning_rate": 9.670422229842665e-06, "loss": 0.4293, "step": 6945 }, { "epoch": 0.2046462295420061, "grad_norm": 1.7000101358848692, "learning_rate": 9.67023859477464e-06, "loss": 0.5074, "step": 6946 }, { "epoch": 0.20467569199946967, "grad_norm": 1.7081406159647954, "learning_rate": 9.67005491030625e-06, "loss": 0.5078, "step": 6947 }, { "epoch": 0.20470515445693324, "grad_norm": 1.5829692488963398, "learning_rate": 9.669871176439444e-06, "loss": 0.4225, "step": 6948 }, { "epoch": 0.20473461691439682, "grad_norm": 1.398014037651193, "learning_rate": 9.669687393176166e-06, "loss": 0.4055, "step": 6949 }, { "epoch": 0.20476407937186042, "grad_norm": 1.582122869158906, "learning_rate": 9.669503560518357e-06, "loss": 0.4285, "step": 6950 }, { "epoch": 0.204793541829324, "grad_norm": 1.6439099995419582, "learning_rate": 9.66931967846796e-06, "loss": 0.7054, "step": 6951 }, { "epoch": 0.20482300428678757, "grad_norm": 1.5803972241780957, "learning_rate": 9.669135747026926e-06, "loss": 0.4008, "step": 6952 }, { "epoch": 0.20485246674425114, "grad_norm": 1.6734991254083718, "learning_rate": 9.668951766197196e-06, "loss": 0.5002, "step": 6953 }, { "epoch": 0.20488192920171472, "grad_norm": 1.5572473200240664, "learning_rate": 9.668767735980715e-06, "loss": 0.4372, "step": 6954 }, { "epoch": 0.2049113916591783, "grad_norm": 1.5254728020234638, "learning_rate": 9.668583656379434e-06, "loss": 0.4375, "step": 6955 }, { "epoch": 0.20494085411664187, "grad_norm": 1.683680589041428, "learning_rate": 9.668399527395298e-06, "loss": 0.3711, "step": 6956 }, { "epoch": 0.20497031657410544, "grad_norm": 1.6799975941440133, "learning_rate": 9.668215349030252e-06, "loss": 0.5658, "step": 6957 }, { "epoch": 0.20499977903156902, "grad_norm": 1.812922718120754, "learning_rate": 9.66803112128625e-06, "loss": 0.619, "step": 6958 }, { "epoch": 0.2050292414890326, "grad_norm": 1.6948730271758445, "learning_rate": 9.667846844165236e-06, "loss": 0.5577, "step": 6959 }, { "epoch": 0.20505870394649617, "grad_norm": 1.6670421161567077, "learning_rate": 9.66766251766916e-06, "loss": 0.672, "step": 6960 }, { "epoch": 0.20508816640395974, "grad_norm": 1.5961093744176422, "learning_rate": 9.667478141799973e-06, "loss": 0.4436, "step": 6961 }, { "epoch": 0.20511762886142332, "grad_norm": 1.7200768063880827, "learning_rate": 9.667293716559625e-06, "loss": 0.5693, "step": 6962 }, { "epoch": 0.20514709131888692, "grad_norm": 1.5352304591573567, "learning_rate": 9.667109241950066e-06, "loss": 0.4347, "step": 6963 }, { "epoch": 0.2051765537763505, "grad_norm": 1.6679884506284182, "learning_rate": 9.666924717973247e-06, "loss": 0.5678, "step": 6964 }, { "epoch": 0.20520601623381407, "grad_norm": 1.6780152618451796, "learning_rate": 9.666740144631121e-06, "loss": 0.4349, "step": 6965 }, { "epoch": 0.20523547869127765, "grad_norm": 1.8704107421289158, "learning_rate": 9.66655552192564e-06, "loss": 0.4796, "step": 6966 }, { "epoch": 0.20526494114874122, "grad_norm": 1.7500285362396895, "learning_rate": 9.666370849858758e-06, "loss": 0.5389, "step": 6967 }, { "epoch": 0.2052944036062048, "grad_norm": 1.5454558030775367, "learning_rate": 9.666186128432427e-06, "loss": 0.4191, "step": 6968 }, { "epoch": 0.20532386606366837, "grad_norm": 1.669331260151616, "learning_rate": 9.6660013576486e-06, "loss": 0.5723, "step": 6969 }, { "epoch": 0.20535332852113194, "grad_norm": 1.8908976703672131, "learning_rate": 9.665816537509235e-06, "loss": 0.562, "step": 6970 }, { "epoch": 0.20538279097859552, "grad_norm": 1.5136574920503392, "learning_rate": 9.665631668016282e-06, "loss": 0.5304, "step": 6971 }, { "epoch": 0.2054122534360591, "grad_norm": 1.7498002768163203, "learning_rate": 9.6654467491717e-06, "loss": 0.5544, "step": 6972 }, { "epoch": 0.20544171589352267, "grad_norm": 1.5484806489401854, "learning_rate": 9.665261780977444e-06, "loss": 0.5495, "step": 6973 }, { "epoch": 0.20547117835098624, "grad_norm": 1.506485514810115, "learning_rate": 9.665076763435472e-06, "loss": 0.5682, "step": 6974 }, { "epoch": 0.20550064080844982, "grad_norm": 1.6882567344675992, "learning_rate": 9.664891696547737e-06, "loss": 0.6575, "step": 6975 }, { "epoch": 0.20553010326591342, "grad_norm": 1.9852475839342922, "learning_rate": 9.664706580316201e-06, "loss": 0.78, "step": 6976 }, { "epoch": 0.205559565723377, "grad_norm": 1.547553122167708, "learning_rate": 9.664521414742823e-06, "loss": 0.5698, "step": 6977 }, { "epoch": 0.20558902818084057, "grad_norm": 2.534397659015121, "learning_rate": 9.664336199829557e-06, "loss": 0.3695, "step": 6978 }, { "epoch": 0.20561849063830415, "grad_norm": 1.8591186467501262, "learning_rate": 9.664150935578363e-06, "loss": 0.604, "step": 6979 }, { "epoch": 0.20564795309576772, "grad_norm": 1.565063079480408, "learning_rate": 9.663965621991201e-06, "loss": 0.4509, "step": 6980 }, { "epoch": 0.2056774155532313, "grad_norm": 1.5245312592775133, "learning_rate": 9.663780259070034e-06, "loss": 0.4633, "step": 6981 }, { "epoch": 0.20570687801069487, "grad_norm": 1.7488529115500506, "learning_rate": 9.663594846816819e-06, "loss": 0.4634, "step": 6982 }, { "epoch": 0.20573634046815845, "grad_norm": 1.8028102413266682, "learning_rate": 9.663409385233521e-06, "loss": 0.5352, "step": 6983 }, { "epoch": 0.20576580292562202, "grad_norm": 1.5736884187736246, "learning_rate": 9.663223874322098e-06, "loss": 0.3914, "step": 6984 }, { "epoch": 0.2057952653830856, "grad_norm": 1.635213708863617, "learning_rate": 9.663038314084516e-06, "loss": 0.5451, "step": 6985 }, { "epoch": 0.20582472784054917, "grad_norm": 1.6185811088117357, "learning_rate": 9.662852704522734e-06, "loss": 0.4976, "step": 6986 }, { "epoch": 0.20585419029801275, "grad_norm": 1.5667864650792689, "learning_rate": 9.66266704563872e-06, "loss": 0.4472, "step": 6987 }, { "epoch": 0.20588365275547632, "grad_norm": 1.6506657981135826, "learning_rate": 9.662481337434434e-06, "loss": 0.5467, "step": 6988 }, { "epoch": 0.20591311521293992, "grad_norm": 1.6921976130354992, "learning_rate": 9.662295579911838e-06, "loss": 0.4846, "step": 6989 }, { "epoch": 0.2059425776704035, "grad_norm": 1.6785242769296769, "learning_rate": 9.662109773072903e-06, "loss": 0.5387, "step": 6990 }, { "epoch": 0.20597204012786707, "grad_norm": 1.711304446967202, "learning_rate": 9.66192391691959e-06, "loss": 0.4667, "step": 6991 }, { "epoch": 0.20600150258533065, "grad_norm": 1.5016709417424687, "learning_rate": 9.66173801145387e-06, "loss": 0.514, "step": 6992 }, { "epoch": 0.20603096504279422, "grad_norm": 1.9135091945388965, "learning_rate": 9.661552056677703e-06, "loss": 0.6298, "step": 6993 }, { "epoch": 0.2060604275002578, "grad_norm": 1.698723555438854, "learning_rate": 9.661366052593059e-06, "loss": 0.5881, "step": 6994 }, { "epoch": 0.20608988995772137, "grad_norm": 1.6757722981562737, "learning_rate": 9.661179999201905e-06, "loss": 0.4926, "step": 6995 }, { "epoch": 0.20611935241518495, "grad_norm": 1.7741002283323153, "learning_rate": 9.660993896506213e-06, "loss": 0.6276, "step": 6996 }, { "epoch": 0.20614881487264852, "grad_norm": 1.4106197028534913, "learning_rate": 9.660807744507945e-06, "loss": 0.4815, "step": 6997 }, { "epoch": 0.2061782773301121, "grad_norm": 1.4770975825670298, "learning_rate": 9.660621543209074e-06, "loss": 0.4526, "step": 6998 }, { "epoch": 0.20620773978757567, "grad_norm": 1.7744676519033362, "learning_rate": 9.66043529261157e-06, "loss": 0.5001, "step": 6999 }, { "epoch": 0.20623720224503925, "grad_norm": 1.8555139432562904, "learning_rate": 9.660248992717399e-06, "loss": 0.5029, "step": 7000 }, { "epoch": 0.20626666470250282, "grad_norm": 1.5492713150955695, "learning_rate": 9.660062643528537e-06, "loss": 0.3782, "step": 7001 }, { "epoch": 0.20629612715996642, "grad_norm": 1.8322735157683614, "learning_rate": 9.65987624504695e-06, "loss": 0.5978, "step": 7002 }, { "epoch": 0.20632558961743, "grad_norm": 1.7225093219701328, "learning_rate": 9.659689797274613e-06, "loss": 0.4883, "step": 7003 }, { "epoch": 0.20635505207489357, "grad_norm": 1.5302341559215895, "learning_rate": 9.659503300213499e-06, "loss": 0.4063, "step": 7004 }, { "epoch": 0.20638451453235715, "grad_norm": 2.0524276481706867, "learning_rate": 9.659316753865579e-06, "loss": 0.593, "step": 7005 }, { "epoch": 0.20641397698982072, "grad_norm": 1.6029132379815048, "learning_rate": 9.659130158232826e-06, "loss": 0.5102, "step": 7006 }, { "epoch": 0.2064434394472843, "grad_norm": 1.7578942090909653, "learning_rate": 9.658943513317212e-06, "loss": 0.3491, "step": 7007 }, { "epoch": 0.20647290190474787, "grad_norm": 1.583810012561532, "learning_rate": 9.658756819120716e-06, "loss": 0.3998, "step": 7008 }, { "epoch": 0.20650236436221145, "grad_norm": 1.6679886660280987, "learning_rate": 9.658570075645308e-06, "loss": 0.3586, "step": 7009 }, { "epoch": 0.20653182681967502, "grad_norm": 1.8245740166212994, "learning_rate": 9.658383282892967e-06, "loss": 0.5754, "step": 7010 }, { "epoch": 0.2065612892771386, "grad_norm": 1.7646266892610774, "learning_rate": 9.658196440865668e-06, "loss": 0.6523, "step": 7011 }, { "epoch": 0.20659075173460217, "grad_norm": 1.6341938291609124, "learning_rate": 9.658009549565386e-06, "loss": 0.519, "step": 7012 }, { "epoch": 0.20662021419206575, "grad_norm": 1.7177229783452086, "learning_rate": 9.657822608994096e-06, "loss": 0.6535, "step": 7013 }, { "epoch": 0.20664967664952932, "grad_norm": 1.8015240184662316, "learning_rate": 9.65763561915378e-06, "loss": 0.5486, "step": 7014 }, { "epoch": 0.20667913910699293, "grad_norm": 1.3703374104033637, "learning_rate": 9.657448580046415e-06, "loss": 0.5625, "step": 7015 }, { "epoch": 0.2067086015644565, "grad_norm": 1.5851875399369963, "learning_rate": 9.657261491673977e-06, "loss": 0.5983, "step": 7016 }, { "epoch": 0.20673806402192008, "grad_norm": 1.6468425999040162, "learning_rate": 9.657074354038447e-06, "loss": 0.5539, "step": 7017 }, { "epoch": 0.20676752647938365, "grad_norm": 1.689997702708899, "learning_rate": 9.656887167141805e-06, "loss": 0.5493, "step": 7018 }, { "epoch": 0.20679698893684723, "grad_norm": 1.795538704100938, "learning_rate": 9.656699930986026e-06, "loss": 0.5091, "step": 7019 }, { "epoch": 0.2068264513943108, "grad_norm": 1.6967755372685693, "learning_rate": 9.656512645573097e-06, "loss": 0.595, "step": 7020 }, { "epoch": 0.20685591385177438, "grad_norm": 1.6969562256421464, "learning_rate": 9.656325310904996e-06, "loss": 0.6333, "step": 7021 }, { "epoch": 0.20688537630923795, "grad_norm": 1.806275942943593, "learning_rate": 9.656137926983705e-06, "loss": 0.5962, "step": 7022 }, { "epoch": 0.20691483876670153, "grad_norm": 1.5226477806409429, "learning_rate": 9.655950493811206e-06, "loss": 0.4835, "step": 7023 }, { "epoch": 0.2069443012241651, "grad_norm": 1.6055368104960608, "learning_rate": 9.65576301138948e-06, "loss": 0.4828, "step": 7024 }, { "epoch": 0.20697376368162868, "grad_norm": 1.6078206401355535, "learning_rate": 9.655575479720514e-06, "loss": 0.5132, "step": 7025 }, { "epoch": 0.20700322613909225, "grad_norm": 1.5959761598671296, "learning_rate": 9.655387898806288e-06, "loss": 0.4752, "step": 7026 }, { "epoch": 0.20703268859655583, "grad_norm": 1.71690659724167, "learning_rate": 9.655200268648788e-06, "loss": 0.5107, "step": 7027 }, { "epoch": 0.20706215105401943, "grad_norm": 1.6299700620114155, "learning_rate": 9.655012589249999e-06, "loss": 0.4883, "step": 7028 }, { "epoch": 0.207091613511483, "grad_norm": 1.7677263474063436, "learning_rate": 9.654824860611904e-06, "loss": 0.6039, "step": 7029 }, { "epoch": 0.20712107596894658, "grad_norm": 1.8080303473222914, "learning_rate": 9.654637082736493e-06, "loss": 0.5509, "step": 7030 }, { "epoch": 0.20715053842641015, "grad_norm": 1.5912704872747319, "learning_rate": 9.654449255625745e-06, "loss": 0.5446, "step": 7031 }, { "epoch": 0.20718000088387373, "grad_norm": 1.532074326429592, "learning_rate": 9.654261379281655e-06, "loss": 0.523, "step": 7032 }, { "epoch": 0.2072094633413373, "grad_norm": 1.7092185979628798, "learning_rate": 9.654073453706206e-06, "loss": 0.4998, "step": 7033 }, { "epoch": 0.20723892579880088, "grad_norm": 1.463520011138619, "learning_rate": 9.653885478901386e-06, "loss": 0.4176, "step": 7034 }, { "epoch": 0.20726838825626445, "grad_norm": 1.5583166544642764, "learning_rate": 9.653697454869184e-06, "loss": 0.4385, "step": 7035 }, { "epoch": 0.20729785071372803, "grad_norm": 1.5160277188086275, "learning_rate": 9.653509381611588e-06, "loss": 0.5239, "step": 7036 }, { "epoch": 0.2073273131711916, "grad_norm": 1.7218132807837816, "learning_rate": 9.653321259130588e-06, "loss": 0.567, "step": 7037 }, { "epoch": 0.20735677562865518, "grad_norm": 1.557414122905985, "learning_rate": 9.653133087428174e-06, "loss": 0.3275, "step": 7038 }, { "epoch": 0.20738623808611875, "grad_norm": 1.4737620744547235, "learning_rate": 9.652944866506337e-06, "loss": 0.5151, "step": 7039 }, { "epoch": 0.20741570054358233, "grad_norm": 1.5824388126597697, "learning_rate": 9.652756596367065e-06, "loss": 0.4444, "step": 7040 }, { "epoch": 0.20744516300104593, "grad_norm": 1.5019137612892535, "learning_rate": 9.652568277012353e-06, "loss": 0.527, "step": 7041 }, { "epoch": 0.2074746254585095, "grad_norm": 1.519940489138681, "learning_rate": 9.652379908444193e-06, "loss": 0.4401, "step": 7042 }, { "epoch": 0.20750408791597308, "grad_norm": 1.5246530114151495, "learning_rate": 9.652191490664576e-06, "loss": 0.5289, "step": 7043 }, { "epoch": 0.20753355037343665, "grad_norm": 1.5057669571274683, "learning_rate": 9.652003023675494e-06, "loss": 0.4177, "step": 7044 }, { "epoch": 0.20756301283090023, "grad_norm": 1.82550834027728, "learning_rate": 9.651814507478941e-06, "loss": 0.6003, "step": 7045 }, { "epoch": 0.2075924752883638, "grad_norm": 1.6558658123300447, "learning_rate": 9.651625942076913e-06, "loss": 0.5909, "step": 7046 }, { "epoch": 0.20762193774582738, "grad_norm": 1.610949174724384, "learning_rate": 9.651437327471403e-06, "loss": 0.5447, "step": 7047 }, { "epoch": 0.20765140020329095, "grad_norm": 1.4499790599562055, "learning_rate": 9.651248663664407e-06, "loss": 0.3759, "step": 7048 }, { "epoch": 0.20768086266075453, "grad_norm": 1.731286757175122, "learning_rate": 9.651059950657919e-06, "loss": 0.5966, "step": 7049 }, { "epoch": 0.2077103251182181, "grad_norm": 1.560956603840138, "learning_rate": 9.650871188453938e-06, "loss": 0.5454, "step": 7050 }, { "epoch": 0.20773978757568168, "grad_norm": 1.5929388850884871, "learning_rate": 9.65068237705446e-06, "loss": 0.514, "step": 7051 }, { "epoch": 0.20776925003314525, "grad_norm": 1.7839156163205065, "learning_rate": 9.650493516461478e-06, "loss": 0.5645, "step": 7052 }, { "epoch": 0.20779871249060883, "grad_norm": 1.5567664303832394, "learning_rate": 9.650304606676995e-06, "loss": 0.5686, "step": 7053 }, { "epoch": 0.20782817494807243, "grad_norm": 1.5831429485683461, "learning_rate": 9.650115647703005e-06, "loss": 0.4716, "step": 7054 }, { "epoch": 0.207857637405536, "grad_norm": 1.7654135851761206, "learning_rate": 9.64992663954151e-06, "loss": 0.4702, "step": 7055 }, { "epoch": 0.20788709986299958, "grad_norm": 1.518043361217195, "learning_rate": 9.64973758219451e-06, "loss": 0.4876, "step": 7056 }, { "epoch": 0.20791656232046316, "grad_norm": 1.6532007111593359, "learning_rate": 9.649548475664e-06, "loss": 0.4888, "step": 7057 }, { "epoch": 0.20794602477792673, "grad_norm": 1.4859518984985542, "learning_rate": 9.649359319951984e-06, "loss": 0.5136, "step": 7058 }, { "epoch": 0.2079754872353903, "grad_norm": 1.5728702726671733, "learning_rate": 9.649170115060463e-06, "loss": 0.5974, "step": 7059 }, { "epoch": 0.20800494969285388, "grad_norm": 1.6995887638726186, "learning_rate": 9.648980860991438e-06, "loss": 0.3829, "step": 7060 }, { "epoch": 0.20803441215031745, "grad_norm": 1.6955491866277284, "learning_rate": 9.648791557746908e-06, "loss": 0.5041, "step": 7061 }, { "epoch": 0.20806387460778103, "grad_norm": 1.7325584750658871, "learning_rate": 9.648602205328879e-06, "loss": 0.6199, "step": 7062 }, { "epoch": 0.2080933370652446, "grad_norm": 1.5976227576143802, "learning_rate": 9.648412803739352e-06, "loss": 0.4818, "step": 7063 }, { "epoch": 0.20812279952270818, "grad_norm": 1.6038798509679855, "learning_rate": 9.648223352980332e-06, "loss": 0.4284, "step": 7064 }, { "epoch": 0.20815226198017175, "grad_norm": 1.5826495576413413, "learning_rate": 9.64803385305382e-06, "loss": 0.4799, "step": 7065 }, { "epoch": 0.20818172443763533, "grad_norm": 1.435681825777552, "learning_rate": 9.647844303961823e-06, "loss": 0.4789, "step": 7066 }, { "epoch": 0.20821118689509893, "grad_norm": 1.484078879433954, "learning_rate": 9.647654705706349e-06, "loss": 0.4865, "step": 7067 }, { "epoch": 0.2082406493525625, "grad_norm": 1.5391729634600932, "learning_rate": 9.647465058289395e-06, "loss": 0.5279, "step": 7068 }, { "epoch": 0.20827011181002608, "grad_norm": 1.5829203020571774, "learning_rate": 9.647275361712975e-06, "loss": 0.485, "step": 7069 }, { "epoch": 0.20829957426748966, "grad_norm": 1.6562752729371584, "learning_rate": 9.647085615979093e-06, "loss": 0.4966, "step": 7070 }, { "epoch": 0.20832903672495323, "grad_norm": 1.7292084194979926, "learning_rate": 9.646895821089753e-06, "loss": 0.5575, "step": 7071 }, { "epoch": 0.2083584991824168, "grad_norm": 1.6903176778956774, "learning_rate": 9.646705977046966e-06, "loss": 0.408, "step": 7072 }, { "epoch": 0.20838796163988038, "grad_norm": 1.5786936801206215, "learning_rate": 9.64651608385274e-06, "loss": 0.525, "step": 7073 }, { "epoch": 0.20841742409734396, "grad_norm": 1.7498557556728451, "learning_rate": 9.646326141509084e-06, "loss": 0.637, "step": 7074 }, { "epoch": 0.20844688655480753, "grad_norm": 1.5174877686458772, "learning_rate": 9.646136150018004e-06, "loss": 0.4923, "step": 7075 }, { "epoch": 0.2084763490122711, "grad_norm": 1.6806032205945056, "learning_rate": 9.645946109381513e-06, "loss": 0.5807, "step": 7076 }, { "epoch": 0.20850581146973468, "grad_norm": 1.7206733066436355, "learning_rate": 9.64575601960162e-06, "loss": 0.6485, "step": 7077 }, { "epoch": 0.20853527392719826, "grad_norm": 1.625904654175273, "learning_rate": 9.645565880680334e-06, "loss": 0.5696, "step": 7078 }, { "epoch": 0.20856473638466186, "grad_norm": 1.5213956064517753, "learning_rate": 9.64537569261967e-06, "loss": 0.4597, "step": 7079 }, { "epoch": 0.20859419884212543, "grad_norm": 1.5992514457031641, "learning_rate": 9.645185455421636e-06, "loss": 0.5909, "step": 7080 }, { "epoch": 0.208623661299589, "grad_norm": 1.542324939669579, "learning_rate": 9.644995169088247e-06, "loss": 0.5032, "step": 7081 }, { "epoch": 0.20865312375705258, "grad_norm": 1.3826160038072244, "learning_rate": 9.644804833621514e-06, "loss": 0.3352, "step": 7082 }, { "epoch": 0.20868258621451616, "grad_norm": 1.5795956191513185, "learning_rate": 9.64461444902345e-06, "loss": 0.4184, "step": 7083 }, { "epoch": 0.20871204867197973, "grad_norm": 1.6227061068472488, "learning_rate": 9.644424015296073e-06, "loss": 0.5764, "step": 7084 }, { "epoch": 0.2087415111294433, "grad_norm": 1.3993691897153444, "learning_rate": 9.644233532441392e-06, "loss": 0.4104, "step": 7085 }, { "epoch": 0.20877097358690688, "grad_norm": 1.6973801713419794, "learning_rate": 9.644043000461425e-06, "loss": 0.4506, "step": 7086 }, { "epoch": 0.20880043604437046, "grad_norm": 1.5897184515089815, "learning_rate": 9.643852419358187e-06, "loss": 0.5294, "step": 7087 }, { "epoch": 0.20882989850183403, "grad_norm": 1.4323378076492346, "learning_rate": 9.643661789133692e-06, "loss": 0.5889, "step": 7088 }, { "epoch": 0.2088593609592976, "grad_norm": 1.6439889034754585, "learning_rate": 9.643471109789957e-06, "loss": 0.5343, "step": 7089 }, { "epoch": 0.20888882341676118, "grad_norm": 1.613623074471751, "learning_rate": 9.643280381329002e-06, "loss": 0.6047, "step": 7090 }, { "epoch": 0.20891828587422476, "grad_norm": 1.6396224055214474, "learning_rate": 9.64308960375284e-06, "loss": 0.6509, "step": 7091 }, { "epoch": 0.20894774833168836, "grad_norm": 1.5185134242373284, "learning_rate": 9.642898777063492e-06, "loss": 0.5495, "step": 7092 }, { "epoch": 0.20897721078915193, "grad_norm": 1.5297324047756935, "learning_rate": 9.642707901262976e-06, "loss": 0.5133, "step": 7093 }, { "epoch": 0.2090066732466155, "grad_norm": 1.7186338902696432, "learning_rate": 9.642516976353311e-06, "loss": 0.6103, "step": 7094 }, { "epoch": 0.20903613570407908, "grad_norm": 1.4555763318349935, "learning_rate": 9.642326002336515e-06, "loss": 0.5536, "step": 7095 }, { "epoch": 0.20906559816154266, "grad_norm": 1.6936856906537516, "learning_rate": 9.64213497921461e-06, "loss": 0.6003, "step": 7096 }, { "epoch": 0.20909506061900623, "grad_norm": 1.51257575461895, "learning_rate": 9.641943906989616e-06, "loss": 0.5471, "step": 7097 }, { "epoch": 0.2091245230764698, "grad_norm": 1.6255081854643316, "learning_rate": 9.641752785663557e-06, "loss": 0.5751, "step": 7098 }, { "epoch": 0.20915398553393338, "grad_norm": 1.7083413685015023, "learning_rate": 9.641561615238449e-06, "loss": 0.4985, "step": 7099 }, { "epoch": 0.20918344799139696, "grad_norm": 1.6584594851542862, "learning_rate": 9.641370395716315e-06, "loss": 0.5513, "step": 7100 }, { "epoch": 0.20921291044886053, "grad_norm": 1.675533874225458, "learning_rate": 9.641179127099182e-06, "loss": 0.4823, "step": 7101 }, { "epoch": 0.2092423729063241, "grad_norm": 1.379898106398813, "learning_rate": 9.64098780938907e-06, "loss": 0.4005, "step": 7102 }, { "epoch": 0.20927183536378768, "grad_norm": 1.7300076299653653, "learning_rate": 9.640796442588003e-06, "loss": 0.6292, "step": 7103 }, { "epoch": 0.20930129782125126, "grad_norm": 1.5825807441937219, "learning_rate": 9.640605026698005e-06, "loss": 0.5753, "step": 7104 }, { "epoch": 0.20933076027871486, "grad_norm": 1.78457769730939, "learning_rate": 9.640413561721102e-06, "loss": 0.4668, "step": 7105 }, { "epoch": 0.20936022273617844, "grad_norm": 1.805043093742159, "learning_rate": 9.640222047659318e-06, "loss": 0.6545, "step": 7106 }, { "epoch": 0.209389685193642, "grad_norm": 1.4274808751934482, "learning_rate": 9.64003048451468e-06, "loss": 0.3933, "step": 7107 }, { "epoch": 0.20941914765110559, "grad_norm": 1.5576269920889467, "learning_rate": 9.639838872289214e-06, "loss": 0.5173, "step": 7108 }, { "epoch": 0.20944861010856916, "grad_norm": 1.6609712562054397, "learning_rate": 9.639647210984945e-06, "loss": 0.591, "step": 7109 }, { "epoch": 0.20947807256603274, "grad_norm": 1.5984476199835247, "learning_rate": 9.639455500603905e-06, "loss": 0.4402, "step": 7110 }, { "epoch": 0.2095075350234963, "grad_norm": 1.560996565108184, "learning_rate": 9.639263741148115e-06, "loss": 0.569, "step": 7111 }, { "epoch": 0.20953699748095989, "grad_norm": 1.5495603890681187, "learning_rate": 9.639071932619609e-06, "loss": 0.496, "step": 7112 }, { "epoch": 0.20956645993842346, "grad_norm": 1.7422217020264517, "learning_rate": 9.638880075020414e-06, "loss": 0.5079, "step": 7113 }, { "epoch": 0.20959592239588704, "grad_norm": 1.6286148910495724, "learning_rate": 9.638688168352557e-06, "loss": 0.6451, "step": 7114 }, { "epoch": 0.2096253848533506, "grad_norm": 1.6703922734956118, "learning_rate": 9.638496212618072e-06, "loss": 0.4663, "step": 7115 }, { "epoch": 0.20965484731081419, "grad_norm": 1.539080940581385, "learning_rate": 9.638304207818988e-06, "loss": 0.4531, "step": 7116 }, { "epoch": 0.20968430976827776, "grad_norm": 1.5147906752173794, "learning_rate": 9.638112153957335e-06, "loss": 0.4545, "step": 7117 }, { "epoch": 0.20971377222574136, "grad_norm": 1.7208565703332022, "learning_rate": 9.637920051035146e-06, "loss": 0.5145, "step": 7118 }, { "epoch": 0.20974323468320494, "grad_norm": 1.6752916467388117, "learning_rate": 9.637727899054451e-06, "loss": 0.4727, "step": 7119 }, { "epoch": 0.2097726971406685, "grad_norm": 1.7677365346876406, "learning_rate": 9.637535698017285e-06, "loss": 0.6099, "step": 7120 }, { "epoch": 0.2098021595981321, "grad_norm": 1.5605614601827396, "learning_rate": 9.637343447925677e-06, "loss": 0.3904, "step": 7121 }, { "epoch": 0.20983162205559566, "grad_norm": 1.6093461892799223, "learning_rate": 9.637151148781667e-06, "loss": 0.4164, "step": 7122 }, { "epoch": 0.20986108451305924, "grad_norm": 1.6928078947251017, "learning_rate": 9.636958800587284e-06, "loss": 0.4881, "step": 7123 }, { "epoch": 0.2098905469705228, "grad_norm": 1.6679766018299254, "learning_rate": 9.636766403344561e-06, "loss": 0.5111, "step": 7124 }, { "epoch": 0.2099200094279864, "grad_norm": 1.5924964983609633, "learning_rate": 9.636573957055539e-06, "loss": 0.5423, "step": 7125 }, { "epoch": 0.20994947188544996, "grad_norm": 1.6789081061256192, "learning_rate": 9.636381461722252e-06, "loss": 0.571, "step": 7126 }, { "epoch": 0.20997893434291354, "grad_norm": 1.8020064630736563, "learning_rate": 9.636188917346733e-06, "loss": 0.4469, "step": 7127 }, { "epoch": 0.2100083968003771, "grad_norm": 1.8830684637534456, "learning_rate": 9.635996323931021e-06, "loss": 0.5566, "step": 7128 }, { "epoch": 0.2100378592578407, "grad_norm": 1.4794195521552014, "learning_rate": 9.635803681477151e-06, "loss": 0.4255, "step": 7129 }, { "epoch": 0.21006732171530426, "grad_norm": 1.621959287072127, "learning_rate": 9.635610989987166e-06, "loss": 0.4628, "step": 7130 }, { "epoch": 0.21009678417276786, "grad_norm": 1.5783303483338427, "learning_rate": 9.635418249463097e-06, "loss": 0.4726, "step": 7131 }, { "epoch": 0.21012624663023144, "grad_norm": 1.7571130484757693, "learning_rate": 9.63522545990699e-06, "loss": 0.6386, "step": 7132 }, { "epoch": 0.21015570908769501, "grad_norm": 1.620177026694157, "learning_rate": 9.635032621320877e-06, "loss": 0.4777, "step": 7133 }, { "epoch": 0.2101851715451586, "grad_norm": 1.7372474291391728, "learning_rate": 9.634839733706805e-06, "loss": 0.497, "step": 7134 }, { "epoch": 0.21021463400262216, "grad_norm": 1.7109459143838976, "learning_rate": 9.63464679706681e-06, "loss": 0.7102, "step": 7135 }, { "epoch": 0.21024409646008574, "grad_norm": 1.5214382782519658, "learning_rate": 9.63445381140293e-06, "loss": 0.4837, "step": 7136 }, { "epoch": 0.2102735589175493, "grad_norm": 1.586376923907813, "learning_rate": 9.634260776717214e-06, "loss": 0.417, "step": 7137 }, { "epoch": 0.2103030213750129, "grad_norm": 1.4584775958314082, "learning_rate": 9.634067693011698e-06, "loss": 0.4828, "step": 7138 }, { "epoch": 0.21033248383247646, "grad_norm": 1.8118927948033225, "learning_rate": 9.633874560288427e-06, "loss": 0.4194, "step": 7139 }, { "epoch": 0.21036194628994004, "grad_norm": 1.63363005279818, "learning_rate": 9.633681378549443e-06, "loss": 0.5893, "step": 7140 }, { "epoch": 0.2103914087474036, "grad_norm": 1.5247114453486341, "learning_rate": 9.63348814779679e-06, "loss": 0.5577, "step": 7141 }, { "epoch": 0.2104208712048672, "grad_norm": 1.6896595628820972, "learning_rate": 9.63329486803251e-06, "loss": 0.5022, "step": 7142 }, { "epoch": 0.21045033366233076, "grad_norm": 1.4555716356328567, "learning_rate": 9.63310153925865e-06, "loss": 0.5467, "step": 7143 }, { "epoch": 0.21047979611979437, "grad_norm": 1.6245599022444057, "learning_rate": 9.632908161477254e-06, "loss": 0.4114, "step": 7144 }, { "epoch": 0.21050925857725794, "grad_norm": 1.455694193195926, "learning_rate": 9.632714734690368e-06, "loss": 0.4443, "step": 7145 }, { "epoch": 0.21053872103472152, "grad_norm": 1.584763761630807, "learning_rate": 9.632521258900036e-06, "loss": 0.4761, "step": 7146 }, { "epoch": 0.2105681834921851, "grad_norm": 1.715098548802994, "learning_rate": 9.632327734108307e-06, "loss": 0.5747, "step": 7147 }, { "epoch": 0.21059764594964867, "grad_norm": 1.7004657572352904, "learning_rate": 9.632134160317226e-06, "loss": 0.3639, "step": 7148 }, { "epoch": 0.21062710840711224, "grad_norm": 1.5067629791213546, "learning_rate": 9.631940537528843e-06, "loss": 0.546, "step": 7149 }, { "epoch": 0.21065657086457582, "grad_norm": 1.852977542122055, "learning_rate": 9.631746865745203e-06, "loss": 0.5581, "step": 7150 }, { "epoch": 0.2106860333220394, "grad_norm": 1.7359371633263776, "learning_rate": 9.631553144968358e-06, "loss": 0.4601, "step": 7151 }, { "epoch": 0.21071549577950296, "grad_norm": 1.6074204883447323, "learning_rate": 9.631359375200352e-06, "loss": 0.5034, "step": 7152 }, { "epoch": 0.21074495823696654, "grad_norm": 1.3649255874596375, "learning_rate": 9.63116555644324e-06, "loss": 0.3987, "step": 7153 }, { "epoch": 0.21077442069443011, "grad_norm": 1.6296376853258183, "learning_rate": 9.630971688699071e-06, "loss": 0.5613, "step": 7154 }, { "epoch": 0.2108038831518937, "grad_norm": 1.400797120240424, "learning_rate": 9.630777771969895e-06, "loss": 0.378, "step": 7155 }, { "epoch": 0.21083334560935726, "grad_norm": 1.6100989912082755, "learning_rate": 9.630583806257761e-06, "loss": 0.5063, "step": 7156 }, { "epoch": 0.21086280806682087, "grad_norm": 1.57212547069578, "learning_rate": 9.630389791564725e-06, "loss": 0.5821, "step": 7157 }, { "epoch": 0.21089227052428444, "grad_norm": 1.5950167618589153, "learning_rate": 9.630195727892835e-06, "loss": 0.3997, "step": 7158 }, { "epoch": 0.21092173298174802, "grad_norm": 1.6307329993688962, "learning_rate": 9.630001615244147e-06, "loss": 0.5643, "step": 7159 }, { "epoch": 0.2109511954392116, "grad_norm": 1.4532761639883516, "learning_rate": 9.629807453620712e-06, "loss": 0.2832, "step": 7160 }, { "epoch": 0.21098065789667517, "grad_norm": 1.760067727295501, "learning_rate": 9.629613243024585e-06, "loss": 0.5052, "step": 7161 }, { "epoch": 0.21101012035413874, "grad_norm": 1.7214664768572974, "learning_rate": 9.629418983457821e-06, "loss": 0.6714, "step": 7162 }, { "epoch": 0.21103958281160232, "grad_norm": 1.4801506849818151, "learning_rate": 9.629224674922473e-06, "loss": 0.4982, "step": 7163 }, { "epoch": 0.2110690452690659, "grad_norm": 1.6746433315772897, "learning_rate": 9.629030317420599e-06, "loss": 0.4767, "step": 7164 }, { "epoch": 0.21109850772652947, "grad_norm": 1.6127484757165087, "learning_rate": 9.628835910954249e-06, "loss": 0.4546, "step": 7165 }, { "epoch": 0.21112797018399304, "grad_norm": 1.5569419183358604, "learning_rate": 9.628641455525486e-06, "loss": 0.558, "step": 7166 }, { "epoch": 0.21115743264145662, "grad_norm": 1.5650045848752858, "learning_rate": 9.628446951136364e-06, "loss": 0.44, "step": 7167 }, { "epoch": 0.2111868950989202, "grad_norm": 1.642366718970551, "learning_rate": 9.628252397788942e-06, "loss": 0.5054, "step": 7168 }, { "epoch": 0.21121635755638377, "grad_norm": 1.8901950195808388, "learning_rate": 9.628057795485274e-06, "loss": 0.639, "step": 7169 }, { "epoch": 0.21124582001384737, "grad_norm": 1.4923901203788403, "learning_rate": 9.627863144227422e-06, "loss": 0.5142, "step": 7170 }, { "epoch": 0.21127528247131094, "grad_norm": 1.6456747686273245, "learning_rate": 9.627668444017446e-06, "loss": 0.5947, "step": 7171 }, { "epoch": 0.21130474492877452, "grad_norm": 1.6938634593045745, "learning_rate": 9.627473694857402e-06, "loss": 0.4269, "step": 7172 }, { "epoch": 0.2113342073862381, "grad_norm": 1.47834667767409, "learning_rate": 9.627278896749351e-06, "loss": 0.31, "step": 7173 }, { "epoch": 0.21136366984370167, "grad_norm": 1.5435527608607076, "learning_rate": 9.627084049695355e-06, "loss": 0.487, "step": 7174 }, { "epoch": 0.21139313230116524, "grad_norm": 1.6250092366384938, "learning_rate": 9.626889153697473e-06, "loss": 0.5413, "step": 7175 }, { "epoch": 0.21142259475862882, "grad_norm": 1.5287841248272624, "learning_rate": 9.626694208757769e-06, "loss": 0.4194, "step": 7176 }, { "epoch": 0.2114520572160924, "grad_norm": 1.6236107972101466, "learning_rate": 9.626499214878302e-06, "loss": 0.5429, "step": 7177 }, { "epoch": 0.21148151967355597, "grad_norm": 1.745182286842959, "learning_rate": 9.626304172061139e-06, "loss": 0.541, "step": 7178 }, { "epoch": 0.21151098213101954, "grad_norm": 1.5521317863782815, "learning_rate": 9.626109080308338e-06, "loss": 0.5444, "step": 7179 }, { "epoch": 0.21154044458848312, "grad_norm": 1.5416161769631254, "learning_rate": 9.625913939621966e-06, "loss": 0.5647, "step": 7180 }, { "epoch": 0.2115699070459467, "grad_norm": 1.94702544515745, "learning_rate": 9.625718750004086e-06, "loss": 0.726, "step": 7181 }, { "epoch": 0.21159936950341027, "grad_norm": 1.7059079957121146, "learning_rate": 9.625523511456763e-06, "loss": 0.5859, "step": 7182 }, { "epoch": 0.21162883196087387, "grad_norm": 1.4598233391829771, "learning_rate": 9.62532822398206e-06, "loss": 0.503, "step": 7183 }, { "epoch": 0.21165829441833744, "grad_norm": 1.825951368365969, "learning_rate": 9.625132887582047e-06, "loss": 0.4883, "step": 7184 }, { "epoch": 0.21168775687580102, "grad_norm": 1.6613436784682127, "learning_rate": 9.624937502258787e-06, "loss": 0.4921, "step": 7185 }, { "epoch": 0.2117172193332646, "grad_norm": 1.5944637664823929, "learning_rate": 9.624742068014348e-06, "loss": 0.5343, "step": 7186 }, { "epoch": 0.21174668179072817, "grad_norm": 1.663495416026513, "learning_rate": 9.624546584850795e-06, "loss": 0.5769, "step": 7187 }, { "epoch": 0.21177614424819174, "grad_norm": 1.9625803789583887, "learning_rate": 9.6243510527702e-06, "loss": 0.6772, "step": 7188 }, { "epoch": 0.21180560670565532, "grad_norm": 1.6848958889523236, "learning_rate": 9.624155471774628e-06, "loss": 0.5613, "step": 7189 }, { "epoch": 0.2118350691631189, "grad_norm": 1.726641451270923, "learning_rate": 9.623959841866149e-06, "loss": 0.6129, "step": 7190 }, { "epoch": 0.21186453162058247, "grad_norm": 1.7191567882170258, "learning_rate": 9.623764163046831e-06, "loss": 0.6545, "step": 7191 }, { "epoch": 0.21189399407804604, "grad_norm": 1.6852215575828724, "learning_rate": 9.623568435318746e-06, "loss": 0.5153, "step": 7192 }, { "epoch": 0.21192345653550962, "grad_norm": 1.561044627108142, "learning_rate": 9.623372658683962e-06, "loss": 0.5886, "step": 7193 }, { "epoch": 0.2119529189929732, "grad_norm": 1.3569736430734898, "learning_rate": 9.623176833144552e-06, "loss": 0.4262, "step": 7194 }, { "epoch": 0.21198238145043677, "grad_norm": 1.5673586968288182, "learning_rate": 9.622980958702586e-06, "loss": 0.4497, "step": 7195 }, { "epoch": 0.21201184390790037, "grad_norm": 1.68704232228953, "learning_rate": 9.622785035360135e-06, "loss": 0.3863, "step": 7196 }, { "epoch": 0.21204130636536395, "grad_norm": 1.6120831538060305, "learning_rate": 9.622589063119276e-06, "loss": 0.5912, "step": 7197 }, { "epoch": 0.21207076882282752, "grad_norm": 1.5324340999961175, "learning_rate": 9.622393041982077e-06, "loss": 0.4968, "step": 7198 }, { "epoch": 0.2121002312802911, "grad_norm": 1.900707982567308, "learning_rate": 9.622196971950613e-06, "loss": 0.5736, "step": 7199 }, { "epoch": 0.21212969373775467, "grad_norm": 1.4156162490245638, "learning_rate": 9.622000853026958e-06, "loss": 0.5071, "step": 7200 }, { "epoch": 0.21215915619521825, "grad_norm": 1.712389501001298, "learning_rate": 9.621804685213187e-06, "loss": 0.4968, "step": 7201 }, { "epoch": 0.21218861865268182, "grad_norm": 1.6739153177529034, "learning_rate": 9.621608468511373e-06, "loss": 0.4351, "step": 7202 }, { "epoch": 0.2122180811101454, "grad_norm": 1.8134235053142693, "learning_rate": 9.621412202923595e-06, "loss": 0.6057, "step": 7203 }, { "epoch": 0.21224754356760897, "grad_norm": 1.6624676095121893, "learning_rate": 9.621215888451926e-06, "loss": 0.4935, "step": 7204 }, { "epoch": 0.21227700602507255, "grad_norm": 1.8026164574024666, "learning_rate": 9.621019525098445e-06, "loss": 0.5836, "step": 7205 }, { "epoch": 0.21230646848253612, "grad_norm": 1.8084821083148832, "learning_rate": 9.620823112865227e-06, "loss": 0.525, "step": 7206 }, { "epoch": 0.2123359309399997, "grad_norm": 1.7171831804333464, "learning_rate": 9.62062665175435e-06, "loss": 0.7286, "step": 7207 }, { "epoch": 0.21236539339746327, "grad_norm": 1.7281383564265038, "learning_rate": 9.620430141767893e-06, "loss": 0.6301, "step": 7208 }, { "epoch": 0.21239485585492687, "grad_norm": 1.4887187384374747, "learning_rate": 9.620233582907934e-06, "loss": 0.3542, "step": 7209 }, { "epoch": 0.21242431831239045, "grad_norm": 1.569496969231848, "learning_rate": 9.620036975176552e-06, "loss": 0.5366, "step": 7210 }, { "epoch": 0.21245378076985402, "grad_norm": 1.5466110724044628, "learning_rate": 9.619840318575825e-06, "loss": 0.5136, "step": 7211 }, { "epoch": 0.2124832432273176, "grad_norm": 1.5846805799076702, "learning_rate": 9.619643613107839e-06, "loss": 0.5953, "step": 7212 }, { "epoch": 0.21251270568478117, "grad_norm": 1.680802145500404, "learning_rate": 9.619446858774668e-06, "loss": 0.639, "step": 7213 }, { "epoch": 0.21254216814224475, "grad_norm": 2.0395023727899004, "learning_rate": 9.619250055578397e-06, "loss": 0.6246, "step": 7214 }, { "epoch": 0.21257163059970832, "grad_norm": 1.4281548582501182, "learning_rate": 9.619053203521106e-06, "loss": 0.3942, "step": 7215 }, { "epoch": 0.2126010930571719, "grad_norm": 1.6261804509827025, "learning_rate": 9.618856302604877e-06, "loss": 0.5055, "step": 7216 }, { "epoch": 0.21263055551463547, "grad_norm": 1.4349809810942729, "learning_rate": 9.618659352831796e-06, "loss": 0.3527, "step": 7217 }, { "epoch": 0.21266001797209905, "grad_norm": 1.6167510815467838, "learning_rate": 9.618462354203944e-06, "loss": 0.4901, "step": 7218 }, { "epoch": 0.21268948042956262, "grad_norm": 1.747772543463389, "learning_rate": 9.618265306723402e-06, "loss": 0.463, "step": 7219 }, { "epoch": 0.2127189428870262, "grad_norm": 1.796321155691399, "learning_rate": 9.61806821039226e-06, "loss": 0.5862, "step": 7220 }, { "epoch": 0.21274840534448977, "grad_norm": 1.5676791149832623, "learning_rate": 9.617871065212598e-06, "loss": 0.3804, "step": 7221 }, { "epoch": 0.21277786780195337, "grad_norm": 1.5593763557882083, "learning_rate": 9.617673871186504e-06, "loss": 0.3796, "step": 7222 }, { "epoch": 0.21280733025941695, "grad_norm": 1.584084057749642, "learning_rate": 9.617476628316062e-06, "loss": 0.4944, "step": 7223 }, { "epoch": 0.21283679271688052, "grad_norm": 1.5231328803829818, "learning_rate": 9.617279336603361e-06, "loss": 0.4333, "step": 7224 }, { "epoch": 0.2128662551743441, "grad_norm": 1.7952990452766657, "learning_rate": 9.617081996050486e-06, "loss": 0.6362, "step": 7225 }, { "epoch": 0.21289571763180767, "grad_norm": 1.7550480847097396, "learning_rate": 9.616884606659524e-06, "loss": 0.6041, "step": 7226 }, { "epoch": 0.21292518008927125, "grad_norm": 1.570367625684703, "learning_rate": 9.616687168432564e-06, "loss": 0.4976, "step": 7227 }, { "epoch": 0.21295464254673482, "grad_norm": 1.5661744514347762, "learning_rate": 9.616489681371694e-06, "loss": 0.538, "step": 7228 }, { "epoch": 0.2129841050041984, "grad_norm": 1.7715767258672048, "learning_rate": 9.616292145479003e-06, "loss": 0.432, "step": 7229 }, { "epoch": 0.21301356746166197, "grad_norm": 1.6220667032905933, "learning_rate": 9.616094560756582e-06, "loss": 0.5124, "step": 7230 }, { "epoch": 0.21304302991912555, "grad_norm": 1.5764927907263873, "learning_rate": 9.615896927206516e-06, "loss": 0.3578, "step": 7231 }, { "epoch": 0.21307249237658912, "grad_norm": 1.834768626000713, "learning_rate": 9.615699244830903e-06, "loss": 0.5605, "step": 7232 }, { "epoch": 0.2131019548340527, "grad_norm": 1.7530467938535492, "learning_rate": 9.615501513631827e-06, "loss": 0.6224, "step": 7233 }, { "epoch": 0.21313141729151627, "grad_norm": 1.6098308195366984, "learning_rate": 9.615303733611386e-06, "loss": 0.4499, "step": 7234 }, { "epoch": 0.21316087974897988, "grad_norm": 1.758587402969385, "learning_rate": 9.615105904771666e-06, "loss": 0.4296, "step": 7235 }, { "epoch": 0.21319034220644345, "grad_norm": 1.8589093630032363, "learning_rate": 9.614908027114762e-06, "loss": 0.48, "step": 7236 }, { "epoch": 0.21321980466390703, "grad_norm": 1.5886110688422244, "learning_rate": 9.614710100642768e-06, "loss": 0.5675, "step": 7237 }, { "epoch": 0.2132492671213706, "grad_norm": 1.794159742442526, "learning_rate": 9.614512125357778e-06, "loss": 0.5961, "step": 7238 }, { "epoch": 0.21327872957883418, "grad_norm": 1.8831491937881926, "learning_rate": 9.614314101261883e-06, "loss": 0.6862, "step": 7239 }, { "epoch": 0.21330819203629775, "grad_norm": 1.5071207311978938, "learning_rate": 9.614116028357181e-06, "loss": 0.4424, "step": 7240 }, { "epoch": 0.21333765449376133, "grad_norm": 1.6842647050419932, "learning_rate": 9.613917906645767e-06, "loss": 0.4751, "step": 7241 }, { "epoch": 0.2133671169512249, "grad_norm": 1.7782518646978622, "learning_rate": 9.613719736129734e-06, "loss": 0.6722, "step": 7242 }, { "epoch": 0.21339657940868847, "grad_norm": 1.5209812877403377, "learning_rate": 9.613521516811179e-06, "loss": 0.4332, "step": 7243 }, { "epoch": 0.21342604186615205, "grad_norm": 1.704750187288073, "learning_rate": 9.613323248692202e-06, "loss": 0.6252, "step": 7244 }, { "epoch": 0.21345550432361562, "grad_norm": 1.539641325639042, "learning_rate": 9.613124931774896e-06, "loss": 0.5119, "step": 7245 }, { "epoch": 0.2134849667810792, "grad_norm": 1.6297482816594733, "learning_rate": 9.61292656606136e-06, "loss": 0.5096, "step": 7246 }, { "epoch": 0.21351442923854277, "grad_norm": 1.4286602281452425, "learning_rate": 9.612728151553693e-06, "loss": 0.4155, "step": 7247 }, { "epoch": 0.21354389169600638, "grad_norm": 1.6155293791492176, "learning_rate": 9.612529688253993e-06, "loss": 0.5714, "step": 7248 }, { "epoch": 0.21357335415346995, "grad_norm": 1.9005718692110696, "learning_rate": 9.61233117616436e-06, "loss": 0.7622, "step": 7249 }, { "epoch": 0.21360281661093353, "grad_norm": 1.5466872078372376, "learning_rate": 9.612132615286893e-06, "loss": 0.3885, "step": 7250 }, { "epoch": 0.2136322790683971, "grad_norm": 1.651284966944607, "learning_rate": 9.611934005623694e-06, "loss": 0.4499, "step": 7251 }, { "epoch": 0.21366174152586068, "grad_norm": 1.74740855193956, "learning_rate": 9.611735347176862e-06, "loss": 0.4912, "step": 7252 }, { "epoch": 0.21369120398332425, "grad_norm": 1.6331194948214784, "learning_rate": 9.611536639948499e-06, "loss": 0.5651, "step": 7253 }, { "epoch": 0.21372066644078783, "grad_norm": 2.020684047427475, "learning_rate": 9.611337883940707e-06, "loss": 0.5605, "step": 7254 }, { "epoch": 0.2137501288982514, "grad_norm": 1.5096500339165073, "learning_rate": 9.611139079155587e-06, "loss": 0.5507, "step": 7255 }, { "epoch": 0.21377959135571498, "grad_norm": 1.7616182468493877, "learning_rate": 9.610940225595244e-06, "loss": 0.5438, "step": 7256 }, { "epoch": 0.21380905381317855, "grad_norm": 1.8358783250498183, "learning_rate": 9.610741323261781e-06, "loss": 0.5335, "step": 7257 }, { "epoch": 0.21383851627064213, "grad_norm": 1.4767209506984242, "learning_rate": 9.6105423721573e-06, "loss": 0.4834, "step": 7258 }, { "epoch": 0.2138679787281057, "grad_norm": 1.5816469600087772, "learning_rate": 9.61034337228391e-06, "loss": 0.5334, "step": 7259 }, { "epoch": 0.21389744118556928, "grad_norm": 1.6948192755293803, "learning_rate": 9.610144323643712e-06, "loss": 0.4391, "step": 7260 }, { "epoch": 0.21392690364303288, "grad_norm": 1.7202770950492954, "learning_rate": 9.60994522623881e-06, "loss": 0.5478, "step": 7261 }, { "epoch": 0.21395636610049645, "grad_norm": 1.5102130664099442, "learning_rate": 9.609746080071313e-06, "loss": 0.4703, "step": 7262 }, { "epoch": 0.21398582855796003, "grad_norm": 1.8057789267977231, "learning_rate": 9.609546885143327e-06, "loss": 0.4453, "step": 7263 }, { "epoch": 0.2140152910154236, "grad_norm": 1.5877659978279346, "learning_rate": 9.609347641456958e-06, "loss": 0.5167, "step": 7264 }, { "epoch": 0.21404475347288718, "grad_norm": 1.5082701251178228, "learning_rate": 9.609148349014317e-06, "loss": 0.439, "step": 7265 }, { "epoch": 0.21407421593035075, "grad_norm": 1.6153937995103804, "learning_rate": 9.608949007817507e-06, "loss": 0.5788, "step": 7266 }, { "epoch": 0.21410367838781433, "grad_norm": 1.8551620363727124, "learning_rate": 9.608749617868641e-06, "loss": 0.4667, "step": 7267 }, { "epoch": 0.2141331408452779, "grad_norm": 1.633875923148002, "learning_rate": 9.608550179169825e-06, "loss": 0.5826, "step": 7268 }, { "epoch": 0.21416260330274148, "grad_norm": 1.5442815680092128, "learning_rate": 9.608350691723169e-06, "loss": 0.4539, "step": 7269 }, { "epoch": 0.21419206576020505, "grad_norm": 1.709046374920483, "learning_rate": 9.608151155530785e-06, "loss": 0.6251, "step": 7270 }, { "epoch": 0.21422152821766863, "grad_norm": 1.6766902226836857, "learning_rate": 9.607951570594783e-06, "loss": 0.4648, "step": 7271 }, { "epoch": 0.2142509906751322, "grad_norm": 1.719261931002836, "learning_rate": 9.607751936917272e-06, "loss": 0.5454, "step": 7272 }, { "epoch": 0.21428045313259578, "grad_norm": 1.6469646673340055, "learning_rate": 9.607552254500366e-06, "loss": 0.605, "step": 7273 }, { "epoch": 0.21430991559005938, "grad_norm": 1.6219997466064886, "learning_rate": 9.607352523346177e-06, "loss": 0.5069, "step": 7274 }, { "epoch": 0.21433937804752295, "grad_norm": 1.5325421211013386, "learning_rate": 9.607152743456815e-06, "loss": 0.4372, "step": 7275 }, { "epoch": 0.21436884050498653, "grad_norm": 1.699382834445301, "learning_rate": 9.606952914834398e-06, "loss": 0.4543, "step": 7276 }, { "epoch": 0.2143983029624501, "grad_norm": 1.6895496483831716, "learning_rate": 9.606753037481036e-06, "loss": 0.5832, "step": 7277 }, { "epoch": 0.21442776541991368, "grad_norm": 1.8547974818698174, "learning_rate": 9.606553111398844e-06, "loss": 0.6883, "step": 7278 }, { "epoch": 0.21445722787737725, "grad_norm": 1.7105412723018565, "learning_rate": 9.60635313658994e-06, "loss": 0.5791, "step": 7279 }, { "epoch": 0.21448669033484083, "grad_norm": 1.6907239562919805, "learning_rate": 9.606153113056431e-06, "loss": 0.3866, "step": 7280 }, { "epoch": 0.2145161527923044, "grad_norm": 1.6866702290840343, "learning_rate": 9.605953040800443e-06, "loss": 0.4752, "step": 7281 }, { "epoch": 0.21454561524976798, "grad_norm": 1.7677769271988426, "learning_rate": 9.605752919824085e-06, "loss": 0.4795, "step": 7282 }, { "epoch": 0.21457507770723155, "grad_norm": 1.5463486595051543, "learning_rate": 9.605552750129475e-06, "loss": 0.5512, "step": 7283 }, { "epoch": 0.21460454016469513, "grad_norm": 1.442229900183307, "learning_rate": 9.605352531718734e-06, "loss": 0.4175, "step": 7284 }, { "epoch": 0.2146340026221587, "grad_norm": 1.6745009681921044, "learning_rate": 9.605152264593976e-06, "loss": 0.5745, "step": 7285 }, { "epoch": 0.21466346507962228, "grad_norm": 1.5541470061163556, "learning_rate": 9.604951948757321e-06, "loss": 0.4909, "step": 7286 }, { "epoch": 0.21469292753708588, "grad_norm": 1.7920378147667804, "learning_rate": 9.60475158421089e-06, "loss": 0.3764, "step": 7287 }, { "epoch": 0.21472238999454946, "grad_norm": 1.7978975175648593, "learning_rate": 9.604551170956797e-06, "loss": 0.5546, "step": 7288 }, { "epoch": 0.21475185245201303, "grad_norm": 1.6805602792691168, "learning_rate": 9.604350708997167e-06, "loss": 0.5078, "step": 7289 }, { "epoch": 0.2147813149094766, "grad_norm": 1.3977031871386953, "learning_rate": 9.604150198334117e-06, "loss": 0.4101, "step": 7290 }, { "epoch": 0.21481077736694018, "grad_norm": 1.5546580307634663, "learning_rate": 9.60394963896977e-06, "loss": 0.5283, "step": 7291 }, { "epoch": 0.21484023982440376, "grad_norm": 1.6397484992533007, "learning_rate": 9.603749030906247e-06, "loss": 0.5135, "step": 7292 }, { "epoch": 0.21486970228186733, "grad_norm": 1.5182291120831357, "learning_rate": 9.603548374145671e-06, "loss": 0.4845, "step": 7293 }, { "epoch": 0.2148991647393309, "grad_norm": 1.7634039056645463, "learning_rate": 9.60334766869016e-06, "loss": 0.6181, "step": 7294 }, { "epoch": 0.21492862719679448, "grad_norm": 1.5125526274124235, "learning_rate": 9.603146914541843e-06, "loss": 0.5777, "step": 7295 }, { "epoch": 0.21495808965425806, "grad_norm": 1.67429292132466, "learning_rate": 9.60294611170284e-06, "loss": 0.4595, "step": 7296 }, { "epoch": 0.21498755211172163, "grad_norm": 1.6809470712714134, "learning_rate": 9.602745260175276e-06, "loss": 0.4371, "step": 7297 }, { "epoch": 0.2150170145691852, "grad_norm": 1.600403554311782, "learning_rate": 9.602544359961274e-06, "loss": 0.5061, "step": 7298 }, { "epoch": 0.21504647702664878, "grad_norm": 1.709417353170724, "learning_rate": 9.602343411062962e-06, "loss": 0.4932, "step": 7299 }, { "epoch": 0.21507593948411238, "grad_norm": 1.5348499938048157, "learning_rate": 9.602142413482464e-06, "loss": 0.4466, "step": 7300 }, { "epoch": 0.21510540194157596, "grad_norm": 1.790750333231616, "learning_rate": 9.601941367221906e-06, "loss": 0.4882, "step": 7301 }, { "epoch": 0.21513486439903953, "grad_norm": 1.5416571141766477, "learning_rate": 9.601740272283414e-06, "loss": 0.4349, "step": 7302 }, { "epoch": 0.2151643268565031, "grad_norm": 1.582515599138773, "learning_rate": 9.601539128669117e-06, "loss": 0.5387, "step": 7303 }, { "epoch": 0.21519378931396668, "grad_norm": 1.6385442053252606, "learning_rate": 9.60133793638114e-06, "loss": 0.5981, "step": 7304 }, { "epoch": 0.21522325177143026, "grad_norm": 1.758418625074345, "learning_rate": 9.601136695421614e-06, "loss": 0.3934, "step": 7305 }, { "epoch": 0.21525271422889383, "grad_norm": 1.5897394493881685, "learning_rate": 9.600935405792664e-06, "loss": 0.3992, "step": 7306 }, { "epoch": 0.2152821766863574, "grad_norm": 1.712309090654974, "learning_rate": 9.600734067496424e-06, "loss": 0.4758, "step": 7307 }, { "epoch": 0.21531163914382098, "grad_norm": 1.5044905344443853, "learning_rate": 9.60053268053502e-06, "loss": 0.4241, "step": 7308 }, { "epoch": 0.21534110160128456, "grad_norm": 1.5412785508700744, "learning_rate": 9.600331244910583e-06, "loss": 0.4499, "step": 7309 }, { "epoch": 0.21537056405874813, "grad_norm": 1.6844479994686279, "learning_rate": 9.600129760625245e-06, "loss": 0.6061, "step": 7310 }, { "epoch": 0.2154000265162117, "grad_norm": 1.628157637561671, "learning_rate": 9.599928227681138e-06, "loss": 0.5209, "step": 7311 }, { "epoch": 0.21542948897367528, "grad_norm": 1.8363373666199765, "learning_rate": 9.599726646080388e-06, "loss": 0.5477, "step": 7312 }, { "epoch": 0.21545895143113888, "grad_norm": 1.5334912994143344, "learning_rate": 9.599525015825134e-06, "loss": 0.4665, "step": 7313 }, { "epoch": 0.21548841388860246, "grad_norm": 1.7834333381094574, "learning_rate": 9.599323336917505e-06, "loss": 0.4951, "step": 7314 }, { "epoch": 0.21551787634606603, "grad_norm": 1.4923110018823458, "learning_rate": 9.599121609359636e-06, "loss": 0.476, "step": 7315 }, { "epoch": 0.2155473388035296, "grad_norm": 1.5283808962894898, "learning_rate": 9.598919833153661e-06, "loss": 0.4292, "step": 7316 }, { "epoch": 0.21557680126099318, "grad_norm": 1.4410248875086742, "learning_rate": 9.598718008301713e-06, "loss": 0.5561, "step": 7317 }, { "epoch": 0.21560626371845676, "grad_norm": 1.599262812308419, "learning_rate": 9.598516134805926e-06, "loss": 0.4825, "step": 7318 }, { "epoch": 0.21563572617592033, "grad_norm": 1.5805122946208736, "learning_rate": 9.598314212668437e-06, "loss": 0.6018, "step": 7319 }, { "epoch": 0.2156651886333839, "grad_norm": 1.5264718018010655, "learning_rate": 9.598112241891385e-06, "loss": 0.5437, "step": 7320 }, { "epoch": 0.21569465109084748, "grad_norm": 1.794400168921419, "learning_rate": 9.597910222476899e-06, "loss": 0.4309, "step": 7321 }, { "epoch": 0.21572411354831106, "grad_norm": 1.889805058039178, "learning_rate": 9.597708154427122e-06, "loss": 0.7226, "step": 7322 }, { "epoch": 0.21575357600577463, "grad_norm": 1.6120641587500852, "learning_rate": 9.597506037744189e-06, "loss": 0.4986, "step": 7323 }, { "epoch": 0.2157830384632382, "grad_norm": 1.6973957120638332, "learning_rate": 9.597303872430238e-06, "loss": 0.6267, "step": 7324 }, { "epoch": 0.21581250092070178, "grad_norm": 1.8744064971252135, "learning_rate": 9.597101658487409e-06, "loss": 0.5962, "step": 7325 }, { "epoch": 0.21584196337816539, "grad_norm": 1.7209813903599775, "learning_rate": 9.596899395917838e-06, "loss": 0.4681, "step": 7326 }, { "epoch": 0.21587142583562896, "grad_norm": 1.7016535118239011, "learning_rate": 9.596697084723667e-06, "loss": 0.5967, "step": 7327 }, { "epoch": 0.21590088829309254, "grad_norm": 1.5373184634798938, "learning_rate": 9.596494724907035e-06, "loss": 0.4144, "step": 7328 }, { "epoch": 0.2159303507505561, "grad_norm": 1.5881113087637326, "learning_rate": 9.59629231647008e-06, "loss": 0.4824, "step": 7329 }, { "epoch": 0.21595981320801969, "grad_norm": 1.6675645699007802, "learning_rate": 9.59608985941495e-06, "loss": 0.5895, "step": 7330 }, { "epoch": 0.21598927566548326, "grad_norm": 1.6670203458171173, "learning_rate": 9.59588735374378e-06, "loss": 0.6181, "step": 7331 }, { "epoch": 0.21601873812294684, "grad_norm": 1.5614712802916848, "learning_rate": 9.595684799458715e-06, "loss": 0.6744, "step": 7332 }, { "epoch": 0.2160482005804104, "grad_norm": 1.821368951763939, "learning_rate": 9.595482196561895e-06, "loss": 0.4514, "step": 7333 }, { "epoch": 0.21607766303787398, "grad_norm": 1.5442092308036637, "learning_rate": 9.595279545055466e-06, "loss": 0.4951, "step": 7334 }, { "epoch": 0.21610712549533756, "grad_norm": 1.7170037217523926, "learning_rate": 9.59507684494157e-06, "loss": 0.6131, "step": 7335 }, { "epoch": 0.21613658795280113, "grad_norm": 1.9954457596539286, "learning_rate": 9.594874096222352e-06, "loss": 0.6405, "step": 7336 }, { "epoch": 0.2161660504102647, "grad_norm": 1.7882902617116923, "learning_rate": 9.594671298899959e-06, "loss": 0.49, "step": 7337 }, { "epoch": 0.21619551286772828, "grad_norm": 1.45353761149408, "learning_rate": 9.594468452976529e-06, "loss": 0.4115, "step": 7338 }, { "epoch": 0.2162249753251919, "grad_norm": 1.8032335643518216, "learning_rate": 9.594265558454213e-06, "loss": 0.5457, "step": 7339 }, { "epoch": 0.21625443778265546, "grad_norm": 1.5988630728999458, "learning_rate": 9.594062615335156e-06, "loss": 0.4108, "step": 7340 }, { "epoch": 0.21628390024011904, "grad_norm": 1.6884919260932012, "learning_rate": 9.593859623621507e-06, "loss": 0.6227, "step": 7341 }, { "epoch": 0.2163133626975826, "grad_norm": 1.3831667296840706, "learning_rate": 9.593656583315408e-06, "loss": 0.4485, "step": 7342 }, { "epoch": 0.2163428251550462, "grad_norm": 1.5945781781505286, "learning_rate": 9.59345349441901e-06, "loss": 0.5015, "step": 7343 }, { "epoch": 0.21637228761250976, "grad_norm": 1.5229528302003887, "learning_rate": 9.593250356934463e-06, "loss": 0.5191, "step": 7344 }, { "epoch": 0.21640175006997334, "grad_norm": 1.6219841849309393, "learning_rate": 9.593047170863911e-06, "loss": 0.518, "step": 7345 }, { "epoch": 0.2164312125274369, "grad_norm": 1.4832148125555817, "learning_rate": 9.592843936209508e-06, "loss": 0.4557, "step": 7346 }, { "epoch": 0.2164606749849005, "grad_norm": 1.5343693057682757, "learning_rate": 9.5926406529734e-06, "loss": 0.4016, "step": 7347 }, { "epoch": 0.21649013744236406, "grad_norm": 1.563389688330355, "learning_rate": 9.592437321157739e-06, "loss": 0.4622, "step": 7348 }, { "epoch": 0.21651959989982764, "grad_norm": 1.5732810076363473, "learning_rate": 9.592233940764676e-06, "loss": 0.5483, "step": 7349 }, { "epoch": 0.2165490623572912, "grad_norm": 1.565777896398043, "learning_rate": 9.592030511796363e-06, "loss": 0.5561, "step": 7350 }, { "epoch": 0.21657852481475479, "grad_norm": 1.589478125959783, "learning_rate": 9.591827034254948e-06, "loss": 0.5243, "step": 7351 }, { "epoch": 0.2166079872722184, "grad_norm": 1.5703088068604392, "learning_rate": 9.591623508142588e-06, "loss": 0.5119, "step": 7352 }, { "epoch": 0.21663744972968196, "grad_norm": 1.7208027609892396, "learning_rate": 9.591419933461433e-06, "loss": 0.6679, "step": 7353 }, { "epoch": 0.21666691218714554, "grad_norm": 1.9517249333028048, "learning_rate": 9.591216310213638e-06, "loss": 0.6157, "step": 7354 }, { "epoch": 0.2166963746446091, "grad_norm": 1.386038145512519, "learning_rate": 9.591012638401356e-06, "loss": 0.396, "step": 7355 }, { "epoch": 0.2167258371020727, "grad_norm": 1.653638819064743, "learning_rate": 9.590808918026741e-06, "loss": 0.4053, "step": 7356 }, { "epoch": 0.21675529955953626, "grad_norm": 1.606051356013917, "learning_rate": 9.590605149091949e-06, "loss": 0.4405, "step": 7357 }, { "epoch": 0.21678476201699984, "grad_norm": 1.885634063981016, "learning_rate": 9.590401331599135e-06, "loss": 0.4246, "step": 7358 }, { "epoch": 0.2168142244744634, "grad_norm": 1.6939128030907695, "learning_rate": 9.590197465550453e-06, "loss": 0.5278, "step": 7359 }, { "epoch": 0.216843686931927, "grad_norm": 1.6394604087469176, "learning_rate": 9.589993550948063e-06, "loss": 0.4731, "step": 7360 }, { "epoch": 0.21687314938939056, "grad_norm": 1.6817309009352692, "learning_rate": 9.589789587794118e-06, "loss": 0.5631, "step": 7361 }, { "epoch": 0.21690261184685414, "grad_norm": 1.6672628744366333, "learning_rate": 9.589585576090778e-06, "loss": 0.5127, "step": 7362 }, { "epoch": 0.2169320743043177, "grad_norm": 1.5663259230714748, "learning_rate": 9.589381515840202e-06, "loss": 0.5876, "step": 7363 }, { "epoch": 0.2169615367617813, "grad_norm": 1.7338678667392058, "learning_rate": 9.589177407044546e-06, "loss": 0.6372, "step": 7364 }, { "epoch": 0.2169909992192449, "grad_norm": 1.729479800832147, "learning_rate": 9.58897324970597e-06, "loss": 0.4921, "step": 7365 }, { "epoch": 0.21702046167670846, "grad_norm": 1.6009663969286874, "learning_rate": 9.588769043826634e-06, "loss": 0.5411, "step": 7366 }, { "epoch": 0.21704992413417204, "grad_norm": 1.4844205388648684, "learning_rate": 9.588564789408698e-06, "loss": 0.4422, "step": 7367 }, { "epoch": 0.21707938659163561, "grad_norm": 1.5300953959149812, "learning_rate": 9.58836048645432e-06, "loss": 0.5101, "step": 7368 }, { "epoch": 0.2171088490490992, "grad_norm": 1.8762459450696773, "learning_rate": 9.588156134965664e-06, "loss": 0.6976, "step": 7369 }, { "epoch": 0.21713831150656276, "grad_norm": 1.6850690904358927, "learning_rate": 9.587951734944892e-06, "loss": 0.445, "step": 7370 }, { "epoch": 0.21716777396402634, "grad_norm": 1.7382918329898427, "learning_rate": 9.587747286394164e-06, "loss": 0.4876, "step": 7371 }, { "epoch": 0.21719723642148991, "grad_norm": 1.5856986851397643, "learning_rate": 9.587542789315644e-06, "loss": 0.5461, "step": 7372 }, { "epoch": 0.2172266988789535, "grad_norm": 1.7047939106697734, "learning_rate": 9.587338243711492e-06, "loss": 0.4768, "step": 7373 }, { "epoch": 0.21725616133641706, "grad_norm": 1.54781135273728, "learning_rate": 9.587133649583877e-06, "loss": 0.578, "step": 7374 }, { "epoch": 0.21728562379388064, "grad_norm": 1.5506828799666599, "learning_rate": 9.58692900693496e-06, "loss": 0.4219, "step": 7375 }, { "epoch": 0.2173150862513442, "grad_norm": 1.6069466582854486, "learning_rate": 9.586724315766905e-06, "loss": 0.4404, "step": 7376 }, { "epoch": 0.2173445487088078, "grad_norm": 1.4384872780953153, "learning_rate": 9.586519576081877e-06, "loss": 0.4193, "step": 7377 }, { "epoch": 0.2173740111662714, "grad_norm": 1.444725835404862, "learning_rate": 9.586314787882043e-06, "loss": 0.4301, "step": 7378 }, { "epoch": 0.21740347362373497, "grad_norm": 1.5074047866731957, "learning_rate": 9.58610995116957e-06, "loss": 0.4456, "step": 7379 }, { "epoch": 0.21743293608119854, "grad_norm": 1.573801059026638, "learning_rate": 9.585905065946624e-06, "loss": 0.5264, "step": 7380 }, { "epoch": 0.21746239853866212, "grad_norm": 1.712651734554942, "learning_rate": 9.58570013221537e-06, "loss": 0.4451, "step": 7381 }, { "epoch": 0.2174918609961257, "grad_norm": 1.588488174443238, "learning_rate": 9.58549514997798e-06, "loss": 0.4751, "step": 7382 }, { "epoch": 0.21752132345358927, "grad_norm": 1.481058490711505, "learning_rate": 9.585290119236617e-06, "loss": 0.447, "step": 7383 }, { "epoch": 0.21755078591105284, "grad_norm": 1.6269254160172884, "learning_rate": 9.585085039993453e-06, "loss": 0.5265, "step": 7384 }, { "epoch": 0.21758024836851642, "grad_norm": 1.731911270818611, "learning_rate": 9.584879912250659e-06, "loss": 0.5251, "step": 7385 }, { "epoch": 0.21760971082598, "grad_norm": 1.4610497441656016, "learning_rate": 9.584674736010401e-06, "loss": 0.3442, "step": 7386 }, { "epoch": 0.21763917328344357, "grad_norm": 1.4802245973601524, "learning_rate": 9.584469511274852e-06, "loss": 0.3481, "step": 7387 }, { "epoch": 0.21766863574090714, "grad_norm": 1.6619551541507736, "learning_rate": 9.58426423804618e-06, "loss": 0.4965, "step": 7388 }, { "epoch": 0.21769809819837072, "grad_norm": 1.4854785309646972, "learning_rate": 9.584058916326558e-06, "loss": 0.4699, "step": 7389 }, { "epoch": 0.2177275606558343, "grad_norm": 1.3911581110753597, "learning_rate": 9.58385354611816e-06, "loss": 0.3964, "step": 7390 }, { "epoch": 0.2177570231132979, "grad_norm": 1.5593026311218485, "learning_rate": 9.583648127423153e-06, "loss": 0.5756, "step": 7391 }, { "epoch": 0.21778648557076147, "grad_norm": 1.4462276921253459, "learning_rate": 9.583442660243716e-06, "loss": 0.4838, "step": 7392 }, { "epoch": 0.21781594802822504, "grad_norm": 1.4925283239185438, "learning_rate": 9.583237144582018e-06, "loss": 0.3419, "step": 7393 }, { "epoch": 0.21784541048568862, "grad_norm": 1.6279115181095805, "learning_rate": 9.583031580440235e-06, "loss": 0.4057, "step": 7394 }, { "epoch": 0.2178748729431522, "grad_norm": 1.442936510206438, "learning_rate": 9.58282596782054e-06, "loss": 0.4824, "step": 7395 }, { "epoch": 0.21790433540061577, "grad_norm": 1.6528146832574002, "learning_rate": 9.582620306725108e-06, "loss": 0.529, "step": 7396 }, { "epoch": 0.21793379785807934, "grad_norm": 1.8568494722321585, "learning_rate": 9.582414597156115e-06, "loss": 0.3167, "step": 7397 }, { "epoch": 0.21796326031554292, "grad_norm": 1.4864997638263329, "learning_rate": 9.582208839115737e-06, "loss": 0.4967, "step": 7398 }, { "epoch": 0.2179927227730065, "grad_norm": 1.984143224003455, "learning_rate": 9.582003032606153e-06, "loss": 0.6633, "step": 7399 }, { "epoch": 0.21802218523047007, "grad_norm": 1.6850937312769139, "learning_rate": 9.581797177629533e-06, "loss": 0.4273, "step": 7400 }, { "epoch": 0.21805164768793364, "grad_norm": 1.4064834085616886, "learning_rate": 9.581591274188062e-06, "loss": 0.4993, "step": 7401 }, { "epoch": 0.21808111014539722, "grad_norm": 1.6289372174123584, "learning_rate": 9.581385322283914e-06, "loss": 0.576, "step": 7402 }, { "epoch": 0.2181105726028608, "grad_norm": 1.5350951958019272, "learning_rate": 9.581179321919268e-06, "loss": 0.4189, "step": 7403 }, { "epoch": 0.2181400350603244, "grad_norm": 1.5679710977614343, "learning_rate": 9.580973273096304e-06, "loss": 0.5754, "step": 7404 }, { "epoch": 0.21816949751778797, "grad_norm": 1.6883603064714283, "learning_rate": 9.580767175817199e-06, "loss": 0.5563, "step": 7405 }, { "epoch": 0.21819895997525154, "grad_norm": 1.6765138813862326, "learning_rate": 9.580561030084136e-06, "loss": 0.6449, "step": 7406 }, { "epoch": 0.21822842243271512, "grad_norm": 1.5073208171284422, "learning_rate": 9.580354835899295e-06, "loss": 0.3604, "step": 7407 }, { "epoch": 0.2182578848901787, "grad_norm": 1.8262452167456509, "learning_rate": 9.580148593264856e-06, "loss": 0.7081, "step": 7408 }, { "epoch": 0.21828734734764227, "grad_norm": 1.7660883165342514, "learning_rate": 9.579942302183001e-06, "loss": 0.5808, "step": 7409 }, { "epoch": 0.21831680980510584, "grad_norm": 1.6547654377854704, "learning_rate": 9.579735962655912e-06, "loss": 0.3324, "step": 7410 }, { "epoch": 0.21834627226256942, "grad_norm": 1.525119457185446, "learning_rate": 9.579529574685773e-06, "loss": 0.5403, "step": 7411 }, { "epoch": 0.218375734720033, "grad_norm": 1.5615307676078172, "learning_rate": 9.579323138274764e-06, "loss": 0.5447, "step": 7412 }, { "epoch": 0.21840519717749657, "grad_norm": 1.423377879438613, "learning_rate": 9.579116653425071e-06, "loss": 0.4103, "step": 7413 }, { "epoch": 0.21843465963496014, "grad_norm": 1.5357807567586752, "learning_rate": 9.578910120138877e-06, "loss": 0.4859, "step": 7414 }, { "epoch": 0.21846412209242372, "grad_norm": 1.473803546292561, "learning_rate": 9.578703538418368e-06, "loss": 0.4049, "step": 7415 }, { "epoch": 0.2184935845498873, "grad_norm": 1.769548721209539, "learning_rate": 9.578496908265729e-06, "loss": 0.528, "step": 7416 }, { "epoch": 0.2185230470073509, "grad_norm": 1.6142211071277384, "learning_rate": 9.578290229683143e-06, "loss": 0.3528, "step": 7417 }, { "epoch": 0.21855250946481447, "grad_norm": 1.5064124189865948, "learning_rate": 9.5780835026728e-06, "loss": 0.3626, "step": 7418 }, { "epoch": 0.21858197192227805, "grad_norm": 1.5695691037746962, "learning_rate": 9.577876727236884e-06, "loss": 0.4947, "step": 7419 }, { "epoch": 0.21861143437974162, "grad_norm": 1.578772276848232, "learning_rate": 9.577669903377584e-06, "loss": 0.5828, "step": 7420 }, { "epoch": 0.2186408968372052, "grad_norm": 1.5675406925113418, "learning_rate": 9.577463031097087e-06, "loss": 0.589, "step": 7421 }, { "epoch": 0.21867035929466877, "grad_norm": 1.5709566824414274, "learning_rate": 9.577256110397579e-06, "loss": 0.5284, "step": 7422 }, { "epoch": 0.21869982175213235, "grad_norm": 1.5869583309020705, "learning_rate": 9.577049141281252e-06, "loss": 0.4567, "step": 7423 }, { "epoch": 0.21872928420959592, "grad_norm": 1.5725398472683376, "learning_rate": 9.576842123750295e-06, "loss": 0.4438, "step": 7424 }, { "epoch": 0.2187587466670595, "grad_norm": 1.6321819112640272, "learning_rate": 9.576635057806897e-06, "loss": 0.5882, "step": 7425 }, { "epoch": 0.21878820912452307, "grad_norm": 1.7115047318850596, "learning_rate": 9.576427943453246e-06, "loss": 0.4218, "step": 7426 }, { "epoch": 0.21881767158198664, "grad_norm": 1.7355838766536673, "learning_rate": 9.576220780691536e-06, "loss": 0.5178, "step": 7427 }, { "epoch": 0.21884713403945022, "grad_norm": 1.4517632754245509, "learning_rate": 9.57601356952396e-06, "loss": 0.4971, "step": 7428 }, { "epoch": 0.2188765964969138, "grad_norm": 1.6047506725001128, "learning_rate": 9.575806309952703e-06, "loss": 0.4955, "step": 7429 }, { "epoch": 0.2189060589543774, "grad_norm": 1.5286037182281997, "learning_rate": 9.575599001979962e-06, "loss": 0.481, "step": 7430 }, { "epoch": 0.21893552141184097, "grad_norm": 1.5436355362935175, "learning_rate": 9.575391645607932e-06, "loss": 0.5413, "step": 7431 }, { "epoch": 0.21896498386930455, "grad_norm": 1.5696409036945906, "learning_rate": 9.575184240838801e-06, "loss": 0.5359, "step": 7432 }, { "epoch": 0.21899444632676812, "grad_norm": 1.3600994772714434, "learning_rate": 9.574976787674765e-06, "loss": 0.5135, "step": 7433 }, { "epoch": 0.2190239087842317, "grad_norm": 1.4704937957954767, "learning_rate": 9.574769286118021e-06, "loss": 0.5225, "step": 7434 }, { "epoch": 0.21905337124169527, "grad_norm": 1.5779567155071794, "learning_rate": 9.574561736170761e-06, "loss": 0.46, "step": 7435 }, { "epoch": 0.21908283369915885, "grad_norm": 1.7224039567653975, "learning_rate": 9.57435413783518e-06, "loss": 0.4154, "step": 7436 }, { "epoch": 0.21911229615662242, "grad_norm": 1.8487164403642729, "learning_rate": 9.574146491113477e-06, "loss": 0.6614, "step": 7437 }, { "epoch": 0.219141758614086, "grad_norm": 1.5317829834138363, "learning_rate": 9.573938796007844e-06, "loss": 0.4108, "step": 7438 }, { "epoch": 0.21917122107154957, "grad_norm": 1.5539329454927386, "learning_rate": 9.573731052520482e-06, "loss": 0.5114, "step": 7439 }, { "epoch": 0.21920068352901315, "grad_norm": 1.8574470234385565, "learning_rate": 9.573523260653587e-06, "loss": 0.5838, "step": 7440 }, { "epoch": 0.21923014598647672, "grad_norm": 1.3874698988127276, "learning_rate": 9.573315420409357e-06, "loss": 0.3743, "step": 7441 }, { "epoch": 0.2192596084439403, "grad_norm": 1.704792002449977, "learning_rate": 9.573107531789987e-06, "loss": 0.5518, "step": 7442 }, { "epoch": 0.2192890709014039, "grad_norm": 1.6899260529279376, "learning_rate": 9.572899594797683e-06, "loss": 0.4303, "step": 7443 }, { "epoch": 0.21931853335886747, "grad_norm": 1.5425464546005594, "learning_rate": 9.572691609434638e-06, "loss": 0.5886, "step": 7444 }, { "epoch": 0.21934799581633105, "grad_norm": 1.5417570543432089, "learning_rate": 9.572483575703058e-06, "loss": 0.4615, "step": 7445 }, { "epoch": 0.21937745827379462, "grad_norm": 1.4855531648868887, "learning_rate": 9.572275493605138e-06, "loss": 0.5012, "step": 7446 }, { "epoch": 0.2194069207312582, "grad_norm": 1.835185142593989, "learning_rate": 9.572067363143081e-06, "loss": 0.6181, "step": 7447 }, { "epoch": 0.21943638318872177, "grad_norm": 1.611049766433555, "learning_rate": 9.57185918431909e-06, "loss": 0.4705, "step": 7448 }, { "epoch": 0.21946584564618535, "grad_norm": 1.5397482984953512, "learning_rate": 9.571650957135365e-06, "loss": 0.4613, "step": 7449 }, { "epoch": 0.21949530810364892, "grad_norm": 1.6728958611689255, "learning_rate": 9.57144268159411e-06, "loss": 0.4473, "step": 7450 }, { "epoch": 0.2195247705611125, "grad_norm": 1.5062276124553016, "learning_rate": 9.571234357697528e-06, "loss": 0.4563, "step": 7451 }, { "epoch": 0.21955423301857607, "grad_norm": 1.5716189149856907, "learning_rate": 9.57102598544782e-06, "loss": 0.4226, "step": 7452 }, { "epoch": 0.21958369547603965, "grad_norm": 1.5771833500352161, "learning_rate": 9.570817564847194e-06, "loss": 0.4414, "step": 7453 }, { "epoch": 0.21961315793350322, "grad_norm": 1.8502871487767036, "learning_rate": 9.57060909589785e-06, "loss": 0.5889, "step": 7454 }, { "epoch": 0.2196426203909668, "grad_norm": 1.5852089546943906, "learning_rate": 9.570400578602e-06, "loss": 0.5602, "step": 7455 }, { "epoch": 0.2196720828484304, "grad_norm": 1.6159846640431978, "learning_rate": 9.570192012961844e-06, "loss": 0.5533, "step": 7456 }, { "epoch": 0.21970154530589397, "grad_norm": 1.6830823773121317, "learning_rate": 9.56998339897959e-06, "loss": 0.5049, "step": 7457 }, { "epoch": 0.21973100776335755, "grad_norm": 1.795039935427137, "learning_rate": 9.569774736657444e-06, "loss": 0.5133, "step": 7458 }, { "epoch": 0.21976047022082112, "grad_norm": 1.4707214548684888, "learning_rate": 9.569566025997612e-06, "loss": 0.4303, "step": 7459 }, { "epoch": 0.2197899326782847, "grad_norm": 1.6009554644960604, "learning_rate": 9.569357267002306e-06, "loss": 0.4892, "step": 7460 }, { "epoch": 0.21981939513574827, "grad_norm": 1.7914096544139269, "learning_rate": 9.56914845967373e-06, "loss": 0.4651, "step": 7461 }, { "epoch": 0.21984885759321185, "grad_norm": 1.7597017776278974, "learning_rate": 9.568939604014093e-06, "loss": 0.5394, "step": 7462 }, { "epoch": 0.21987832005067542, "grad_norm": 1.54310524812797, "learning_rate": 9.568730700025606e-06, "loss": 0.4253, "step": 7463 }, { "epoch": 0.219907782508139, "grad_norm": 1.4999028248931159, "learning_rate": 9.56852174771048e-06, "loss": 0.3884, "step": 7464 }, { "epoch": 0.21993724496560257, "grad_norm": 1.579363384190631, "learning_rate": 9.56831274707092e-06, "loss": 0.5153, "step": 7465 }, { "epoch": 0.21996670742306615, "grad_norm": 1.5168722644026906, "learning_rate": 9.568103698109141e-06, "loss": 0.4819, "step": 7466 }, { "epoch": 0.21999616988052972, "grad_norm": 1.6431977942528826, "learning_rate": 9.567894600827355e-06, "loss": 0.5142, "step": 7467 }, { "epoch": 0.2200256323379933, "grad_norm": 1.6796907831950347, "learning_rate": 9.567685455227772e-06, "loss": 0.5529, "step": 7468 }, { "epoch": 0.2200550947954569, "grad_norm": 1.5828259947919545, "learning_rate": 9.567476261312602e-06, "loss": 0.5038, "step": 7469 }, { "epoch": 0.22008455725292048, "grad_norm": 1.6232961583871748, "learning_rate": 9.567267019084061e-06, "loss": 0.5039, "step": 7470 }, { "epoch": 0.22011401971038405, "grad_norm": 1.6943999560052887, "learning_rate": 9.567057728544362e-06, "loss": 0.5337, "step": 7471 }, { "epoch": 0.22014348216784763, "grad_norm": 1.575681671151879, "learning_rate": 9.566848389695718e-06, "loss": 0.5337, "step": 7472 }, { "epoch": 0.2201729446253112, "grad_norm": 1.6658122776085151, "learning_rate": 9.566639002540342e-06, "loss": 0.4727, "step": 7473 }, { "epoch": 0.22020240708277478, "grad_norm": 1.747447648984362, "learning_rate": 9.566429567080454e-06, "loss": 0.5166, "step": 7474 }, { "epoch": 0.22023186954023835, "grad_norm": 1.599567921665, "learning_rate": 9.566220083318263e-06, "loss": 0.371, "step": 7475 }, { "epoch": 0.22026133199770193, "grad_norm": 1.525891962602955, "learning_rate": 9.566010551255988e-06, "loss": 0.4547, "step": 7476 }, { "epoch": 0.2202907944551655, "grad_norm": 1.656737862865484, "learning_rate": 9.565800970895845e-06, "loss": 0.5233, "step": 7477 }, { "epoch": 0.22032025691262908, "grad_norm": 1.510917158152973, "learning_rate": 9.56559134224005e-06, "loss": 0.4281, "step": 7478 }, { "epoch": 0.22034971937009265, "grad_norm": 1.7399317631591873, "learning_rate": 9.565381665290823e-06, "loss": 0.573, "step": 7479 }, { "epoch": 0.22037918182755623, "grad_norm": 1.6275588137174908, "learning_rate": 9.565171940050379e-06, "loss": 0.5721, "step": 7480 }, { "epoch": 0.2204086442850198, "grad_norm": 1.5407331132360818, "learning_rate": 9.564962166520936e-06, "loss": 0.4731, "step": 7481 }, { "epoch": 0.2204381067424834, "grad_norm": 1.655665980347565, "learning_rate": 9.564752344704717e-06, "loss": 0.5507, "step": 7482 }, { "epoch": 0.22046756919994698, "grad_norm": 1.6742512089694612, "learning_rate": 9.564542474603937e-06, "loss": 0.4711, "step": 7483 }, { "epoch": 0.22049703165741055, "grad_norm": 1.652220745887512, "learning_rate": 9.564332556220819e-06, "loss": 0.4799, "step": 7484 }, { "epoch": 0.22052649411487413, "grad_norm": 1.6182217828555698, "learning_rate": 9.564122589557582e-06, "loss": 0.6685, "step": 7485 }, { "epoch": 0.2205559565723377, "grad_norm": 1.4267096841701115, "learning_rate": 9.563912574616448e-06, "loss": 0.435, "step": 7486 }, { "epoch": 0.22058541902980128, "grad_norm": 1.6069804090973185, "learning_rate": 9.563702511399635e-06, "loss": 0.6048, "step": 7487 }, { "epoch": 0.22061488148726485, "grad_norm": 1.3840556953084204, "learning_rate": 9.56349239990937e-06, "loss": 0.418, "step": 7488 }, { "epoch": 0.22064434394472843, "grad_norm": 1.5477051197786598, "learning_rate": 9.563282240147874e-06, "loss": 0.5132, "step": 7489 }, { "epoch": 0.220673806402192, "grad_norm": 1.4993494280977469, "learning_rate": 9.563072032117367e-06, "loss": 0.4696, "step": 7490 }, { "epoch": 0.22070326885965558, "grad_norm": 1.627417301891051, "learning_rate": 9.562861775820075e-06, "loss": 0.6237, "step": 7491 }, { "epoch": 0.22073273131711915, "grad_norm": 1.5176095734902246, "learning_rate": 9.562651471258222e-06, "loss": 0.4291, "step": 7492 }, { "epoch": 0.22076219377458273, "grad_norm": 1.4703111529606288, "learning_rate": 9.562441118434031e-06, "loss": 0.367, "step": 7493 }, { "epoch": 0.2207916562320463, "grad_norm": 1.7775472967131094, "learning_rate": 9.562230717349731e-06, "loss": 0.5827, "step": 7494 }, { "epoch": 0.2208211186895099, "grad_norm": 1.4609992561189775, "learning_rate": 9.562020268007544e-06, "loss": 0.5493, "step": 7495 }, { "epoch": 0.22085058114697348, "grad_norm": 1.506487685906987, "learning_rate": 9.561809770409696e-06, "loss": 0.3288, "step": 7496 }, { "epoch": 0.22088004360443705, "grad_norm": 1.6327826492541977, "learning_rate": 9.561599224558415e-06, "loss": 0.3638, "step": 7497 }, { "epoch": 0.22090950606190063, "grad_norm": 1.5010924561564862, "learning_rate": 9.561388630455928e-06, "loss": 0.4437, "step": 7498 }, { "epoch": 0.2209389685193642, "grad_norm": 1.7606838858890435, "learning_rate": 9.561177988104461e-06, "loss": 0.4743, "step": 7499 }, { "epoch": 0.22096843097682778, "grad_norm": 1.8946170393949797, "learning_rate": 9.560967297506245e-06, "loss": 0.5266, "step": 7500 }, { "epoch": 0.22099789343429135, "grad_norm": 1.720003442095711, "learning_rate": 9.560756558663506e-06, "loss": 0.6772, "step": 7501 }, { "epoch": 0.22102735589175493, "grad_norm": 1.5708858299000918, "learning_rate": 9.560545771578474e-06, "loss": 0.5081, "step": 7502 }, { "epoch": 0.2210568183492185, "grad_norm": 1.7648179374799808, "learning_rate": 9.560334936253377e-06, "loss": 0.5431, "step": 7503 }, { "epoch": 0.22108628080668208, "grad_norm": 1.4959667911567782, "learning_rate": 9.56012405269045e-06, "loss": 0.4485, "step": 7504 }, { "epoch": 0.22111574326414565, "grad_norm": 1.7522407490746137, "learning_rate": 9.559913120891919e-06, "loss": 0.3911, "step": 7505 }, { "epoch": 0.22114520572160923, "grad_norm": 1.3715702183714447, "learning_rate": 9.559702140860017e-06, "loss": 0.4326, "step": 7506 }, { "epoch": 0.2211746681790728, "grad_norm": 1.5898078863169773, "learning_rate": 9.559491112596975e-06, "loss": 0.4026, "step": 7507 }, { "epoch": 0.2212041306365364, "grad_norm": 1.8992881703206568, "learning_rate": 9.559280036105025e-06, "loss": 0.5826, "step": 7508 }, { "epoch": 0.22123359309399998, "grad_norm": 1.395307348828728, "learning_rate": 9.559068911386403e-06, "loss": 0.3735, "step": 7509 }, { "epoch": 0.22126305555146356, "grad_norm": 1.7170987548980416, "learning_rate": 9.558857738443335e-06, "loss": 0.7231, "step": 7510 }, { "epoch": 0.22129251800892713, "grad_norm": 1.4192038189029301, "learning_rate": 9.558646517278063e-06, "loss": 0.4427, "step": 7511 }, { "epoch": 0.2213219804663907, "grad_norm": 1.5986902440929134, "learning_rate": 9.558435247892815e-06, "loss": 0.4639, "step": 7512 }, { "epoch": 0.22135144292385428, "grad_norm": 1.5338930227165632, "learning_rate": 9.558223930289829e-06, "loss": 0.4938, "step": 7513 }, { "epoch": 0.22138090538131786, "grad_norm": 1.5593940970530862, "learning_rate": 9.55801256447134e-06, "loss": 0.4645, "step": 7514 }, { "epoch": 0.22141036783878143, "grad_norm": 1.556636721434095, "learning_rate": 9.557801150439583e-06, "loss": 0.539, "step": 7515 }, { "epoch": 0.221439830296245, "grad_norm": 1.6256238255908262, "learning_rate": 9.557589688196795e-06, "loss": 0.502, "step": 7516 }, { "epoch": 0.22146929275370858, "grad_norm": 1.546270588285515, "learning_rate": 9.557378177745211e-06, "loss": 0.4309, "step": 7517 }, { "epoch": 0.22149875521117215, "grad_norm": 1.7203420775747835, "learning_rate": 9.55716661908707e-06, "loss": 0.5392, "step": 7518 }, { "epoch": 0.22152821766863573, "grad_norm": 1.6157592494997195, "learning_rate": 9.55695501222461e-06, "loss": 0.4546, "step": 7519 }, { "epoch": 0.2215576801260993, "grad_norm": 1.6033929092990575, "learning_rate": 9.556743357160067e-06, "loss": 0.5174, "step": 7520 }, { "epoch": 0.2215871425835629, "grad_norm": 1.5910900932230334, "learning_rate": 9.556531653895685e-06, "loss": 0.4827, "step": 7521 }, { "epoch": 0.22161660504102648, "grad_norm": 1.4819072670105127, "learning_rate": 9.556319902433697e-06, "loss": 0.4843, "step": 7522 }, { "epoch": 0.22164606749849006, "grad_norm": 1.612763275618597, "learning_rate": 9.556108102776345e-06, "loss": 0.5444, "step": 7523 }, { "epoch": 0.22167552995595363, "grad_norm": 1.4953255958065164, "learning_rate": 9.555896254925872e-06, "loss": 0.4792, "step": 7524 }, { "epoch": 0.2217049924134172, "grad_norm": 1.645660621088371, "learning_rate": 9.555684358884516e-06, "loss": 0.4351, "step": 7525 }, { "epoch": 0.22173445487088078, "grad_norm": 1.7567206557648591, "learning_rate": 9.555472414654519e-06, "loss": 0.5825, "step": 7526 }, { "epoch": 0.22176391732834436, "grad_norm": 1.5434340127931576, "learning_rate": 9.555260422238123e-06, "loss": 0.406, "step": 7527 }, { "epoch": 0.22179337978580793, "grad_norm": 1.4484342672733, "learning_rate": 9.555048381637571e-06, "loss": 0.4814, "step": 7528 }, { "epoch": 0.2218228422432715, "grad_norm": 1.7282551591417803, "learning_rate": 9.554836292855105e-06, "loss": 0.4893, "step": 7529 }, { "epoch": 0.22185230470073508, "grad_norm": 1.468470808691559, "learning_rate": 9.554624155892969e-06, "loss": 0.4056, "step": 7530 }, { "epoch": 0.22188176715819866, "grad_norm": 1.7205341203209565, "learning_rate": 9.554411970753408e-06, "loss": 0.4537, "step": 7531 }, { "epoch": 0.22191122961566223, "grad_norm": 1.6367325462430387, "learning_rate": 9.554199737438662e-06, "loss": 0.4133, "step": 7532 }, { "epoch": 0.2219406920731258, "grad_norm": 1.5162066466004083, "learning_rate": 9.553987455950981e-06, "loss": 0.376, "step": 7533 }, { "epoch": 0.2219701545305894, "grad_norm": 1.5449002730638908, "learning_rate": 9.55377512629261e-06, "loss": 0.4499, "step": 7534 }, { "epoch": 0.22199961698805298, "grad_norm": 1.638743118591122, "learning_rate": 9.553562748465792e-06, "loss": 0.4272, "step": 7535 }, { "epoch": 0.22202907944551656, "grad_norm": 1.7757265184240443, "learning_rate": 9.553350322472776e-06, "loss": 0.6781, "step": 7536 }, { "epoch": 0.22205854190298013, "grad_norm": 1.4610868809017208, "learning_rate": 9.553137848315807e-06, "loss": 0.4336, "step": 7537 }, { "epoch": 0.2220880043604437, "grad_norm": 1.5531613020295987, "learning_rate": 9.552925325997133e-06, "loss": 0.431, "step": 7538 }, { "epoch": 0.22211746681790728, "grad_norm": 1.7292931032705552, "learning_rate": 9.552712755519003e-06, "loss": 0.6651, "step": 7539 }, { "epoch": 0.22214692927537086, "grad_norm": 1.6383726169680464, "learning_rate": 9.552500136883666e-06, "loss": 0.4392, "step": 7540 }, { "epoch": 0.22217639173283443, "grad_norm": 1.5870947213523319, "learning_rate": 9.552287470093368e-06, "loss": 0.6227, "step": 7541 }, { "epoch": 0.222205854190298, "grad_norm": 1.5517345464549162, "learning_rate": 9.552074755150362e-06, "loss": 0.5218, "step": 7542 }, { "epoch": 0.22223531664776158, "grad_norm": 1.638219399720229, "learning_rate": 9.551861992056896e-06, "loss": 0.4498, "step": 7543 }, { "epoch": 0.22226477910522516, "grad_norm": 1.8279742282191482, "learning_rate": 9.551649180815221e-06, "loss": 0.4975, "step": 7544 }, { "epoch": 0.22229424156268873, "grad_norm": 1.3495660530737987, "learning_rate": 9.551436321427587e-06, "loss": 0.3897, "step": 7545 }, { "epoch": 0.2223237040201523, "grad_norm": 1.6036333392249742, "learning_rate": 9.551223413896248e-06, "loss": 0.5066, "step": 7546 }, { "epoch": 0.2223531664776159, "grad_norm": 1.3602394981026156, "learning_rate": 9.551010458223455e-06, "loss": 0.3354, "step": 7547 }, { "epoch": 0.22238262893507948, "grad_norm": 1.5431698489092498, "learning_rate": 9.55079745441146e-06, "loss": 0.3568, "step": 7548 }, { "epoch": 0.22241209139254306, "grad_norm": 1.8405884909767363, "learning_rate": 9.550584402462517e-06, "loss": 0.5374, "step": 7549 }, { "epoch": 0.22244155385000663, "grad_norm": 1.5662800439256557, "learning_rate": 9.550371302378878e-06, "loss": 0.4667, "step": 7550 }, { "epoch": 0.2224710163074702, "grad_norm": 1.9016063566008339, "learning_rate": 9.550158154162799e-06, "loss": 0.7212, "step": 7551 }, { "epoch": 0.22250047876493378, "grad_norm": 1.5179268253950906, "learning_rate": 9.549944957816534e-06, "loss": 0.4137, "step": 7552 }, { "epoch": 0.22252994122239736, "grad_norm": 1.6782580384532217, "learning_rate": 9.549731713342336e-06, "loss": 0.4214, "step": 7553 }, { "epoch": 0.22255940367986093, "grad_norm": 1.9075513958120565, "learning_rate": 9.549518420742464e-06, "loss": 0.6421, "step": 7554 }, { "epoch": 0.2225888661373245, "grad_norm": 1.7570464639299657, "learning_rate": 9.549305080019173e-06, "loss": 0.4752, "step": 7555 }, { "epoch": 0.22261832859478808, "grad_norm": 1.653190043754462, "learning_rate": 9.54909169117472e-06, "loss": 0.4392, "step": 7556 }, { "epoch": 0.22264779105225166, "grad_norm": 1.6360145718554795, "learning_rate": 9.54887825421136e-06, "loss": 0.5561, "step": 7557 }, { "epoch": 0.22267725350971523, "grad_norm": 1.671009735484271, "learning_rate": 9.548664769131351e-06, "loss": 0.6058, "step": 7558 }, { "epoch": 0.2227067159671788, "grad_norm": 1.6448623447026505, "learning_rate": 9.548451235936954e-06, "loss": 0.5518, "step": 7559 }, { "epoch": 0.2227361784246424, "grad_norm": 1.6586028612310573, "learning_rate": 9.548237654630425e-06, "loss": 0.4896, "step": 7560 }, { "epoch": 0.222765640882106, "grad_norm": 1.5266318010233488, "learning_rate": 9.548024025214026e-06, "loss": 0.4666, "step": 7561 }, { "epoch": 0.22279510333956956, "grad_norm": 1.561346301047377, "learning_rate": 9.547810347690012e-06, "loss": 0.5234, "step": 7562 }, { "epoch": 0.22282456579703314, "grad_norm": 1.5966672807113027, "learning_rate": 9.547596622060648e-06, "loss": 0.6084, "step": 7563 }, { "epoch": 0.2228540282544967, "grad_norm": 1.7470536558495506, "learning_rate": 9.547382848328195e-06, "loss": 0.6447, "step": 7564 }, { "epoch": 0.22288349071196029, "grad_norm": 1.6442372760383568, "learning_rate": 9.547169026494909e-06, "loss": 0.4483, "step": 7565 }, { "epoch": 0.22291295316942386, "grad_norm": 1.662851631908152, "learning_rate": 9.546955156563056e-06, "loss": 0.5124, "step": 7566 }, { "epoch": 0.22294241562688744, "grad_norm": 1.6298376845480151, "learning_rate": 9.546741238534895e-06, "loss": 0.5533, "step": 7567 }, { "epoch": 0.222971878084351, "grad_norm": 1.649076560283218, "learning_rate": 9.546527272412694e-06, "loss": 0.5615, "step": 7568 }, { "epoch": 0.22300134054181459, "grad_norm": 1.4864942287063168, "learning_rate": 9.546313258198711e-06, "loss": 0.4101, "step": 7569 }, { "epoch": 0.22303080299927816, "grad_norm": 1.63919360976083, "learning_rate": 9.546099195895213e-06, "loss": 0.4891, "step": 7570 }, { "epoch": 0.22306026545674174, "grad_norm": 1.6643438423579537, "learning_rate": 9.545885085504463e-06, "loss": 0.4848, "step": 7571 }, { "epoch": 0.2230897279142053, "grad_norm": 1.5531264708042531, "learning_rate": 9.545670927028725e-06, "loss": 0.5555, "step": 7572 }, { "epoch": 0.2231191903716689, "grad_norm": 1.685886560885129, "learning_rate": 9.545456720470267e-06, "loss": 0.5284, "step": 7573 }, { "epoch": 0.2231486528291325, "grad_norm": 1.4821335730288543, "learning_rate": 9.545242465831353e-06, "loss": 0.4022, "step": 7574 }, { "epoch": 0.22317811528659606, "grad_norm": 1.6780813931762144, "learning_rate": 9.545028163114247e-06, "loss": 0.5385, "step": 7575 }, { "epoch": 0.22320757774405964, "grad_norm": 1.7180847913116581, "learning_rate": 9.544813812321221e-06, "loss": 0.588, "step": 7576 }, { "epoch": 0.2232370402015232, "grad_norm": 1.6544253184952564, "learning_rate": 9.544599413454538e-06, "loss": 0.4237, "step": 7577 }, { "epoch": 0.2232665026589868, "grad_norm": 1.5491786587508252, "learning_rate": 9.544384966516466e-06, "loss": 0.5134, "step": 7578 }, { "epoch": 0.22329596511645036, "grad_norm": 1.3636168047931192, "learning_rate": 9.544170471509278e-06, "loss": 0.3338, "step": 7579 }, { "epoch": 0.22332542757391394, "grad_norm": 1.5515933730680649, "learning_rate": 9.543955928435238e-06, "loss": 0.4494, "step": 7580 }, { "epoch": 0.2233548900313775, "grad_norm": 1.6008710508223394, "learning_rate": 9.543741337296618e-06, "loss": 0.5383, "step": 7581 }, { "epoch": 0.2233843524888411, "grad_norm": 1.5608148132111446, "learning_rate": 9.543526698095684e-06, "loss": 0.434, "step": 7582 }, { "epoch": 0.22341381494630466, "grad_norm": 1.392125928992347, "learning_rate": 9.543312010834712e-06, "loss": 0.4456, "step": 7583 }, { "epoch": 0.22344327740376824, "grad_norm": 1.6278352701892083, "learning_rate": 9.54309727551597e-06, "loss": 0.5812, "step": 7584 }, { "epoch": 0.2234727398612318, "grad_norm": 1.504004152598165, "learning_rate": 9.54288249214173e-06, "loss": 0.3992, "step": 7585 }, { "epoch": 0.22350220231869541, "grad_norm": 1.5725402979292675, "learning_rate": 9.542667660714262e-06, "loss": 0.4833, "step": 7586 }, { "epoch": 0.223531664776159, "grad_norm": 1.8469510739693775, "learning_rate": 9.54245278123584e-06, "loss": 0.6642, "step": 7587 }, { "epoch": 0.22356112723362256, "grad_norm": 1.7589749086632043, "learning_rate": 9.542237853708737e-06, "loss": 0.5406, "step": 7588 }, { "epoch": 0.22359058969108614, "grad_norm": 1.5965496137694994, "learning_rate": 9.542022878135226e-06, "loss": 0.475, "step": 7589 }, { "epoch": 0.22362005214854971, "grad_norm": 1.4117016381496834, "learning_rate": 9.541807854517583e-06, "loss": 0.4588, "step": 7590 }, { "epoch": 0.2236495146060133, "grad_norm": 1.7341338022634256, "learning_rate": 9.541592782858079e-06, "loss": 0.4945, "step": 7591 }, { "epoch": 0.22367897706347686, "grad_norm": 1.4842867026102209, "learning_rate": 9.541377663158991e-06, "loss": 0.5337, "step": 7592 }, { "epoch": 0.22370843952094044, "grad_norm": 1.7823327329352188, "learning_rate": 9.541162495422593e-06, "loss": 0.6676, "step": 7593 }, { "epoch": 0.223737901978404, "grad_norm": 1.4937939050668347, "learning_rate": 9.540947279651166e-06, "loss": 0.3564, "step": 7594 }, { "epoch": 0.2237673644358676, "grad_norm": 1.8500188863853944, "learning_rate": 9.54073201584698e-06, "loss": 0.4966, "step": 7595 }, { "epoch": 0.22379682689333116, "grad_norm": 1.7356875517992, "learning_rate": 9.540516704012313e-06, "loss": 0.5098, "step": 7596 }, { "epoch": 0.22382628935079474, "grad_norm": 1.4993453740712224, "learning_rate": 9.540301344149446e-06, "loss": 0.5369, "step": 7597 }, { "epoch": 0.2238557518082583, "grad_norm": 1.3090589396094894, "learning_rate": 9.540085936260654e-06, "loss": 0.3265, "step": 7598 }, { "epoch": 0.22388521426572192, "grad_norm": 1.4830084031989648, "learning_rate": 9.539870480348217e-06, "loss": 0.4613, "step": 7599 }, { "epoch": 0.2239146767231855, "grad_norm": 1.4909475487131434, "learning_rate": 9.539654976414415e-06, "loss": 0.4148, "step": 7600 }, { "epoch": 0.22394413918064907, "grad_norm": 1.837137479356822, "learning_rate": 9.539439424461524e-06, "loss": 0.5891, "step": 7601 }, { "epoch": 0.22397360163811264, "grad_norm": 1.8603802747189346, "learning_rate": 9.539223824491826e-06, "loss": 0.745, "step": 7602 }, { "epoch": 0.22400306409557622, "grad_norm": 1.4777433029375255, "learning_rate": 9.539008176507604e-06, "loss": 0.4565, "step": 7603 }, { "epoch": 0.2240325265530398, "grad_norm": 1.6766424591272902, "learning_rate": 9.538792480511136e-06, "loss": 0.513, "step": 7604 }, { "epoch": 0.22406198901050337, "grad_norm": 1.4400726540077218, "learning_rate": 9.538576736504704e-06, "loss": 0.3328, "step": 7605 }, { "epoch": 0.22409145146796694, "grad_norm": 1.6156316624219338, "learning_rate": 9.53836094449059e-06, "loss": 0.5417, "step": 7606 }, { "epoch": 0.22412091392543051, "grad_norm": 1.8183587887535289, "learning_rate": 9.538145104471078e-06, "loss": 0.7259, "step": 7607 }, { "epoch": 0.2241503763828941, "grad_norm": 1.5544247571111658, "learning_rate": 9.53792921644845e-06, "loss": 0.424, "step": 7608 }, { "epoch": 0.22417983884035766, "grad_norm": 1.6861787096882965, "learning_rate": 9.53771328042499e-06, "loss": 0.5966, "step": 7609 }, { "epoch": 0.22420930129782124, "grad_norm": 1.759461382348439, "learning_rate": 9.53749729640298e-06, "loss": 0.5404, "step": 7610 }, { "epoch": 0.22423876375528481, "grad_norm": 1.6411074141386157, "learning_rate": 9.537281264384708e-06, "loss": 0.516, "step": 7611 }, { "epoch": 0.22426822621274842, "grad_norm": 1.6106270385843227, "learning_rate": 9.537065184372459e-06, "loss": 0.477, "step": 7612 }, { "epoch": 0.224297688670212, "grad_norm": 1.6176035141860305, "learning_rate": 9.536849056368514e-06, "loss": 0.5964, "step": 7613 }, { "epoch": 0.22432715112767557, "grad_norm": 1.6568981440619532, "learning_rate": 9.536632880375165e-06, "loss": 0.6605, "step": 7614 }, { "epoch": 0.22435661358513914, "grad_norm": 1.697077636680749, "learning_rate": 9.536416656394694e-06, "loss": 0.6877, "step": 7615 }, { "epoch": 0.22438607604260272, "grad_norm": 1.509755249822036, "learning_rate": 9.53620038442939e-06, "loss": 0.4393, "step": 7616 }, { "epoch": 0.2244155385000663, "grad_norm": 1.4456053942448464, "learning_rate": 9.535984064481543e-06, "loss": 0.3892, "step": 7617 }, { "epoch": 0.22444500095752987, "grad_norm": 1.615588531459417, "learning_rate": 9.535767696553437e-06, "loss": 0.6357, "step": 7618 }, { "epoch": 0.22447446341499344, "grad_norm": 1.5456242447424575, "learning_rate": 9.535551280647364e-06, "loss": 0.4025, "step": 7619 }, { "epoch": 0.22450392587245702, "grad_norm": 1.922834949501023, "learning_rate": 9.535334816765612e-06, "loss": 0.7953, "step": 7620 }, { "epoch": 0.2245333883299206, "grad_norm": 1.5684585410790866, "learning_rate": 9.535118304910469e-06, "loss": 0.4412, "step": 7621 }, { "epoch": 0.22456285078738417, "grad_norm": 1.684593629265065, "learning_rate": 9.534901745084226e-06, "loss": 0.4463, "step": 7622 }, { "epoch": 0.22459231324484774, "grad_norm": 1.4953260978405325, "learning_rate": 9.534685137289176e-06, "loss": 0.5113, "step": 7623 }, { "epoch": 0.22462177570231132, "grad_norm": 1.6042608455367153, "learning_rate": 9.53446848152761e-06, "loss": 0.5504, "step": 7624 }, { "epoch": 0.22465123815977492, "grad_norm": 1.9266194594068475, "learning_rate": 9.534251777801814e-06, "loss": 0.5777, "step": 7625 }, { "epoch": 0.2246807006172385, "grad_norm": 1.6741090347761014, "learning_rate": 9.534035026114088e-06, "loss": 0.388, "step": 7626 }, { "epoch": 0.22471016307470207, "grad_norm": 1.7585193745399887, "learning_rate": 9.533818226466721e-06, "loss": 0.4401, "step": 7627 }, { "epoch": 0.22473962553216564, "grad_norm": 1.6269329890070385, "learning_rate": 9.533601378862007e-06, "loss": 0.4812, "step": 7628 }, { "epoch": 0.22476908798962922, "grad_norm": 1.5356668335505959, "learning_rate": 9.533384483302236e-06, "loss": 0.5502, "step": 7629 }, { "epoch": 0.2247985504470928, "grad_norm": 1.630262070911355, "learning_rate": 9.53316753978971e-06, "loss": 0.4641, "step": 7630 }, { "epoch": 0.22482801290455637, "grad_norm": 1.5576785494835204, "learning_rate": 9.532950548326716e-06, "loss": 0.5437, "step": 7631 }, { "epoch": 0.22485747536201994, "grad_norm": 1.6328111436743682, "learning_rate": 9.532733508915555e-06, "loss": 0.373, "step": 7632 }, { "epoch": 0.22488693781948352, "grad_norm": 1.6016939811261974, "learning_rate": 9.532516421558517e-06, "loss": 0.4885, "step": 7633 }, { "epoch": 0.2249164002769471, "grad_norm": 1.5007847814726145, "learning_rate": 9.532299286257904e-06, "loss": 0.5056, "step": 7634 }, { "epoch": 0.22494586273441067, "grad_norm": 1.4833111763673201, "learning_rate": 9.53208210301601e-06, "loss": 0.4715, "step": 7635 }, { "epoch": 0.22497532519187424, "grad_norm": 1.9926499045201946, "learning_rate": 9.531864871835133e-06, "loss": 0.5351, "step": 7636 }, { "epoch": 0.22500478764933785, "grad_norm": 1.60133093737985, "learning_rate": 9.531647592717572e-06, "loss": 0.6299, "step": 7637 }, { "epoch": 0.22503425010680142, "grad_norm": 1.6247896414970775, "learning_rate": 9.531430265665621e-06, "loss": 0.4202, "step": 7638 }, { "epoch": 0.225063712564265, "grad_norm": 1.7152216711324115, "learning_rate": 9.531212890681582e-06, "loss": 0.5772, "step": 7639 }, { "epoch": 0.22509317502172857, "grad_norm": 1.4446741548382303, "learning_rate": 9.530995467767755e-06, "loss": 0.5381, "step": 7640 }, { "epoch": 0.22512263747919214, "grad_norm": 1.482086283127292, "learning_rate": 9.53077799692644e-06, "loss": 0.4959, "step": 7641 }, { "epoch": 0.22515209993665572, "grad_norm": 1.330646311527593, "learning_rate": 9.530560478159933e-06, "loss": 0.396, "step": 7642 }, { "epoch": 0.2251815623941193, "grad_norm": 1.5762558207117876, "learning_rate": 9.53034291147054e-06, "loss": 0.4286, "step": 7643 }, { "epoch": 0.22521102485158287, "grad_norm": 1.4067333203610337, "learning_rate": 9.530125296860559e-06, "loss": 0.4515, "step": 7644 }, { "epoch": 0.22524048730904644, "grad_norm": 1.8114031158073576, "learning_rate": 9.529907634332296e-06, "loss": 0.4048, "step": 7645 }, { "epoch": 0.22526994976651002, "grad_norm": 1.5119526079209487, "learning_rate": 9.529689923888048e-06, "loss": 0.6033, "step": 7646 }, { "epoch": 0.2252994122239736, "grad_norm": 1.4828030566958035, "learning_rate": 9.529472165530122e-06, "loss": 0.5618, "step": 7647 }, { "epoch": 0.22532887468143717, "grad_norm": 1.4494358748077218, "learning_rate": 9.529254359260818e-06, "loss": 0.4862, "step": 7648 }, { "epoch": 0.22535833713890074, "grad_norm": 1.3522441119316286, "learning_rate": 9.529036505082443e-06, "loss": 0.377, "step": 7649 }, { "epoch": 0.22538779959636435, "grad_norm": 1.7109793034387177, "learning_rate": 9.5288186029973e-06, "loss": 0.5296, "step": 7650 }, { "epoch": 0.22541726205382792, "grad_norm": 1.5400695382525365, "learning_rate": 9.528600653007695e-06, "loss": 0.4985, "step": 7651 }, { "epoch": 0.2254467245112915, "grad_norm": 1.6241395674508168, "learning_rate": 9.528382655115931e-06, "loss": 0.6099, "step": 7652 }, { "epoch": 0.22547618696875507, "grad_norm": 1.5296368737477302, "learning_rate": 9.528164609324316e-06, "loss": 0.5497, "step": 7653 }, { "epoch": 0.22550564942621865, "grad_norm": 1.772938108274568, "learning_rate": 9.527946515635155e-06, "loss": 0.4338, "step": 7654 }, { "epoch": 0.22553511188368222, "grad_norm": 1.4482970492277218, "learning_rate": 9.527728374050756e-06, "loss": 0.5248, "step": 7655 }, { "epoch": 0.2255645743411458, "grad_norm": 1.5595407226813436, "learning_rate": 9.527510184573428e-06, "loss": 0.4645, "step": 7656 }, { "epoch": 0.22559403679860937, "grad_norm": 1.5964445894238934, "learning_rate": 9.527291947205474e-06, "loss": 0.571, "step": 7657 }, { "epoch": 0.22562349925607295, "grad_norm": 1.3959739088905394, "learning_rate": 9.527073661949206e-06, "loss": 0.463, "step": 7658 }, { "epoch": 0.22565296171353652, "grad_norm": 1.7180376186004107, "learning_rate": 9.526855328806934e-06, "loss": 0.5162, "step": 7659 }, { "epoch": 0.2256824241710001, "grad_norm": 1.4808424644135474, "learning_rate": 9.526636947780965e-06, "loss": 0.5175, "step": 7660 }, { "epoch": 0.22571188662846367, "grad_norm": 1.5162325432880674, "learning_rate": 9.52641851887361e-06, "loss": 0.4546, "step": 7661 }, { "epoch": 0.22574134908592725, "grad_norm": 1.6396722729772653, "learning_rate": 9.52620004208718e-06, "loss": 0.5417, "step": 7662 }, { "epoch": 0.22577081154339085, "grad_norm": 1.5411936066607896, "learning_rate": 9.525981517423985e-06, "loss": 0.3298, "step": 7663 }, { "epoch": 0.22580027400085442, "grad_norm": 1.6120736194011431, "learning_rate": 9.525762944886336e-06, "loss": 0.4783, "step": 7664 }, { "epoch": 0.225829736458318, "grad_norm": 1.6323542886120253, "learning_rate": 9.525544324476547e-06, "loss": 0.5684, "step": 7665 }, { "epoch": 0.22585919891578157, "grad_norm": 1.7428266533548282, "learning_rate": 9.52532565619693e-06, "loss": 0.6259, "step": 7666 }, { "epoch": 0.22588866137324515, "grad_norm": 1.612010809550719, "learning_rate": 9.525106940049797e-06, "loss": 0.5724, "step": 7667 }, { "epoch": 0.22591812383070872, "grad_norm": 1.645012413491889, "learning_rate": 9.52488817603746e-06, "loss": 0.6007, "step": 7668 }, { "epoch": 0.2259475862881723, "grad_norm": 2.0558540300899026, "learning_rate": 9.524669364162236e-06, "loss": 0.5073, "step": 7669 }, { "epoch": 0.22597704874563587, "grad_norm": 1.5493305502755843, "learning_rate": 9.52445050442644e-06, "loss": 0.5541, "step": 7670 }, { "epoch": 0.22600651120309945, "grad_norm": 1.5353112759642957, "learning_rate": 9.524231596832384e-06, "loss": 0.5546, "step": 7671 }, { "epoch": 0.22603597366056302, "grad_norm": 1.5580748091642187, "learning_rate": 9.524012641382382e-06, "loss": 0.5538, "step": 7672 }, { "epoch": 0.2260654361180266, "grad_norm": 1.7024548861889137, "learning_rate": 9.523793638078758e-06, "loss": 0.4754, "step": 7673 }, { "epoch": 0.22609489857549017, "grad_norm": 1.5238375684745438, "learning_rate": 9.52357458692382e-06, "loss": 0.6128, "step": 7674 }, { "epoch": 0.22612436103295375, "grad_norm": 1.5981670828784686, "learning_rate": 9.523355487919891e-06, "loss": 0.4096, "step": 7675 }, { "epoch": 0.22615382349041735, "grad_norm": 1.6218767815181554, "learning_rate": 9.523136341069286e-06, "loss": 0.4603, "step": 7676 }, { "epoch": 0.22618328594788092, "grad_norm": 1.6310605441468813, "learning_rate": 9.522917146374322e-06, "loss": 0.5894, "step": 7677 }, { "epoch": 0.2262127484053445, "grad_norm": 1.6669238291104131, "learning_rate": 9.52269790383732e-06, "loss": 0.4726, "step": 7678 }, { "epoch": 0.22624221086280807, "grad_norm": 1.5587555007176594, "learning_rate": 9.522478613460597e-06, "loss": 0.4977, "step": 7679 }, { "epoch": 0.22627167332027165, "grad_norm": 1.587458119021371, "learning_rate": 9.522259275246473e-06, "loss": 0.607, "step": 7680 }, { "epoch": 0.22630113577773522, "grad_norm": 1.4833036672865312, "learning_rate": 9.52203988919727e-06, "loss": 0.401, "step": 7681 }, { "epoch": 0.2263305982351988, "grad_norm": 1.5139329458261277, "learning_rate": 9.521820455315307e-06, "loss": 0.4582, "step": 7682 }, { "epoch": 0.22636006069266237, "grad_norm": 1.5861497903540231, "learning_rate": 9.521600973602904e-06, "loss": 0.4234, "step": 7683 }, { "epoch": 0.22638952315012595, "grad_norm": 1.612971987276373, "learning_rate": 9.521381444062386e-06, "loss": 0.5918, "step": 7684 }, { "epoch": 0.22641898560758952, "grad_norm": 1.5523571107628495, "learning_rate": 9.52116186669607e-06, "loss": 0.3819, "step": 7685 }, { "epoch": 0.2264484480650531, "grad_norm": 1.932044529800675, "learning_rate": 9.520942241506283e-06, "loss": 0.4453, "step": 7686 }, { "epoch": 0.22647791052251667, "grad_norm": 1.5608427804172391, "learning_rate": 9.520722568495348e-06, "loss": 0.4984, "step": 7687 }, { "epoch": 0.22650737297998025, "grad_norm": 1.7150314573162617, "learning_rate": 9.520502847665586e-06, "loss": 0.4595, "step": 7688 }, { "epoch": 0.22653683543744385, "grad_norm": 1.471991702544529, "learning_rate": 9.520283079019324e-06, "loss": 0.4272, "step": 7689 }, { "epoch": 0.22656629789490743, "grad_norm": 1.5822318408308438, "learning_rate": 9.520063262558885e-06, "loss": 0.5594, "step": 7690 }, { "epoch": 0.226595760352371, "grad_norm": 1.884776192556461, "learning_rate": 9.519843398286592e-06, "loss": 0.5417, "step": 7691 }, { "epoch": 0.22662522280983458, "grad_norm": 1.5711107024323872, "learning_rate": 9.519623486204775e-06, "loss": 0.5351, "step": 7692 }, { "epoch": 0.22665468526729815, "grad_norm": 1.6830042168117738, "learning_rate": 9.519403526315759e-06, "loss": 0.5172, "step": 7693 }, { "epoch": 0.22668414772476173, "grad_norm": 1.662364169914509, "learning_rate": 9.519183518621869e-06, "loss": 0.3962, "step": 7694 }, { "epoch": 0.2267136101822253, "grad_norm": 1.4223428076107971, "learning_rate": 9.518963463125432e-06, "loss": 0.384, "step": 7695 }, { "epoch": 0.22674307263968888, "grad_norm": 1.7303773243318532, "learning_rate": 9.518743359828776e-06, "loss": 0.6333, "step": 7696 }, { "epoch": 0.22677253509715245, "grad_norm": 1.8243274608998534, "learning_rate": 9.518523208734231e-06, "loss": 0.4198, "step": 7697 }, { "epoch": 0.22680199755461602, "grad_norm": 1.6521243199291096, "learning_rate": 9.518303009844125e-06, "loss": 0.5649, "step": 7698 }, { "epoch": 0.2268314600120796, "grad_norm": 1.9061029719929499, "learning_rate": 9.518082763160787e-06, "loss": 0.669, "step": 7699 }, { "epoch": 0.22686092246954317, "grad_norm": 1.7517155455654554, "learning_rate": 9.517862468686545e-06, "loss": 0.4206, "step": 7700 }, { "epoch": 0.22689038492700675, "grad_norm": 1.8575210612214195, "learning_rate": 9.517642126423733e-06, "loss": 0.6531, "step": 7701 }, { "epoch": 0.22691984738447035, "grad_norm": 1.6112624031074987, "learning_rate": 9.51742173637468e-06, "loss": 0.5363, "step": 7702 }, { "epoch": 0.22694930984193393, "grad_norm": 1.6135550355754462, "learning_rate": 9.517201298541713e-06, "loss": 0.4377, "step": 7703 }, { "epoch": 0.2269787722993975, "grad_norm": 1.74127228668789, "learning_rate": 9.51698081292717e-06, "loss": 0.6206, "step": 7704 }, { "epoch": 0.22700823475686108, "grad_norm": 1.697514987242757, "learning_rate": 9.516760279533379e-06, "loss": 0.5377, "step": 7705 }, { "epoch": 0.22703769721432465, "grad_norm": 1.7948970141177774, "learning_rate": 9.516539698362674e-06, "loss": 0.5319, "step": 7706 }, { "epoch": 0.22706715967178823, "grad_norm": 1.3030394769235778, "learning_rate": 9.51631906941739e-06, "loss": 0.3589, "step": 7707 }, { "epoch": 0.2270966221292518, "grad_norm": 1.619626364079008, "learning_rate": 9.516098392699858e-06, "loss": 0.6793, "step": 7708 }, { "epoch": 0.22712608458671538, "grad_norm": 1.4862514464774246, "learning_rate": 9.515877668212414e-06, "loss": 0.5688, "step": 7709 }, { "epoch": 0.22715554704417895, "grad_norm": 1.53047404860517, "learning_rate": 9.515656895957393e-06, "loss": 0.4569, "step": 7710 }, { "epoch": 0.22718500950164253, "grad_norm": 1.7798419227978195, "learning_rate": 9.51543607593713e-06, "loss": 0.6241, "step": 7711 }, { "epoch": 0.2272144719591061, "grad_norm": 1.5331376679181936, "learning_rate": 9.515215208153958e-06, "loss": 0.564, "step": 7712 }, { "epoch": 0.22724393441656968, "grad_norm": 1.8106589681338776, "learning_rate": 9.514994292610217e-06, "loss": 0.4813, "step": 7713 }, { "epoch": 0.22727339687403325, "grad_norm": 2.1229433074049338, "learning_rate": 9.514773329308243e-06, "loss": 0.7555, "step": 7714 }, { "epoch": 0.22730285933149685, "grad_norm": 1.6543402187115614, "learning_rate": 9.514552318250372e-06, "loss": 0.5073, "step": 7715 }, { "epoch": 0.22733232178896043, "grad_norm": 1.6233282226860364, "learning_rate": 9.51433125943894e-06, "loss": 0.5076, "step": 7716 }, { "epoch": 0.227361784246424, "grad_norm": 1.6695028544694557, "learning_rate": 9.51411015287629e-06, "loss": 0.4865, "step": 7717 }, { "epoch": 0.22739124670388758, "grad_norm": 1.7647924439045224, "learning_rate": 9.513888998564759e-06, "loss": 0.5932, "step": 7718 }, { "epoch": 0.22742070916135115, "grad_norm": 1.7133075280018637, "learning_rate": 9.513667796506686e-06, "loss": 0.5381, "step": 7719 }, { "epoch": 0.22745017161881473, "grad_norm": 1.4831343273410478, "learning_rate": 9.51344654670441e-06, "loss": 0.3092, "step": 7720 }, { "epoch": 0.2274796340762783, "grad_norm": 1.4654146426715087, "learning_rate": 9.513225249160273e-06, "loss": 0.4236, "step": 7721 }, { "epoch": 0.22750909653374188, "grad_norm": 1.719143763825658, "learning_rate": 9.513003903876614e-06, "loss": 0.6764, "step": 7722 }, { "epoch": 0.22753855899120545, "grad_norm": 1.5695373174814213, "learning_rate": 9.512782510855774e-06, "loss": 0.556, "step": 7723 }, { "epoch": 0.22756802144866903, "grad_norm": 1.6274570121718799, "learning_rate": 9.512561070100097e-06, "loss": 0.5124, "step": 7724 }, { "epoch": 0.2275974839061326, "grad_norm": 1.6457767702013646, "learning_rate": 9.512339581611923e-06, "loss": 0.4676, "step": 7725 }, { "epoch": 0.22762694636359618, "grad_norm": 1.4699680619729436, "learning_rate": 9.512118045393598e-06, "loss": 0.4043, "step": 7726 }, { "epoch": 0.22765640882105975, "grad_norm": 1.6350866711393526, "learning_rate": 9.511896461447463e-06, "loss": 0.4609, "step": 7727 }, { "epoch": 0.22768587127852336, "grad_norm": 1.5810040174003008, "learning_rate": 9.51167482977586e-06, "loss": 0.5959, "step": 7728 }, { "epoch": 0.22771533373598693, "grad_norm": 1.9200689758192833, "learning_rate": 9.511453150381139e-06, "loss": 0.5404, "step": 7729 }, { "epoch": 0.2277447961934505, "grad_norm": 1.6123223428498497, "learning_rate": 9.51123142326564e-06, "loss": 0.5272, "step": 7730 }, { "epoch": 0.22777425865091408, "grad_norm": 1.5072898764354261, "learning_rate": 9.51100964843171e-06, "loss": 0.6003, "step": 7731 }, { "epoch": 0.22780372110837765, "grad_norm": 1.5758526140484939, "learning_rate": 9.510787825881693e-06, "loss": 0.4882, "step": 7732 }, { "epoch": 0.22783318356584123, "grad_norm": 1.7703519071467995, "learning_rate": 9.510565955617939e-06, "loss": 0.5621, "step": 7733 }, { "epoch": 0.2278626460233048, "grad_norm": 1.576086469576153, "learning_rate": 9.510344037642793e-06, "loss": 0.4134, "step": 7734 }, { "epoch": 0.22789210848076838, "grad_norm": 1.6924586904934866, "learning_rate": 9.5101220719586e-06, "loss": 0.521, "step": 7735 }, { "epoch": 0.22792157093823195, "grad_norm": 1.5068122028059427, "learning_rate": 9.509900058567714e-06, "loss": 0.4146, "step": 7736 }, { "epoch": 0.22795103339569553, "grad_norm": 1.8141333126908112, "learning_rate": 9.509677997472477e-06, "loss": 0.4913, "step": 7737 }, { "epoch": 0.2279804958531591, "grad_norm": 1.6553148801832995, "learning_rate": 9.509455888675241e-06, "loss": 0.4341, "step": 7738 }, { "epoch": 0.22800995831062268, "grad_norm": 1.390838119993589, "learning_rate": 9.509233732178355e-06, "loss": 0.4323, "step": 7739 }, { "epoch": 0.22803942076808625, "grad_norm": 1.7079459085673108, "learning_rate": 9.50901152798417e-06, "loss": 0.689, "step": 7740 }, { "epoch": 0.22806888322554986, "grad_norm": 1.5590993561706958, "learning_rate": 9.508789276095034e-06, "loss": 0.4933, "step": 7741 }, { "epoch": 0.22809834568301343, "grad_norm": 1.3147248736549282, "learning_rate": 9.508566976513298e-06, "loss": 0.2902, "step": 7742 }, { "epoch": 0.228127808140477, "grad_norm": 1.6955826878457418, "learning_rate": 9.508344629241317e-06, "loss": 0.5336, "step": 7743 }, { "epoch": 0.22815727059794058, "grad_norm": 1.787785353060018, "learning_rate": 9.508122234281439e-06, "loss": 0.6734, "step": 7744 }, { "epoch": 0.22818673305540416, "grad_norm": 1.63496955766081, "learning_rate": 9.507899791636018e-06, "loss": 0.5006, "step": 7745 }, { "epoch": 0.22821619551286773, "grad_norm": 1.4652365112790815, "learning_rate": 9.507677301307408e-06, "loss": 0.4599, "step": 7746 }, { "epoch": 0.2282456579703313, "grad_norm": 1.562067407641146, "learning_rate": 9.50745476329796e-06, "loss": 0.5549, "step": 7747 }, { "epoch": 0.22827512042779488, "grad_norm": 1.8763293263823477, "learning_rate": 9.50723217761003e-06, "loss": 0.5127, "step": 7748 }, { "epoch": 0.22830458288525846, "grad_norm": 1.7828121005882342, "learning_rate": 9.50700954424597e-06, "loss": 0.5124, "step": 7749 }, { "epoch": 0.22833404534272203, "grad_norm": 1.5857012211714456, "learning_rate": 9.506786863208138e-06, "loss": 0.5278, "step": 7750 }, { "epoch": 0.2283635078001856, "grad_norm": 1.69989782091549, "learning_rate": 9.506564134498887e-06, "loss": 0.5541, "step": 7751 }, { "epoch": 0.22839297025764918, "grad_norm": 1.6907122040389417, "learning_rate": 9.506341358120576e-06, "loss": 0.6538, "step": 7752 }, { "epoch": 0.22842243271511276, "grad_norm": 1.4669813307731403, "learning_rate": 9.506118534075559e-06, "loss": 0.5019, "step": 7753 }, { "epoch": 0.22845189517257636, "grad_norm": 1.6614665295592326, "learning_rate": 9.505895662366191e-06, "loss": 0.5478, "step": 7754 }, { "epoch": 0.22848135763003993, "grad_norm": 1.6026285639865776, "learning_rate": 9.505672742994834e-06, "loss": 0.3821, "step": 7755 }, { "epoch": 0.2285108200875035, "grad_norm": 1.4977059672400828, "learning_rate": 9.505449775963845e-06, "loss": 0.4271, "step": 7756 }, { "epoch": 0.22854028254496708, "grad_norm": 1.5645103392947128, "learning_rate": 9.505226761275579e-06, "loss": 0.4371, "step": 7757 }, { "epoch": 0.22856974500243066, "grad_norm": 1.6016353429839665, "learning_rate": 9.505003698932399e-06, "loss": 0.5623, "step": 7758 }, { "epoch": 0.22859920745989423, "grad_norm": 1.4479409342273397, "learning_rate": 9.504780588936661e-06, "loss": 0.5097, "step": 7759 }, { "epoch": 0.2286286699173578, "grad_norm": 1.502624569197409, "learning_rate": 9.504557431290728e-06, "loss": 0.3898, "step": 7760 }, { "epoch": 0.22865813237482138, "grad_norm": 1.9115334158097315, "learning_rate": 9.504334225996958e-06, "loss": 0.6231, "step": 7761 }, { "epoch": 0.22868759483228496, "grad_norm": 1.4335902984835711, "learning_rate": 9.504110973057714e-06, "loss": 0.5078, "step": 7762 }, { "epoch": 0.22871705728974853, "grad_norm": 1.6878352947581319, "learning_rate": 9.503887672475357e-06, "loss": 0.5855, "step": 7763 }, { "epoch": 0.2287465197472121, "grad_norm": 1.4950728634064068, "learning_rate": 9.50366432425225e-06, "loss": 0.4697, "step": 7764 }, { "epoch": 0.22877598220467568, "grad_norm": 1.6168922049585361, "learning_rate": 9.503440928390752e-06, "loss": 0.5331, "step": 7765 }, { "epoch": 0.22880544466213926, "grad_norm": 1.4278140010556222, "learning_rate": 9.50321748489323e-06, "loss": 0.5304, "step": 7766 }, { "epoch": 0.22883490711960286, "grad_norm": 1.5098088523761484, "learning_rate": 9.502993993762047e-06, "loss": 0.5304, "step": 7767 }, { "epoch": 0.22886436957706643, "grad_norm": 1.519234659963455, "learning_rate": 9.502770454999563e-06, "loss": 0.4604, "step": 7768 }, { "epoch": 0.22889383203453, "grad_norm": 1.7277888111379633, "learning_rate": 9.502546868608147e-06, "loss": 0.481, "step": 7769 }, { "epoch": 0.22892329449199358, "grad_norm": 1.6189054382844699, "learning_rate": 9.502323234590162e-06, "loss": 0.5137, "step": 7770 }, { "epoch": 0.22895275694945716, "grad_norm": 1.695602605581298, "learning_rate": 9.502099552947974e-06, "loss": 0.7329, "step": 7771 }, { "epoch": 0.22898221940692073, "grad_norm": 1.4440573446544913, "learning_rate": 9.50187582368395e-06, "loss": 0.4669, "step": 7772 }, { "epoch": 0.2290116818643843, "grad_norm": 1.4784576376181335, "learning_rate": 9.501652046800455e-06, "loss": 0.4788, "step": 7773 }, { "epoch": 0.22904114432184788, "grad_norm": 1.6206281238001943, "learning_rate": 9.501428222299855e-06, "loss": 0.3932, "step": 7774 }, { "epoch": 0.22907060677931146, "grad_norm": 1.6825687918405994, "learning_rate": 9.50120435018452e-06, "loss": 0.5444, "step": 7775 }, { "epoch": 0.22910006923677503, "grad_norm": 1.7453204517007224, "learning_rate": 9.500980430456819e-06, "loss": 0.6427, "step": 7776 }, { "epoch": 0.2291295316942386, "grad_norm": 1.5581216042040384, "learning_rate": 9.500756463119116e-06, "loss": 0.409, "step": 7777 }, { "epoch": 0.22915899415170218, "grad_norm": 1.4474531029063353, "learning_rate": 9.500532448173785e-06, "loss": 0.4998, "step": 7778 }, { "epoch": 0.22918845660916576, "grad_norm": 1.618066755473754, "learning_rate": 9.50030838562319e-06, "loss": 0.3639, "step": 7779 }, { "epoch": 0.22921791906662936, "grad_norm": 1.440824648018432, "learning_rate": 9.500084275469709e-06, "loss": 0.4237, "step": 7780 }, { "epoch": 0.22924738152409294, "grad_norm": 1.49021975399825, "learning_rate": 9.499860117715705e-06, "loss": 0.4055, "step": 7781 }, { "epoch": 0.2292768439815565, "grad_norm": 1.540468304032029, "learning_rate": 9.499635912363554e-06, "loss": 0.4548, "step": 7782 }, { "epoch": 0.22930630643902009, "grad_norm": 1.724675286780514, "learning_rate": 9.499411659415623e-06, "loss": 0.7072, "step": 7783 }, { "epoch": 0.22933576889648366, "grad_norm": 1.6349876287368839, "learning_rate": 9.499187358874288e-06, "loss": 0.4971, "step": 7784 }, { "epoch": 0.22936523135394724, "grad_norm": 1.701480960751296, "learning_rate": 9.498963010741922e-06, "loss": 0.5459, "step": 7785 }, { "epoch": 0.2293946938114108, "grad_norm": 1.542878154105965, "learning_rate": 9.498738615020894e-06, "loss": 0.4473, "step": 7786 }, { "epoch": 0.22942415626887439, "grad_norm": 1.5604772423675721, "learning_rate": 9.498514171713579e-06, "loss": 0.5206, "step": 7787 }, { "epoch": 0.22945361872633796, "grad_norm": 1.6243049095542634, "learning_rate": 9.498289680822354e-06, "loss": 0.4837, "step": 7788 }, { "epoch": 0.22948308118380153, "grad_norm": 1.487338734165057, "learning_rate": 9.498065142349593e-06, "loss": 0.4014, "step": 7789 }, { "epoch": 0.2295125436412651, "grad_norm": 1.5128448764717808, "learning_rate": 9.497840556297669e-06, "loss": 0.4566, "step": 7790 }, { "epoch": 0.22954200609872868, "grad_norm": 1.6685787328088262, "learning_rate": 9.497615922668957e-06, "loss": 0.5126, "step": 7791 }, { "epoch": 0.22957146855619226, "grad_norm": 1.8945631583774425, "learning_rate": 9.497391241465834e-06, "loss": 0.4874, "step": 7792 }, { "epoch": 0.22960093101365586, "grad_norm": 1.4405368528783098, "learning_rate": 9.497166512690679e-06, "loss": 0.4585, "step": 7793 }, { "epoch": 0.22963039347111944, "grad_norm": 1.5465219919683975, "learning_rate": 9.496941736345866e-06, "loss": 0.4518, "step": 7794 }, { "epoch": 0.229659855928583, "grad_norm": 1.5842232612058578, "learning_rate": 9.496716912433774e-06, "loss": 0.6033, "step": 7795 }, { "epoch": 0.2296893183860466, "grad_norm": 1.5056558788451007, "learning_rate": 9.49649204095678e-06, "loss": 0.5276, "step": 7796 }, { "epoch": 0.22971878084351016, "grad_norm": 1.690721556173659, "learning_rate": 9.496267121917266e-06, "loss": 0.5042, "step": 7797 }, { "epoch": 0.22974824330097374, "grad_norm": 1.4788817799894398, "learning_rate": 9.496042155317606e-06, "loss": 0.4522, "step": 7798 }, { "epoch": 0.2297777057584373, "grad_norm": 1.5605342908758615, "learning_rate": 9.495817141160183e-06, "loss": 0.4728, "step": 7799 }, { "epoch": 0.2298071682159009, "grad_norm": 1.4946651075490178, "learning_rate": 9.495592079447378e-06, "loss": 0.4357, "step": 7800 }, { "epoch": 0.22983663067336446, "grad_norm": 1.778603321258942, "learning_rate": 9.495366970181569e-06, "loss": 0.5479, "step": 7801 }, { "epoch": 0.22986609313082804, "grad_norm": 1.408814665673313, "learning_rate": 9.495141813365138e-06, "loss": 0.4515, "step": 7802 }, { "epoch": 0.2298955555882916, "grad_norm": 1.650789471691102, "learning_rate": 9.494916609000467e-06, "loss": 0.4285, "step": 7803 }, { "epoch": 0.2299250180457552, "grad_norm": 1.664337493216501, "learning_rate": 9.494691357089939e-06, "loss": 0.4327, "step": 7804 }, { "epoch": 0.22995448050321876, "grad_norm": 1.6694773292270475, "learning_rate": 9.494466057635934e-06, "loss": 0.5858, "step": 7805 }, { "epoch": 0.22998394296068236, "grad_norm": 1.5606149840461447, "learning_rate": 9.494240710640838e-06, "loss": 0.5057, "step": 7806 }, { "epoch": 0.23001340541814594, "grad_norm": 1.595075405746379, "learning_rate": 9.494015316107033e-06, "loss": 0.5983, "step": 7807 }, { "epoch": 0.2300428678756095, "grad_norm": 1.5172936890283222, "learning_rate": 9.493789874036903e-06, "loss": 0.5066, "step": 7808 }, { "epoch": 0.2300723303330731, "grad_norm": 1.7479614218373822, "learning_rate": 9.493564384432833e-06, "loss": 0.4973, "step": 7809 }, { "epoch": 0.23010179279053666, "grad_norm": 1.6048632098127917, "learning_rate": 9.493338847297208e-06, "loss": 0.42, "step": 7810 }, { "epoch": 0.23013125524800024, "grad_norm": 1.827052069383308, "learning_rate": 9.493113262632416e-06, "loss": 0.5936, "step": 7811 }, { "epoch": 0.2301607177054638, "grad_norm": 1.5686377191087388, "learning_rate": 9.492887630440842e-06, "loss": 0.5193, "step": 7812 }, { "epoch": 0.2301901801629274, "grad_norm": 1.7055995783332516, "learning_rate": 9.492661950724869e-06, "loss": 0.558, "step": 7813 }, { "epoch": 0.23021964262039096, "grad_norm": 1.6679287211012859, "learning_rate": 9.492436223486888e-06, "loss": 0.4964, "step": 7814 }, { "epoch": 0.23024910507785454, "grad_norm": 1.842057342947242, "learning_rate": 9.492210448729286e-06, "loss": 0.6562, "step": 7815 }, { "epoch": 0.2302785675353181, "grad_norm": 1.525975194817681, "learning_rate": 9.49198462645445e-06, "loss": 0.4125, "step": 7816 }, { "epoch": 0.2303080299927817, "grad_norm": 1.4417487443640917, "learning_rate": 9.491758756664772e-06, "loss": 0.4911, "step": 7817 }, { "epoch": 0.23033749245024526, "grad_norm": 1.4806361103599797, "learning_rate": 9.491532839362637e-06, "loss": 0.4442, "step": 7818 }, { "epoch": 0.23036695490770887, "grad_norm": 1.6754803049243565, "learning_rate": 9.491306874550437e-06, "loss": 0.5439, "step": 7819 }, { "epoch": 0.23039641736517244, "grad_norm": 2.1539467693623515, "learning_rate": 9.491080862230561e-06, "loss": 0.5414, "step": 7820 }, { "epoch": 0.23042587982263601, "grad_norm": 1.4606920501559177, "learning_rate": 9.4908548024054e-06, "loss": 0.5157, "step": 7821 }, { "epoch": 0.2304553422800996, "grad_norm": 1.438628550230148, "learning_rate": 9.490628695077347e-06, "loss": 0.4416, "step": 7822 }, { "epoch": 0.23048480473756316, "grad_norm": 1.7635317919089404, "learning_rate": 9.490402540248791e-06, "loss": 0.5196, "step": 7823 }, { "epoch": 0.23051426719502674, "grad_norm": 1.6628846724104511, "learning_rate": 9.490176337922126e-06, "loss": 0.5732, "step": 7824 }, { "epoch": 0.23054372965249031, "grad_norm": 1.4811159726990168, "learning_rate": 9.489950088099745e-06, "loss": 0.5228, "step": 7825 }, { "epoch": 0.2305731921099539, "grad_norm": 1.5665553597064406, "learning_rate": 9.48972379078404e-06, "loss": 0.4503, "step": 7826 }, { "epoch": 0.23060265456741746, "grad_norm": 1.5919937410957845, "learning_rate": 9.489497445977403e-06, "loss": 0.551, "step": 7827 }, { "epoch": 0.23063211702488104, "grad_norm": 1.5508116178874796, "learning_rate": 9.489271053682232e-06, "loss": 0.4985, "step": 7828 }, { "epoch": 0.23066157948234461, "grad_norm": 1.7337793317710006, "learning_rate": 9.48904461390092e-06, "loss": 0.4793, "step": 7829 }, { "epoch": 0.2306910419398082, "grad_norm": 1.6406958783998837, "learning_rate": 9.488818126635861e-06, "loss": 0.517, "step": 7830 }, { "epoch": 0.23072050439727176, "grad_norm": 1.421620579741531, "learning_rate": 9.488591591889454e-06, "loss": 0.4321, "step": 7831 }, { "epoch": 0.23074996685473537, "grad_norm": 1.564703551443078, "learning_rate": 9.488365009664092e-06, "loss": 0.5149, "step": 7832 }, { "epoch": 0.23077942931219894, "grad_norm": 1.5584539701022897, "learning_rate": 9.488138379962172e-06, "loss": 0.6814, "step": 7833 }, { "epoch": 0.23080889176966252, "grad_norm": 1.6142031052078754, "learning_rate": 9.487911702786092e-06, "loss": 0.4772, "step": 7834 }, { "epoch": 0.2308383542271261, "grad_norm": 1.718936156026283, "learning_rate": 9.48768497813825e-06, "loss": 0.6052, "step": 7835 }, { "epoch": 0.23086781668458967, "grad_norm": 1.671131653593264, "learning_rate": 9.487458206021046e-06, "loss": 0.5889, "step": 7836 }, { "epoch": 0.23089727914205324, "grad_norm": 1.703219924731219, "learning_rate": 9.487231386436874e-06, "loss": 0.5931, "step": 7837 }, { "epoch": 0.23092674159951682, "grad_norm": 1.6771507738541191, "learning_rate": 9.487004519388137e-06, "loss": 0.5261, "step": 7838 }, { "epoch": 0.2309562040569804, "grad_norm": 1.5636196441651535, "learning_rate": 9.486777604877233e-06, "loss": 0.5619, "step": 7839 }, { "epoch": 0.23098566651444397, "grad_norm": 1.559201388432243, "learning_rate": 9.486550642906564e-06, "loss": 0.44, "step": 7840 }, { "epoch": 0.23101512897190754, "grad_norm": 1.6884001861448856, "learning_rate": 9.486323633478529e-06, "loss": 0.5598, "step": 7841 }, { "epoch": 0.23104459142937112, "grad_norm": 1.8266414121854506, "learning_rate": 9.486096576595529e-06, "loss": 0.3468, "step": 7842 }, { "epoch": 0.2310740538868347, "grad_norm": 1.6074543091194247, "learning_rate": 9.485869472259967e-06, "loss": 0.3904, "step": 7843 }, { "epoch": 0.23110351634429827, "grad_norm": 1.6989965459756955, "learning_rate": 9.485642320474246e-06, "loss": 0.5244, "step": 7844 }, { "epoch": 0.23113297880176187, "grad_norm": 1.3950734014121655, "learning_rate": 9.485415121240767e-06, "loss": 0.3008, "step": 7845 }, { "epoch": 0.23116244125922544, "grad_norm": 1.538992544452406, "learning_rate": 9.485187874561934e-06, "loss": 0.4789, "step": 7846 }, { "epoch": 0.23119190371668902, "grad_norm": 1.4877260921328948, "learning_rate": 9.484960580440151e-06, "loss": 0.3906, "step": 7847 }, { "epoch": 0.2312213661741526, "grad_norm": 1.6602580093333177, "learning_rate": 9.48473323887782e-06, "loss": 0.6256, "step": 7848 }, { "epoch": 0.23125082863161617, "grad_norm": 1.6138781303411391, "learning_rate": 9.48450584987735e-06, "loss": 0.5907, "step": 7849 }, { "epoch": 0.23128029108907974, "grad_norm": 1.4130769329275918, "learning_rate": 9.484278413441142e-06, "loss": 0.5235, "step": 7850 }, { "epoch": 0.23130975354654332, "grad_norm": 1.5981829193879797, "learning_rate": 9.484050929571605e-06, "loss": 0.5607, "step": 7851 }, { "epoch": 0.2313392160040069, "grad_norm": 1.6106096298474217, "learning_rate": 9.483823398271141e-06, "loss": 0.5411, "step": 7852 }, { "epoch": 0.23136867846147047, "grad_norm": 1.469640471931089, "learning_rate": 9.483595819542162e-06, "loss": 0.6233, "step": 7853 }, { "epoch": 0.23139814091893404, "grad_norm": 1.7505539201007292, "learning_rate": 9.483368193387072e-06, "loss": 0.5895, "step": 7854 }, { "epoch": 0.23142760337639762, "grad_norm": 1.498996718149511, "learning_rate": 9.48314051980828e-06, "loss": 0.5272, "step": 7855 }, { "epoch": 0.2314570658338612, "grad_norm": 1.4017096059243948, "learning_rate": 9.482912798808195e-06, "loss": 0.4142, "step": 7856 }, { "epoch": 0.23148652829132477, "grad_norm": 1.8038510008249649, "learning_rate": 9.482685030389222e-06, "loss": 0.6128, "step": 7857 }, { "epoch": 0.23151599074878837, "grad_norm": 1.5079783527268278, "learning_rate": 9.482457214553776e-06, "loss": 0.5044, "step": 7858 }, { "epoch": 0.23154545320625194, "grad_norm": 1.6249101096611271, "learning_rate": 9.48222935130426e-06, "loss": 0.6163, "step": 7859 }, { "epoch": 0.23157491566371552, "grad_norm": 1.8038939640215288, "learning_rate": 9.482001440643091e-06, "loss": 0.4947, "step": 7860 }, { "epoch": 0.2316043781211791, "grad_norm": 1.6523972004741045, "learning_rate": 9.481773482572675e-06, "loss": 0.4918, "step": 7861 }, { "epoch": 0.23163384057864267, "grad_norm": 1.559599842175695, "learning_rate": 9.481545477095426e-06, "loss": 0.3932, "step": 7862 }, { "epoch": 0.23166330303610624, "grad_norm": 1.60005908932636, "learning_rate": 9.481317424213755e-06, "loss": 0.5039, "step": 7863 }, { "epoch": 0.23169276549356982, "grad_norm": 1.6454381799881013, "learning_rate": 9.481089323930073e-06, "loss": 0.5882, "step": 7864 }, { "epoch": 0.2317222279510334, "grad_norm": 1.6147974684427135, "learning_rate": 9.480861176246793e-06, "loss": 0.6275, "step": 7865 }, { "epoch": 0.23175169040849697, "grad_norm": 1.569855864179291, "learning_rate": 9.480632981166331e-06, "loss": 0.4819, "step": 7866 }, { "epoch": 0.23178115286596054, "grad_norm": 1.6137710164335843, "learning_rate": 9.480404738691098e-06, "loss": 0.4597, "step": 7867 }, { "epoch": 0.23181061532342412, "grad_norm": 1.651454092495614, "learning_rate": 9.480176448823508e-06, "loss": 0.516, "step": 7868 }, { "epoch": 0.2318400777808877, "grad_norm": 1.4728052977223645, "learning_rate": 9.47994811156598e-06, "loss": 0.5309, "step": 7869 }, { "epoch": 0.23186954023835127, "grad_norm": 1.4829776487389386, "learning_rate": 9.479719726920925e-06, "loss": 0.5133, "step": 7870 }, { "epoch": 0.23189900269581487, "grad_norm": 1.4345408105732949, "learning_rate": 9.479491294890756e-06, "loss": 0.3568, "step": 7871 }, { "epoch": 0.23192846515327845, "grad_norm": 1.582130720928851, "learning_rate": 9.479262815477897e-06, "loss": 0.4728, "step": 7872 }, { "epoch": 0.23195792761074202, "grad_norm": 1.5321205007771643, "learning_rate": 9.479034288684761e-06, "loss": 0.5314, "step": 7873 }, { "epoch": 0.2319873900682056, "grad_norm": 1.5354159953202613, "learning_rate": 9.478805714513765e-06, "loss": 0.5333, "step": 7874 }, { "epoch": 0.23201685252566917, "grad_norm": 1.4418225913030946, "learning_rate": 9.478577092967327e-06, "loss": 0.4181, "step": 7875 }, { "epoch": 0.23204631498313275, "grad_norm": 1.6393329267655823, "learning_rate": 9.478348424047865e-06, "loss": 0.4594, "step": 7876 }, { "epoch": 0.23207577744059632, "grad_norm": 1.6323155516514303, "learning_rate": 9.478119707757798e-06, "loss": 0.4788, "step": 7877 }, { "epoch": 0.2321052398980599, "grad_norm": 1.5768698841569466, "learning_rate": 9.477890944099546e-06, "loss": 0.4866, "step": 7878 }, { "epoch": 0.23213470235552347, "grad_norm": 1.6663752708812412, "learning_rate": 9.477662133075529e-06, "loss": 0.5558, "step": 7879 }, { "epoch": 0.23216416481298704, "grad_norm": 1.7010054864019146, "learning_rate": 9.477433274688165e-06, "loss": 0.606, "step": 7880 }, { "epoch": 0.23219362727045062, "grad_norm": 1.597402070780451, "learning_rate": 9.477204368939878e-06, "loss": 0.615, "step": 7881 }, { "epoch": 0.2322230897279142, "grad_norm": 1.6663300501527913, "learning_rate": 9.476975415833088e-06, "loss": 0.5745, "step": 7882 }, { "epoch": 0.23225255218537777, "grad_norm": 1.8289691719455432, "learning_rate": 9.476746415370213e-06, "loss": 0.6199, "step": 7883 }, { "epoch": 0.23228201464284137, "grad_norm": 1.4358489324371486, "learning_rate": 9.47651736755368e-06, "loss": 0.4046, "step": 7884 }, { "epoch": 0.23231147710030495, "grad_norm": 1.568072453957148, "learning_rate": 9.476288272385914e-06, "loss": 0.4214, "step": 7885 }, { "epoch": 0.23234093955776852, "grad_norm": 1.8105974068261892, "learning_rate": 9.476059129869333e-06, "loss": 0.6208, "step": 7886 }, { "epoch": 0.2323704020152321, "grad_norm": 1.485883969772131, "learning_rate": 9.475829940006361e-06, "loss": 0.4536, "step": 7887 }, { "epoch": 0.23239986447269567, "grad_norm": 1.5157551256261617, "learning_rate": 9.475600702799426e-06, "loss": 0.4207, "step": 7888 }, { "epoch": 0.23242932693015925, "grad_norm": 1.5081810828597242, "learning_rate": 9.475371418250949e-06, "loss": 0.5232, "step": 7889 }, { "epoch": 0.23245878938762282, "grad_norm": 1.5754913838256885, "learning_rate": 9.475142086363357e-06, "loss": 0.5296, "step": 7890 }, { "epoch": 0.2324882518450864, "grad_norm": 1.726592886941158, "learning_rate": 9.474912707139077e-06, "loss": 0.5133, "step": 7891 }, { "epoch": 0.23251771430254997, "grad_norm": 1.4975710145679646, "learning_rate": 9.474683280580534e-06, "loss": 0.4211, "step": 7892 }, { "epoch": 0.23254717676001355, "grad_norm": 1.7056188718486234, "learning_rate": 9.474453806690154e-06, "loss": 0.5883, "step": 7893 }, { "epoch": 0.23257663921747712, "grad_norm": 1.7325595791440027, "learning_rate": 9.474224285470366e-06, "loss": 0.4009, "step": 7894 }, { "epoch": 0.2326061016749407, "grad_norm": 1.6259067102521194, "learning_rate": 9.473994716923597e-06, "loss": 0.5456, "step": 7895 }, { "epoch": 0.23263556413240427, "grad_norm": 1.4224216284320816, "learning_rate": 9.473765101052277e-06, "loss": 0.444, "step": 7896 }, { "epoch": 0.23266502658986787, "grad_norm": 1.5412379013063273, "learning_rate": 9.473535437858831e-06, "loss": 0.5784, "step": 7897 }, { "epoch": 0.23269448904733145, "grad_norm": 1.562093575333968, "learning_rate": 9.473305727345692e-06, "loss": 0.428, "step": 7898 }, { "epoch": 0.23272395150479502, "grad_norm": 1.4587781234607156, "learning_rate": 9.473075969515286e-06, "loss": 0.4804, "step": 7899 }, { "epoch": 0.2327534139622586, "grad_norm": 1.5176650449628182, "learning_rate": 9.472846164370048e-06, "loss": 0.4592, "step": 7900 }, { "epoch": 0.23278287641972217, "grad_norm": 1.8433051277147174, "learning_rate": 9.472616311912405e-06, "loss": 0.5224, "step": 7901 }, { "epoch": 0.23281233887718575, "grad_norm": 1.696729844319023, "learning_rate": 9.472386412144791e-06, "loss": 0.6104, "step": 7902 }, { "epoch": 0.23284180133464932, "grad_norm": 2.070117673340512, "learning_rate": 9.472156465069635e-06, "loss": 0.5349, "step": 7903 }, { "epoch": 0.2328712637921129, "grad_norm": 1.6424617055048534, "learning_rate": 9.471926470689373e-06, "loss": 0.4775, "step": 7904 }, { "epoch": 0.23290072624957647, "grad_norm": 1.7682997971178351, "learning_rate": 9.471696429006433e-06, "loss": 0.5898, "step": 7905 }, { "epoch": 0.23293018870704005, "grad_norm": 1.489309555252059, "learning_rate": 9.471466340023251e-06, "loss": 0.437, "step": 7906 }, { "epoch": 0.23295965116450362, "grad_norm": 1.4886251718162986, "learning_rate": 9.471236203742264e-06, "loss": 0.3269, "step": 7907 }, { "epoch": 0.2329891136219672, "grad_norm": 1.46887858492526, "learning_rate": 9.4710060201659e-06, "loss": 0.4648, "step": 7908 }, { "epoch": 0.23301857607943077, "grad_norm": 1.5306555445100403, "learning_rate": 9.470775789296599e-06, "loss": 0.5186, "step": 7909 }, { "epoch": 0.23304803853689438, "grad_norm": 1.600501806976376, "learning_rate": 9.470545511136792e-06, "loss": 0.5244, "step": 7910 }, { "epoch": 0.23307750099435795, "grad_norm": 1.666980036156199, "learning_rate": 9.47031518568892e-06, "loss": 0.574, "step": 7911 }, { "epoch": 0.23310696345182153, "grad_norm": 1.4383167090849889, "learning_rate": 9.470084812955413e-06, "loss": 0.3227, "step": 7912 }, { "epoch": 0.2331364259092851, "grad_norm": 1.7920663624293358, "learning_rate": 9.469854392938713e-06, "loss": 0.6148, "step": 7913 }, { "epoch": 0.23316588836674867, "grad_norm": 1.4745854764379729, "learning_rate": 9.469623925641257e-06, "loss": 0.3035, "step": 7914 }, { "epoch": 0.23319535082421225, "grad_norm": 1.9878092746138578, "learning_rate": 9.469393411065478e-06, "loss": 0.4577, "step": 7915 }, { "epoch": 0.23322481328167582, "grad_norm": 1.5837400059046032, "learning_rate": 9.46916284921382e-06, "loss": 0.5429, "step": 7916 }, { "epoch": 0.2332542757391394, "grad_norm": 1.4703404349625704, "learning_rate": 9.468932240088717e-06, "loss": 0.3736, "step": 7917 }, { "epoch": 0.23328373819660297, "grad_norm": 1.6228117129240776, "learning_rate": 9.468701583692614e-06, "loss": 0.4996, "step": 7918 }, { "epoch": 0.23331320065406655, "grad_norm": 1.4228241540788538, "learning_rate": 9.468470880027944e-06, "loss": 0.4749, "step": 7919 }, { "epoch": 0.23334266311153012, "grad_norm": 1.7882308395433495, "learning_rate": 9.468240129097153e-06, "loss": 0.6283, "step": 7920 }, { "epoch": 0.2333721255689937, "grad_norm": 1.7793858398079354, "learning_rate": 9.46800933090268e-06, "loss": 0.7264, "step": 7921 }, { "epoch": 0.23340158802645727, "grad_norm": 1.8174612273450774, "learning_rate": 9.467778485446965e-06, "loss": 0.5776, "step": 7922 }, { "epoch": 0.23343105048392088, "grad_norm": 1.5839387559849574, "learning_rate": 9.46754759273245e-06, "loss": 0.5586, "step": 7923 }, { "epoch": 0.23346051294138445, "grad_norm": 1.636395956842524, "learning_rate": 9.46731665276158e-06, "loss": 0.5051, "step": 7924 }, { "epoch": 0.23348997539884803, "grad_norm": 1.826868463954119, "learning_rate": 9.467085665536796e-06, "loss": 0.5896, "step": 7925 }, { "epoch": 0.2335194378563116, "grad_norm": 1.452187594722913, "learning_rate": 9.466854631060539e-06, "loss": 0.4265, "step": 7926 }, { "epoch": 0.23354890031377518, "grad_norm": 1.341525222030818, "learning_rate": 9.466623549335257e-06, "loss": 0.416, "step": 7927 }, { "epoch": 0.23357836277123875, "grad_norm": 1.5633632283727776, "learning_rate": 9.466392420363391e-06, "loss": 0.531, "step": 7928 }, { "epoch": 0.23360782522870233, "grad_norm": 1.529434330548143, "learning_rate": 9.466161244147387e-06, "loss": 0.5364, "step": 7929 }, { "epoch": 0.2336372876861659, "grad_norm": 1.5747520819409746, "learning_rate": 9.465930020689692e-06, "loss": 0.5661, "step": 7930 }, { "epoch": 0.23366675014362948, "grad_norm": 1.5319175125055422, "learning_rate": 9.46569874999275e-06, "loss": 0.3986, "step": 7931 }, { "epoch": 0.23369621260109305, "grad_norm": 1.6982829672058324, "learning_rate": 9.465467432059005e-06, "loss": 0.5487, "step": 7932 }, { "epoch": 0.23372567505855663, "grad_norm": 1.6085796487502446, "learning_rate": 9.46523606689091e-06, "loss": 0.5106, "step": 7933 }, { "epoch": 0.2337551375160202, "grad_norm": 1.42625116738989, "learning_rate": 9.465004654490907e-06, "loss": 0.5447, "step": 7934 }, { "epoch": 0.23378459997348378, "grad_norm": 1.4942637867221704, "learning_rate": 9.464773194861445e-06, "loss": 0.5252, "step": 7935 }, { "epoch": 0.23381406243094738, "grad_norm": 1.9110825840913923, "learning_rate": 9.464541688004974e-06, "loss": 0.4544, "step": 7936 }, { "epoch": 0.23384352488841095, "grad_norm": 1.5471560079153757, "learning_rate": 9.464310133923941e-06, "loss": 0.4459, "step": 7937 }, { "epoch": 0.23387298734587453, "grad_norm": 1.6145867370000522, "learning_rate": 9.464078532620797e-06, "loss": 0.5709, "step": 7938 }, { "epoch": 0.2339024498033381, "grad_norm": 1.5765603611936825, "learning_rate": 9.46384688409799e-06, "loss": 0.4475, "step": 7939 }, { "epoch": 0.23393191226080168, "grad_norm": 1.561751299205796, "learning_rate": 9.46361518835797e-06, "loss": 0.4906, "step": 7940 }, { "epoch": 0.23396137471826525, "grad_norm": 1.5371334091951623, "learning_rate": 9.463383445403188e-06, "loss": 0.4828, "step": 7941 }, { "epoch": 0.23399083717572883, "grad_norm": 1.6558510723863464, "learning_rate": 9.463151655236101e-06, "loss": 0.4945, "step": 7942 }, { "epoch": 0.2340202996331924, "grad_norm": 1.6474197782904063, "learning_rate": 9.462919817859151e-06, "loss": 0.6005, "step": 7943 }, { "epoch": 0.23404976209065598, "grad_norm": 1.3695511004630472, "learning_rate": 9.462687933274797e-06, "loss": 0.3702, "step": 7944 }, { "epoch": 0.23407922454811955, "grad_norm": 1.4943953943459367, "learning_rate": 9.462456001485492e-06, "loss": 0.4936, "step": 7945 }, { "epoch": 0.23410868700558313, "grad_norm": 1.7128167525316702, "learning_rate": 9.462224022493685e-06, "loss": 0.4778, "step": 7946 }, { "epoch": 0.2341381494630467, "grad_norm": 1.5495959941065574, "learning_rate": 9.461991996301834e-06, "loss": 0.5136, "step": 7947 }, { "epoch": 0.23416761192051028, "grad_norm": 1.5079553443090432, "learning_rate": 9.461759922912389e-06, "loss": 0.4352, "step": 7948 }, { "epoch": 0.23419707437797388, "grad_norm": 1.7927769456841558, "learning_rate": 9.461527802327808e-06, "loss": 0.5351, "step": 7949 }, { "epoch": 0.23422653683543745, "grad_norm": 1.535848604062603, "learning_rate": 9.461295634550546e-06, "loss": 0.5208, "step": 7950 }, { "epoch": 0.23425599929290103, "grad_norm": 1.5750024005666088, "learning_rate": 9.46106341958306e-06, "loss": 0.3895, "step": 7951 }, { "epoch": 0.2342854617503646, "grad_norm": 1.6211132028596291, "learning_rate": 9.460831157427803e-06, "loss": 0.5625, "step": 7952 }, { "epoch": 0.23431492420782818, "grad_norm": 1.8068876209593336, "learning_rate": 9.460598848087234e-06, "loss": 0.5121, "step": 7953 }, { "epoch": 0.23434438666529175, "grad_norm": 1.6294501362479832, "learning_rate": 9.460366491563811e-06, "loss": 0.5216, "step": 7954 }, { "epoch": 0.23437384912275533, "grad_norm": 2.096990535635007, "learning_rate": 9.460134087859989e-06, "loss": 0.3686, "step": 7955 }, { "epoch": 0.2344033115802189, "grad_norm": 1.5501974389551243, "learning_rate": 9.459901636978228e-06, "loss": 0.4502, "step": 7956 }, { "epoch": 0.23443277403768248, "grad_norm": 1.6099590407060342, "learning_rate": 9.459669138920986e-06, "loss": 0.4519, "step": 7957 }, { "epoch": 0.23446223649514605, "grad_norm": 1.430505169120324, "learning_rate": 9.459436593690724e-06, "loss": 0.4707, "step": 7958 }, { "epoch": 0.23449169895260963, "grad_norm": 1.6135370786968297, "learning_rate": 9.4592040012899e-06, "loss": 0.519, "step": 7959 }, { "epoch": 0.2345211614100732, "grad_norm": 1.5798543845134843, "learning_rate": 9.458971361720976e-06, "loss": 0.5905, "step": 7960 }, { "epoch": 0.23455062386753678, "grad_norm": 1.4930333557627697, "learning_rate": 9.458738674986412e-06, "loss": 0.5571, "step": 7961 }, { "epoch": 0.23458008632500038, "grad_norm": 1.483986949197841, "learning_rate": 9.45850594108867e-06, "loss": 0.494, "step": 7962 }, { "epoch": 0.23460954878246396, "grad_norm": 1.4853087078279632, "learning_rate": 9.458273160030208e-06, "loss": 0.5726, "step": 7963 }, { "epoch": 0.23463901123992753, "grad_norm": 1.6021802279688755, "learning_rate": 9.458040331813493e-06, "loss": 0.6183, "step": 7964 }, { "epoch": 0.2346684736973911, "grad_norm": 1.4964073591415323, "learning_rate": 9.457807456440987e-06, "loss": 0.4665, "step": 7965 }, { "epoch": 0.23469793615485468, "grad_norm": 1.701930296202216, "learning_rate": 9.457574533915151e-06, "loss": 0.5557, "step": 7966 }, { "epoch": 0.23472739861231826, "grad_norm": 1.5458768110869867, "learning_rate": 9.457341564238449e-06, "loss": 0.5479, "step": 7967 }, { "epoch": 0.23475686106978183, "grad_norm": 1.4363435435193692, "learning_rate": 9.457108547413348e-06, "loss": 0.5459, "step": 7968 }, { "epoch": 0.2347863235272454, "grad_norm": 1.6184788719019478, "learning_rate": 9.456875483442309e-06, "loss": 0.527, "step": 7969 }, { "epoch": 0.23481578598470898, "grad_norm": 1.6770828396588853, "learning_rate": 9.456642372327803e-06, "loss": 0.5102, "step": 7970 }, { "epoch": 0.23484524844217256, "grad_norm": 1.5991714037174156, "learning_rate": 9.456409214072288e-06, "loss": 0.473, "step": 7971 }, { "epoch": 0.23487471089963613, "grad_norm": 1.718608948305388, "learning_rate": 9.456176008678237e-06, "loss": 0.5812, "step": 7972 }, { "epoch": 0.2349041733570997, "grad_norm": 1.5280775511870714, "learning_rate": 9.455942756148115e-06, "loss": 0.4128, "step": 7973 }, { "epoch": 0.23493363581456328, "grad_norm": 1.6628583845216345, "learning_rate": 9.455709456484388e-06, "loss": 0.5963, "step": 7974 }, { "epoch": 0.23496309827202688, "grad_norm": 1.780967505068791, "learning_rate": 9.455476109689524e-06, "loss": 0.6091, "step": 7975 }, { "epoch": 0.23499256072949046, "grad_norm": 1.5002137952636774, "learning_rate": 9.45524271576599e-06, "loss": 0.5026, "step": 7976 }, { "epoch": 0.23502202318695403, "grad_norm": 1.5468519975194706, "learning_rate": 9.455009274716257e-06, "loss": 0.4458, "step": 7977 }, { "epoch": 0.2350514856444176, "grad_norm": 1.4837604387471932, "learning_rate": 9.454775786542795e-06, "loss": 0.5124, "step": 7978 }, { "epoch": 0.23508094810188118, "grad_norm": 1.5208364681574056, "learning_rate": 9.454542251248072e-06, "loss": 0.5077, "step": 7979 }, { "epoch": 0.23511041055934476, "grad_norm": 1.4983979485598629, "learning_rate": 9.45430866883456e-06, "loss": 0.4791, "step": 7980 }, { "epoch": 0.23513987301680833, "grad_norm": 1.500972017669691, "learning_rate": 9.454075039304726e-06, "loss": 0.4436, "step": 7981 }, { "epoch": 0.2351693354742719, "grad_norm": 1.6132051448765627, "learning_rate": 9.453841362661042e-06, "loss": 0.5147, "step": 7982 }, { "epoch": 0.23519879793173548, "grad_norm": 1.612198077055355, "learning_rate": 9.453607638905984e-06, "loss": 0.5448, "step": 7983 }, { "epoch": 0.23522826038919906, "grad_norm": 1.8496912857233816, "learning_rate": 9.453373868042022e-06, "loss": 0.6615, "step": 7984 }, { "epoch": 0.23525772284666263, "grad_norm": 1.7957040445704722, "learning_rate": 9.453140050071628e-06, "loss": 0.5988, "step": 7985 }, { "epoch": 0.2352871853041262, "grad_norm": 1.4963017126885119, "learning_rate": 9.452906184997276e-06, "loss": 0.4987, "step": 7986 }, { "epoch": 0.23531664776158978, "grad_norm": 1.8566975223043924, "learning_rate": 9.452672272821439e-06, "loss": 0.5495, "step": 7987 }, { "epoch": 0.23534611021905338, "grad_norm": 1.478459772512386, "learning_rate": 9.452438313546591e-06, "loss": 0.5172, "step": 7988 }, { "epoch": 0.23537557267651696, "grad_norm": 1.4628619541199994, "learning_rate": 9.452204307175207e-06, "loss": 0.4178, "step": 7989 }, { "epoch": 0.23540503513398053, "grad_norm": 1.5774306692307856, "learning_rate": 9.451970253709766e-06, "loss": 0.6205, "step": 7990 }, { "epoch": 0.2354344975914441, "grad_norm": 1.4759053560651192, "learning_rate": 9.451736153152737e-06, "loss": 0.4414, "step": 7991 }, { "epoch": 0.23546396004890768, "grad_norm": 1.4459265016639222, "learning_rate": 9.451502005506602e-06, "loss": 0.4742, "step": 7992 }, { "epoch": 0.23549342250637126, "grad_norm": 1.4897709888193977, "learning_rate": 9.451267810773834e-06, "loss": 0.4721, "step": 7993 }, { "epoch": 0.23552288496383483, "grad_norm": 1.6557795901940624, "learning_rate": 9.451033568956911e-06, "loss": 0.5038, "step": 7994 }, { "epoch": 0.2355523474212984, "grad_norm": 1.85701505280186, "learning_rate": 9.450799280058313e-06, "loss": 0.6249, "step": 7995 }, { "epoch": 0.23558180987876198, "grad_norm": 1.4390847606277652, "learning_rate": 9.450564944080516e-06, "loss": 0.496, "step": 7996 }, { "epoch": 0.23561127233622556, "grad_norm": 1.7675013449006962, "learning_rate": 9.450330561026e-06, "loss": 0.5644, "step": 7997 }, { "epoch": 0.23564073479368913, "grad_norm": 1.6886629677171519, "learning_rate": 9.450096130897243e-06, "loss": 0.4937, "step": 7998 }, { "epoch": 0.2356701972511527, "grad_norm": 1.867415118054838, "learning_rate": 9.449861653696726e-06, "loss": 0.7051, "step": 7999 }, { "epoch": 0.23569965970861628, "grad_norm": 1.4917545140342363, "learning_rate": 9.449627129426926e-06, "loss": 0.5287, "step": 8000 }, { "epoch": 0.23572912216607989, "grad_norm": 1.6521564854094168, "learning_rate": 9.449392558090328e-06, "loss": 0.6075, "step": 8001 }, { "epoch": 0.23575858462354346, "grad_norm": 1.7128894650708892, "learning_rate": 9.449157939689412e-06, "loss": 0.4743, "step": 8002 }, { "epoch": 0.23578804708100704, "grad_norm": 1.5847078606682348, "learning_rate": 9.44892327422666e-06, "loss": 0.482, "step": 8003 }, { "epoch": 0.2358175095384706, "grad_norm": 1.5259069430204186, "learning_rate": 9.448688561704553e-06, "loss": 0.3943, "step": 8004 }, { "epoch": 0.23584697199593418, "grad_norm": 1.3672933894032053, "learning_rate": 9.448453802125574e-06, "loss": 0.4197, "step": 8005 }, { "epoch": 0.23587643445339776, "grad_norm": 1.674498176173808, "learning_rate": 9.448218995492205e-06, "loss": 0.528, "step": 8006 }, { "epoch": 0.23590589691086133, "grad_norm": 1.7009501950564052, "learning_rate": 9.447984141806932e-06, "loss": 0.5109, "step": 8007 }, { "epoch": 0.2359353593683249, "grad_norm": 1.8184868229208786, "learning_rate": 9.44774924107224e-06, "loss": 0.5156, "step": 8008 }, { "epoch": 0.23596482182578848, "grad_norm": 1.637873393765888, "learning_rate": 9.44751429329061e-06, "loss": 0.4668, "step": 8009 }, { "epoch": 0.23599428428325206, "grad_norm": 1.2849575204773314, "learning_rate": 9.447279298464528e-06, "loss": 0.3082, "step": 8010 }, { "epoch": 0.23602374674071563, "grad_norm": 1.46127969348948, "learning_rate": 9.447044256596483e-06, "loss": 0.5569, "step": 8011 }, { "epoch": 0.2360532091981792, "grad_norm": 1.7775266147404019, "learning_rate": 9.44680916768896e-06, "loss": 0.565, "step": 8012 }, { "epoch": 0.23608267165564278, "grad_norm": 1.5664825865785212, "learning_rate": 9.446574031744443e-06, "loss": 0.5036, "step": 8013 }, { "epoch": 0.2361121341131064, "grad_norm": 1.537620389580085, "learning_rate": 9.446338848765421e-06, "loss": 0.5316, "step": 8014 }, { "epoch": 0.23614159657056996, "grad_norm": 1.5133411052257804, "learning_rate": 9.446103618754384e-06, "loss": 0.5692, "step": 8015 }, { "epoch": 0.23617105902803354, "grad_norm": 1.5680675395156818, "learning_rate": 9.445868341713816e-06, "loss": 0.5274, "step": 8016 }, { "epoch": 0.2362005214854971, "grad_norm": 1.677817111045517, "learning_rate": 9.445633017646207e-06, "loss": 0.491, "step": 8017 }, { "epoch": 0.2362299839429607, "grad_norm": 1.779573507747482, "learning_rate": 9.445397646554048e-06, "loss": 0.5498, "step": 8018 }, { "epoch": 0.23625944640042426, "grad_norm": 1.6095568488224639, "learning_rate": 9.445162228439827e-06, "loss": 0.5937, "step": 8019 }, { "epoch": 0.23628890885788784, "grad_norm": 1.6558724738022828, "learning_rate": 9.444926763306035e-06, "loss": 0.6556, "step": 8020 }, { "epoch": 0.2363183713153514, "grad_norm": 1.55190904159019, "learning_rate": 9.444691251155162e-06, "loss": 0.5163, "step": 8021 }, { "epoch": 0.23634783377281499, "grad_norm": 1.506595747407147, "learning_rate": 9.4444556919897e-06, "loss": 0.5197, "step": 8022 }, { "epoch": 0.23637729623027856, "grad_norm": 1.4743696440154992, "learning_rate": 9.444220085812139e-06, "loss": 0.5127, "step": 8023 }, { "epoch": 0.23640675868774214, "grad_norm": 1.5399751669881168, "learning_rate": 9.443984432624972e-06, "loss": 0.5066, "step": 8024 }, { "epoch": 0.2364362211452057, "grad_norm": 1.6492006070192822, "learning_rate": 9.443748732430696e-06, "loss": 0.5221, "step": 8025 }, { "epoch": 0.23646568360266929, "grad_norm": 1.6626625397098416, "learning_rate": 9.443512985231795e-06, "loss": 0.5324, "step": 8026 }, { "epoch": 0.2364951460601329, "grad_norm": 1.623701663649926, "learning_rate": 9.44327719103077e-06, "loss": 0.4978, "step": 8027 }, { "epoch": 0.23652460851759646, "grad_norm": 1.6959748095574083, "learning_rate": 9.443041349830114e-06, "loss": 0.4935, "step": 8028 }, { "epoch": 0.23655407097506004, "grad_norm": 1.7761441286549768, "learning_rate": 9.44280546163232e-06, "loss": 0.3685, "step": 8029 }, { "epoch": 0.2365835334325236, "grad_norm": 1.4596728094737625, "learning_rate": 9.442569526439884e-06, "loss": 0.4232, "step": 8030 }, { "epoch": 0.2366129958899872, "grad_norm": 1.5938602251799114, "learning_rate": 9.4423335442553e-06, "loss": 0.5895, "step": 8031 }, { "epoch": 0.23664245834745076, "grad_norm": 1.7708797391263682, "learning_rate": 9.442097515081068e-06, "loss": 0.4495, "step": 8032 }, { "epoch": 0.23667192080491434, "grad_norm": 1.5420373894409756, "learning_rate": 9.44186143891968e-06, "loss": 0.5228, "step": 8033 }, { "epoch": 0.2367013832623779, "grad_norm": 1.6100557874900998, "learning_rate": 9.441625315773637e-06, "loss": 0.5991, "step": 8034 }, { "epoch": 0.2367308457198415, "grad_norm": 1.6348887246251402, "learning_rate": 9.441389145645434e-06, "loss": 0.4573, "step": 8035 }, { "epoch": 0.23676030817730506, "grad_norm": 1.5350860805095035, "learning_rate": 9.441152928537572e-06, "loss": 0.4296, "step": 8036 }, { "epoch": 0.23678977063476864, "grad_norm": 1.4686899906158368, "learning_rate": 9.440916664452546e-06, "loss": 0.3888, "step": 8037 }, { "epoch": 0.2368192330922322, "grad_norm": 1.825508876487071, "learning_rate": 9.440680353392857e-06, "loss": 0.632, "step": 8038 }, { "epoch": 0.2368486955496958, "grad_norm": 1.6720140024671424, "learning_rate": 9.440443995361004e-06, "loss": 0.6045, "step": 8039 }, { "epoch": 0.2368781580071594, "grad_norm": 1.5380744402972004, "learning_rate": 9.44020759035949e-06, "loss": 0.4586, "step": 8040 }, { "epoch": 0.23690762046462296, "grad_norm": 1.427843718476124, "learning_rate": 9.439971138390811e-06, "loss": 0.3811, "step": 8041 }, { "epoch": 0.23693708292208654, "grad_norm": 1.5136715121119915, "learning_rate": 9.439734639457471e-06, "loss": 0.4433, "step": 8042 }, { "epoch": 0.23696654537955011, "grad_norm": 1.5871706305150417, "learning_rate": 9.439498093561971e-06, "loss": 0.527, "step": 8043 }, { "epoch": 0.2369960078370137, "grad_norm": 1.8533305296456462, "learning_rate": 9.439261500706816e-06, "loss": 0.5744, "step": 8044 }, { "epoch": 0.23702547029447726, "grad_norm": 1.5891908602122267, "learning_rate": 9.439024860894502e-06, "loss": 0.4858, "step": 8045 }, { "epoch": 0.23705493275194084, "grad_norm": 1.5156226319660444, "learning_rate": 9.438788174127538e-06, "loss": 0.4752, "step": 8046 }, { "epoch": 0.2370843952094044, "grad_norm": 1.9238232703558429, "learning_rate": 9.438551440408424e-06, "loss": 0.4373, "step": 8047 }, { "epoch": 0.237113857666868, "grad_norm": 1.5543381464922568, "learning_rate": 9.438314659739667e-06, "loss": 0.5245, "step": 8048 }, { "epoch": 0.23714332012433156, "grad_norm": 1.3109758674887078, "learning_rate": 9.438077832123769e-06, "loss": 0.342, "step": 8049 }, { "epoch": 0.23717278258179514, "grad_norm": 2.096602597612729, "learning_rate": 9.437840957563236e-06, "loss": 0.5699, "step": 8050 }, { "epoch": 0.2372022450392587, "grad_norm": 1.5348716257680504, "learning_rate": 9.437604036060575e-06, "loss": 0.5099, "step": 8051 }, { "epoch": 0.2372317074967223, "grad_norm": 1.5309549650739671, "learning_rate": 9.43736706761829e-06, "loss": 0.4575, "step": 8052 }, { "epoch": 0.2372611699541859, "grad_norm": 1.7501443724770447, "learning_rate": 9.437130052238889e-06, "loss": 0.5965, "step": 8053 }, { "epoch": 0.23729063241164947, "grad_norm": 1.6769650262724263, "learning_rate": 9.436892989924877e-06, "loss": 0.5263, "step": 8054 }, { "epoch": 0.23732009486911304, "grad_norm": 1.6395551287084051, "learning_rate": 9.436655880678765e-06, "loss": 0.5438, "step": 8055 }, { "epoch": 0.23734955732657662, "grad_norm": 1.6599685089048937, "learning_rate": 9.43641872450306e-06, "loss": 0.4505, "step": 8056 }, { "epoch": 0.2373790197840402, "grad_norm": 1.4437326849326115, "learning_rate": 9.436181521400268e-06, "loss": 0.3994, "step": 8057 }, { "epoch": 0.23740848224150377, "grad_norm": 1.754102534628943, "learning_rate": 9.4359442713729e-06, "loss": 0.5716, "step": 8058 }, { "epoch": 0.23743794469896734, "grad_norm": 1.851197250998084, "learning_rate": 9.435706974423465e-06, "loss": 0.4164, "step": 8059 }, { "epoch": 0.23746740715643092, "grad_norm": 1.586931941411166, "learning_rate": 9.435469630554475e-06, "loss": 0.4223, "step": 8060 }, { "epoch": 0.2374968696138945, "grad_norm": 1.5870551058913582, "learning_rate": 9.435232239768437e-06, "loss": 0.4287, "step": 8061 }, { "epoch": 0.23752633207135807, "grad_norm": 1.4170865232763679, "learning_rate": 9.434994802067867e-06, "loss": 0.401, "step": 8062 }, { "epoch": 0.23755579452882164, "grad_norm": 1.4292839456298385, "learning_rate": 9.43475731745527e-06, "loss": 0.4324, "step": 8063 }, { "epoch": 0.23758525698628521, "grad_norm": 1.6414585471673278, "learning_rate": 9.434519785933165e-06, "loss": 0.4592, "step": 8064 }, { "epoch": 0.2376147194437488, "grad_norm": 1.650304014831042, "learning_rate": 9.434282207504059e-06, "loss": 0.6069, "step": 8065 }, { "epoch": 0.2376441819012124, "grad_norm": 1.7308441595405846, "learning_rate": 9.434044582170468e-06, "loss": 0.7705, "step": 8066 }, { "epoch": 0.23767364435867597, "grad_norm": 1.7209438202931746, "learning_rate": 9.433806909934904e-06, "loss": 0.538, "step": 8067 }, { "epoch": 0.23770310681613954, "grad_norm": 1.8037198876757772, "learning_rate": 9.433569190799883e-06, "loss": 0.5215, "step": 8068 }, { "epoch": 0.23773256927360312, "grad_norm": 1.8401720079405568, "learning_rate": 9.433331424767917e-06, "loss": 0.5643, "step": 8069 }, { "epoch": 0.2377620317310667, "grad_norm": 1.7861126752908707, "learning_rate": 9.433093611841522e-06, "loss": 0.4707, "step": 8070 }, { "epoch": 0.23779149418853027, "grad_norm": 1.4246418568852586, "learning_rate": 9.432855752023216e-06, "loss": 0.4999, "step": 8071 }, { "epoch": 0.23782095664599384, "grad_norm": 1.5996672631618765, "learning_rate": 9.43261784531551e-06, "loss": 0.5292, "step": 8072 }, { "epoch": 0.23785041910345742, "grad_norm": 1.6716336011899442, "learning_rate": 9.432379891720927e-06, "loss": 0.5293, "step": 8073 }, { "epoch": 0.237879881560921, "grad_norm": 1.5973132635253504, "learning_rate": 9.432141891241976e-06, "loss": 0.5129, "step": 8074 }, { "epoch": 0.23790934401838457, "grad_norm": 1.6052879042463941, "learning_rate": 9.431903843881182e-06, "loss": 0.556, "step": 8075 }, { "epoch": 0.23793880647584814, "grad_norm": 1.286782902100363, "learning_rate": 9.431665749641059e-06, "loss": 0.3069, "step": 8076 }, { "epoch": 0.23796826893331172, "grad_norm": 1.5235325436774552, "learning_rate": 9.431427608524126e-06, "loss": 0.4572, "step": 8077 }, { "epoch": 0.2379977313907753, "grad_norm": 1.5520814736698128, "learning_rate": 9.431189420532902e-06, "loss": 0.5105, "step": 8078 }, { "epoch": 0.2380271938482389, "grad_norm": 1.505894016970032, "learning_rate": 9.430951185669906e-06, "loss": 0.5206, "step": 8079 }, { "epoch": 0.23805665630570247, "grad_norm": 1.661877940515656, "learning_rate": 9.43071290393766e-06, "loss": 0.4887, "step": 8080 }, { "epoch": 0.23808611876316604, "grad_norm": 1.8315410602931017, "learning_rate": 9.430474575338684e-06, "loss": 0.3999, "step": 8081 }, { "epoch": 0.23811558122062962, "grad_norm": 1.5825584688734886, "learning_rate": 9.430236199875498e-06, "loss": 0.5447, "step": 8082 }, { "epoch": 0.2381450436780932, "grad_norm": 1.5641566201683283, "learning_rate": 9.42999777755062e-06, "loss": 0.5307, "step": 8083 }, { "epoch": 0.23817450613555677, "grad_norm": 1.548115789760373, "learning_rate": 9.42975930836658e-06, "loss": 0.4384, "step": 8084 }, { "epoch": 0.23820396859302034, "grad_norm": 1.5732711967863307, "learning_rate": 9.429520792325894e-06, "loss": 0.5855, "step": 8085 }, { "epoch": 0.23823343105048392, "grad_norm": 1.8348269229115586, "learning_rate": 9.429282229431087e-06, "loss": 0.4499, "step": 8086 }, { "epoch": 0.2382628935079475, "grad_norm": 1.689390302240217, "learning_rate": 9.429043619684683e-06, "loss": 0.6606, "step": 8087 }, { "epoch": 0.23829235596541107, "grad_norm": 1.5476931325769463, "learning_rate": 9.428804963089207e-06, "loss": 0.5512, "step": 8088 }, { "epoch": 0.23832181842287464, "grad_norm": 1.3563806684348059, "learning_rate": 9.428566259647178e-06, "loss": 0.4002, "step": 8089 }, { "epoch": 0.23835128088033822, "grad_norm": 1.616688108989648, "learning_rate": 9.428327509361128e-06, "loss": 0.4213, "step": 8090 }, { "epoch": 0.2383807433378018, "grad_norm": 1.5529368768786824, "learning_rate": 9.428088712233579e-06, "loss": 0.4297, "step": 8091 }, { "epoch": 0.2384102057952654, "grad_norm": 1.515870303149556, "learning_rate": 9.427849868267056e-06, "loss": 0.3985, "step": 8092 }, { "epoch": 0.23843966825272897, "grad_norm": 1.601476207023752, "learning_rate": 9.427610977464085e-06, "loss": 0.6117, "step": 8093 }, { "epoch": 0.23846913071019255, "grad_norm": 1.5477916253860005, "learning_rate": 9.427372039827197e-06, "loss": 0.5115, "step": 8094 }, { "epoch": 0.23849859316765612, "grad_norm": 1.691677095405598, "learning_rate": 9.427133055358916e-06, "loss": 0.5394, "step": 8095 }, { "epoch": 0.2385280556251197, "grad_norm": 1.6966259180257, "learning_rate": 9.42689402406177e-06, "loss": 0.6995, "step": 8096 }, { "epoch": 0.23855751808258327, "grad_norm": 1.7408968860477056, "learning_rate": 9.426654945938289e-06, "loss": 0.5846, "step": 8097 }, { "epoch": 0.23858698054004684, "grad_norm": 1.7019073454298939, "learning_rate": 9.426415820991002e-06, "loss": 0.5527, "step": 8098 }, { "epoch": 0.23861644299751042, "grad_norm": 1.7480140696927973, "learning_rate": 9.426176649222437e-06, "loss": 0.6524, "step": 8099 }, { "epoch": 0.238645905454974, "grad_norm": 1.5078964232555785, "learning_rate": 9.425937430635122e-06, "loss": 0.4858, "step": 8100 }, { "epoch": 0.23867536791243757, "grad_norm": 1.7057364769777914, "learning_rate": 9.425698165231591e-06, "loss": 0.5028, "step": 8101 }, { "epoch": 0.23870483036990114, "grad_norm": 1.5106589896229954, "learning_rate": 9.425458853014374e-06, "loss": 0.4045, "step": 8102 }, { "epoch": 0.23873429282736472, "grad_norm": 1.4720901803276933, "learning_rate": 9.425219493986003e-06, "loss": 0.5471, "step": 8103 }, { "epoch": 0.2387637552848283, "grad_norm": 1.5753329299524501, "learning_rate": 9.424980088149007e-06, "loss": 0.4187, "step": 8104 }, { "epoch": 0.2387932177422919, "grad_norm": 1.7299810123973427, "learning_rate": 9.42474063550592e-06, "loss": 0.4479, "step": 8105 }, { "epoch": 0.23882268019975547, "grad_norm": 1.4890606938887243, "learning_rate": 9.424501136059277e-06, "loss": 0.379, "step": 8106 }, { "epoch": 0.23885214265721905, "grad_norm": 1.6250941294345267, "learning_rate": 9.424261589811608e-06, "loss": 0.5641, "step": 8107 }, { "epoch": 0.23888160511468262, "grad_norm": 1.5494614135962639, "learning_rate": 9.424021996765449e-06, "loss": 0.6097, "step": 8108 }, { "epoch": 0.2389110675721462, "grad_norm": 1.6680225381379243, "learning_rate": 9.423782356923331e-06, "loss": 0.553, "step": 8109 }, { "epoch": 0.23894053002960977, "grad_norm": 1.3526882029408815, "learning_rate": 9.423542670287795e-06, "loss": 0.317, "step": 8110 }, { "epoch": 0.23896999248707335, "grad_norm": 1.852356877007818, "learning_rate": 9.423302936861371e-06, "loss": 0.6973, "step": 8111 }, { "epoch": 0.23899945494453692, "grad_norm": 1.5742951280959132, "learning_rate": 9.423063156646597e-06, "loss": 0.4657, "step": 8112 }, { "epoch": 0.2390289174020005, "grad_norm": 1.3634190596750297, "learning_rate": 9.42282332964601e-06, "loss": 0.3804, "step": 8113 }, { "epoch": 0.23905837985946407, "grad_norm": 1.3991854126753693, "learning_rate": 9.422583455862145e-06, "loss": 0.4801, "step": 8114 }, { "epoch": 0.23908784231692765, "grad_norm": 1.457713381728137, "learning_rate": 9.422343535297539e-06, "loss": 0.331, "step": 8115 }, { "epoch": 0.23911730477439122, "grad_norm": 1.4522999843358915, "learning_rate": 9.422103567954731e-06, "loss": 0.4043, "step": 8116 }, { "epoch": 0.2391467672318548, "grad_norm": 1.6427416382036235, "learning_rate": 9.42186355383626e-06, "loss": 0.5305, "step": 8117 }, { "epoch": 0.2391762296893184, "grad_norm": 1.698495587574686, "learning_rate": 9.421623492944662e-06, "loss": 0.5441, "step": 8118 }, { "epoch": 0.23920569214678197, "grad_norm": 1.5959193471084903, "learning_rate": 9.421383385282482e-06, "loss": 0.4863, "step": 8119 }, { "epoch": 0.23923515460424555, "grad_norm": 1.5510943840251565, "learning_rate": 9.421143230852251e-06, "loss": 0.533, "step": 8120 }, { "epoch": 0.23926461706170912, "grad_norm": 1.6997304468470271, "learning_rate": 9.420903029656519e-06, "loss": 0.4679, "step": 8121 }, { "epoch": 0.2392940795191727, "grad_norm": 1.7231163198291315, "learning_rate": 9.42066278169782e-06, "loss": 0.5846, "step": 8122 }, { "epoch": 0.23932354197663627, "grad_norm": 1.4475164618968583, "learning_rate": 9.420422486978698e-06, "loss": 0.4152, "step": 8123 }, { "epoch": 0.23935300443409985, "grad_norm": 1.7679220119933419, "learning_rate": 9.420182145501692e-06, "loss": 0.5466, "step": 8124 }, { "epoch": 0.23938246689156342, "grad_norm": 1.5416479309951958, "learning_rate": 9.419941757269349e-06, "loss": 0.406, "step": 8125 }, { "epoch": 0.239411929349027, "grad_norm": 1.4987621803473992, "learning_rate": 9.419701322284207e-06, "loss": 0.4707, "step": 8126 }, { "epoch": 0.23944139180649057, "grad_norm": 1.7501795451151887, "learning_rate": 9.419460840548813e-06, "loss": 0.6134, "step": 8127 }, { "epoch": 0.23947085426395415, "grad_norm": 1.5890113903668308, "learning_rate": 9.419220312065709e-06, "loss": 0.4962, "step": 8128 }, { "epoch": 0.23950031672141772, "grad_norm": 1.4849998994410953, "learning_rate": 9.418979736837439e-06, "loss": 0.3721, "step": 8129 }, { "epoch": 0.2395297791788813, "grad_norm": 1.4948370340692503, "learning_rate": 9.418739114866546e-06, "loss": 0.4849, "step": 8130 }, { "epoch": 0.2395592416363449, "grad_norm": 1.757847634894617, "learning_rate": 9.41849844615558e-06, "loss": 0.4484, "step": 8131 }, { "epoch": 0.23958870409380847, "grad_norm": 1.7706298291098252, "learning_rate": 9.418257730707083e-06, "loss": 0.5496, "step": 8132 }, { "epoch": 0.23961816655127205, "grad_norm": 1.4604992017238219, "learning_rate": 9.418016968523602e-06, "loss": 0.5154, "step": 8133 }, { "epoch": 0.23964762900873562, "grad_norm": 1.5007771638397072, "learning_rate": 9.417776159607686e-06, "loss": 0.5147, "step": 8134 }, { "epoch": 0.2396770914661992, "grad_norm": 1.6619086037499127, "learning_rate": 9.417535303961878e-06, "loss": 0.5133, "step": 8135 }, { "epoch": 0.23970655392366277, "grad_norm": 1.6723383956050348, "learning_rate": 9.417294401588728e-06, "loss": 0.5219, "step": 8136 }, { "epoch": 0.23973601638112635, "grad_norm": 1.4159946008746644, "learning_rate": 9.417053452490784e-06, "loss": 0.3787, "step": 8137 }, { "epoch": 0.23976547883858992, "grad_norm": 1.7113706985319181, "learning_rate": 9.416812456670596e-06, "loss": 0.6063, "step": 8138 }, { "epoch": 0.2397949412960535, "grad_norm": 1.716723128561238, "learning_rate": 9.416571414130711e-06, "loss": 0.5183, "step": 8139 }, { "epoch": 0.23982440375351707, "grad_norm": 1.5302733056901774, "learning_rate": 9.41633032487368e-06, "loss": 0.4951, "step": 8140 }, { "epoch": 0.23985386621098065, "grad_norm": 1.4627966952283167, "learning_rate": 9.416089188902052e-06, "loss": 0.4032, "step": 8141 }, { "epoch": 0.23988332866844422, "grad_norm": 1.4880337220184472, "learning_rate": 9.415848006218377e-06, "loss": 0.4494, "step": 8142 }, { "epoch": 0.2399127911259078, "grad_norm": 1.6458075306722133, "learning_rate": 9.41560677682521e-06, "loss": 0.5482, "step": 8143 }, { "epoch": 0.2399422535833714, "grad_norm": 1.5197750548671574, "learning_rate": 9.4153655007251e-06, "loss": 0.4876, "step": 8144 }, { "epoch": 0.23997171604083498, "grad_norm": 1.6333924979500125, "learning_rate": 9.415124177920598e-06, "loss": 0.6146, "step": 8145 }, { "epoch": 0.24000117849829855, "grad_norm": 1.5613224487622845, "learning_rate": 9.414882808414258e-06, "loss": 0.5821, "step": 8146 }, { "epoch": 0.24003064095576213, "grad_norm": 1.598424766315887, "learning_rate": 9.414641392208635e-06, "loss": 0.5965, "step": 8147 }, { "epoch": 0.2400601034132257, "grad_norm": 1.6917787297752682, "learning_rate": 9.414399929306279e-06, "loss": 0.5598, "step": 8148 }, { "epoch": 0.24008956587068928, "grad_norm": 1.6909953315964559, "learning_rate": 9.414158419709747e-06, "loss": 0.6301, "step": 8149 }, { "epoch": 0.24011902832815285, "grad_norm": 1.76643708682213, "learning_rate": 9.413916863421593e-06, "loss": 0.5603, "step": 8150 }, { "epoch": 0.24014849078561643, "grad_norm": 1.9815795666360243, "learning_rate": 9.41367526044437e-06, "loss": 0.5636, "step": 8151 }, { "epoch": 0.24017795324308, "grad_norm": 1.6203249428684812, "learning_rate": 9.413433610780636e-06, "loss": 0.5842, "step": 8152 }, { "epoch": 0.24020741570054358, "grad_norm": 1.7615858682823724, "learning_rate": 9.413191914432947e-06, "loss": 0.4951, "step": 8153 }, { "epoch": 0.24023687815800715, "grad_norm": 1.5866977542580827, "learning_rate": 9.412950171403857e-06, "loss": 0.3204, "step": 8154 }, { "epoch": 0.24026634061547072, "grad_norm": 1.82462153286358, "learning_rate": 9.412708381695927e-06, "loss": 0.4851, "step": 8155 }, { "epoch": 0.2402958030729343, "grad_norm": 1.5309259246674838, "learning_rate": 9.41246654531171e-06, "loss": 0.4751, "step": 8156 }, { "epoch": 0.2403252655303979, "grad_norm": 1.758704383017332, "learning_rate": 9.412224662253769e-06, "loss": 0.4793, "step": 8157 }, { "epoch": 0.24035472798786148, "grad_norm": 1.6696391293770354, "learning_rate": 9.41198273252466e-06, "loss": 0.4315, "step": 8158 }, { "epoch": 0.24038419044532505, "grad_norm": 1.3176378249990817, "learning_rate": 9.411740756126942e-06, "loss": 0.4417, "step": 8159 }, { "epoch": 0.24041365290278863, "grad_norm": 1.5904840305688508, "learning_rate": 9.411498733063174e-06, "loss": 0.5763, "step": 8160 }, { "epoch": 0.2404431153602522, "grad_norm": 1.6183813326618335, "learning_rate": 9.411256663335916e-06, "loss": 0.5794, "step": 8161 }, { "epoch": 0.24047257781771578, "grad_norm": 1.5865210461420725, "learning_rate": 9.411014546947733e-06, "loss": 0.6745, "step": 8162 }, { "epoch": 0.24050204027517935, "grad_norm": 1.651490108147706, "learning_rate": 9.41077238390118e-06, "loss": 0.5776, "step": 8163 }, { "epoch": 0.24053150273264293, "grad_norm": 1.515885776295567, "learning_rate": 9.41053017419882e-06, "loss": 0.3705, "step": 8164 }, { "epoch": 0.2405609651901065, "grad_norm": 1.6895795491874714, "learning_rate": 9.410287917843218e-06, "loss": 0.5119, "step": 8165 }, { "epoch": 0.24059042764757008, "grad_norm": 1.4390555879221971, "learning_rate": 9.410045614836933e-06, "loss": 0.4364, "step": 8166 }, { "epoch": 0.24061989010503365, "grad_norm": 1.4791200946690304, "learning_rate": 9.409803265182532e-06, "loss": 0.4987, "step": 8167 }, { "epoch": 0.24064935256249723, "grad_norm": 1.4439508814729693, "learning_rate": 9.409560868882573e-06, "loss": 0.4282, "step": 8168 }, { "epoch": 0.2406788150199608, "grad_norm": 1.6408963174290414, "learning_rate": 9.409318425939624e-06, "loss": 0.4321, "step": 8169 }, { "epoch": 0.2407082774774244, "grad_norm": 1.673226309870634, "learning_rate": 9.409075936356247e-06, "loss": 0.5675, "step": 8170 }, { "epoch": 0.24073773993488798, "grad_norm": 1.565316616339735, "learning_rate": 9.40883340013501e-06, "loss": 0.5774, "step": 8171 }, { "epoch": 0.24076720239235155, "grad_norm": 1.6578190979683107, "learning_rate": 9.408590817278478e-06, "loss": 0.5485, "step": 8172 }, { "epoch": 0.24079666484981513, "grad_norm": 1.4024533359439715, "learning_rate": 9.408348187789214e-06, "loss": 0.3165, "step": 8173 }, { "epoch": 0.2408261273072787, "grad_norm": 1.5490106624939133, "learning_rate": 9.408105511669787e-06, "loss": 0.5119, "step": 8174 }, { "epoch": 0.24085558976474228, "grad_norm": 1.623028725993803, "learning_rate": 9.407862788922763e-06, "loss": 0.5057, "step": 8175 }, { "epoch": 0.24088505222220585, "grad_norm": 1.5576882598509771, "learning_rate": 9.407620019550712e-06, "loss": 0.4974, "step": 8176 }, { "epoch": 0.24091451467966943, "grad_norm": 1.535240758743825, "learning_rate": 9.407377203556198e-06, "loss": 0.6423, "step": 8177 }, { "epoch": 0.240943977137133, "grad_norm": 1.6440754281486547, "learning_rate": 9.407134340941791e-06, "loss": 0.5673, "step": 8178 }, { "epoch": 0.24097343959459658, "grad_norm": 1.6696534716669766, "learning_rate": 9.406891431710059e-06, "loss": 0.6123, "step": 8179 }, { "epoch": 0.24100290205206015, "grad_norm": 1.7763722002256028, "learning_rate": 9.406648475863574e-06, "loss": 0.5468, "step": 8180 }, { "epoch": 0.24103236450952373, "grad_norm": 1.9119071279523203, "learning_rate": 9.406405473404906e-06, "loss": 0.6222, "step": 8181 }, { "epoch": 0.2410618269669873, "grad_norm": 1.5481099926817876, "learning_rate": 9.40616242433662e-06, "loss": 0.3809, "step": 8182 }, { "epoch": 0.2410912894244509, "grad_norm": 1.4758441846780055, "learning_rate": 9.405919328661292e-06, "loss": 0.413, "step": 8183 }, { "epoch": 0.24112075188191448, "grad_norm": 1.451983252231406, "learning_rate": 9.405676186381492e-06, "loss": 0.415, "step": 8184 }, { "epoch": 0.24115021433937806, "grad_norm": 1.691552505613605, "learning_rate": 9.405432997499794e-06, "loss": 0.5048, "step": 8185 }, { "epoch": 0.24117967679684163, "grad_norm": 1.4143949559468318, "learning_rate": 9.405189762018767e-06, "loss": 0.4545, "step": 8186 }, { "epoch": 0.2412091392543052, "grad_norm": 1.5147037621065504, "learning_rate": 9.404946479940983e-06, "loss": 0.4537, "step": 8187 }, { "epoch": 0.24123860171176878, "grad_norm": 1.6832717457840183, "learning_rate": 9.404703151269022e-06, "loss": 0.5358, "step": 8188 }, { "epoch": 0.24126806416923235, "grad_norm": 1.495537619169018, "learning_rate": 9.40445977600545e-06, "loss": 0.5715, "step": 8189 }, { "epoch": 0.24129752662669593, "grad_norm": 1.899992850758579, "learning_rate": 9.404216354152847e-06, "loss": 0.528, "step": 8190 }, { "epoch": 0.2413269890841595, "grad_norm": 1.8469079927405017, "learning_rate": 9.403972885713783e-06, "loss": 0.4498, "step": 8191 }, { "epoch": 0.24135645154162308, "grad_norm": 1.7800524817878032, "learning_rate": 9.403729370690839e-06, "loss": 0.4694, "step": 8192 }, { "epoch": 0.24138591399908665, "grad_norm": 1.6521430529486725, "learning_rate": 9.403485809086586e-06, "loss": 0.5919, "step": 8193 }, { "epoch": 0.24141537645655023, "grad_norm": 1.4859085742586284, "learning_rate": 9.403242200903601e-06, "loss": 0.5002, "step": 8194 }, { "epoch": 0.2414448389140138, "grad_norm": 1.5491355736503576, "learning_rate": 9.402998546144463e-06, "loss": 0.4631, "step": 8195 }, { "epoch": 0.2414743013714774, "grad_norm": 1.5340372587891504, "learning_rate": 9.40275484481175e-06, "loss": 0.3622, "step": 8196 }, { "epoch": 0.24150376382894098, "grad_norm": 1.734071782326691, "learning_rate": 9.402511096908037e-06, "loss": 0.4312, "step": 8197 }, { "epoch": 0.24153322628640456, "grad_norm": 1.5997337356221009, "learning_rate": 9.402267302435903e-06, "loss": 0.5228, "step": 8198 }, { "epoch": 0.24156268874386813, "grad_norm": 1.3797163523893774, "learning_rate": 9.402023461397927e-06, "loss": 0.4887, "step": 8199 }, { "epoch": 0.2415921512013317, "grad_norm": 1.5415752012663515, "learning_rate": 9.40177957379669e-06, "loss": 0.5246, "step": 8200 }, { "epoch": 0.24162161365879528, "grad_norm": 1.4545944532729758, "learning_rate": 9.401535639634767e-06, "loss": 0.5383, "step": 8201 }, { "epoch": 0.24165107611625886, "grad_norm": 1.5615715009298168, "learning_rate": 9.401291658914744e-06, "loss": 0.5997, "step": 8202 }, { "epoch": 0.24168053857372243, "grad_norm": 1.485271628688337, "learning_rate": 9.401047631639198e-06, "loss": 0.3974, "step": 8203 }, { "epoch": 0.241710001031186, "grad_norm": 1.5220815920378363, "learning_rate": 9.400803557810711e-06, "loss": 0.5363, "step": 8204 }, { "epoch": 0.24173946348864958, "grad_norm": 1.7894755824056294, "learning_rate": 9.400559437431867e-06, "loss": 0.5273, "step": 8205 }, { "epoch": 0.24176892594611316, "grad_norm": 1.6135019511695845, "learning_rate": 9.400315270505245e-06, "loss": 0.3949, "step": 8206 }, { "epoch": 0.24179838840357673, "grad_norm": 1.4998606701121895, "learning_rate": 9.40007105703343e-06, "loss": 0.4431, "step": 8207 }, { "epoch": 0.24182785086104033, "grad_norm": 1.4830792711868237, "learning_rate": 9.399826797019003e-06, "loss": 0.4773, "step": 8208 }, { "epoch": 0.2418573133185039, "grad_norm": 1.726810918714283, "learning_rate": 9.39958249046455e-06, "loss": 0.4885, "step": 8209 }, { "epoch": 0.24188677577596748, "grad_norm": 1.7462897416081982, "learning_rate": 9.399338137372653e-06, "loss": 0.5015, "step": 8210 }, { "epoch": 0.24191623823343106, "grad_norm": 1.5617811951808425, "learning_rate": 9.399093737745897e-06, "loss": 0.5261, "step": 8211 }, { "epoch": 0.24194570069089463, "grad_norm": 1.715107577489639, "learning_rate": 9.398849291586872e-06, "loss": 0.5871, "step": 8212 }, { "epoch": 0.2419751631483582, "grad_norm": 2.0455083764088235, "learning_rate": 9.398604798898156e-06, "loss": 0.3765, "step": 8213 }, { "epoch": 0.24200462560582178, "grad_norm": 1.6781143201030324, "learning_rate": 9.398360259682341e-06, "loss": 0.5044, "step": 8214 }, { "epoch": 0.24203408806328536, "grad_norm": 1.624565802173273, "learning_rate": 9.398115673942012e-06, "loss": 0.4362, "step": 8215 }, { "epoch": 0.24206355052074893, "grad_norm": 1.6972492926655567, "learning_rate": 9.397871041679755e-06, "loss": 0.6345, "step": 8216 }, { "epoch": 0.2420930129782125, "grad_norm": 1.7657804716348187, "learning_rate": 9.397626362898158e-06, "loss": 0.4807, "step": 8217 }, { "epoch": 0.24212247543567608, "grad_norm": 1.7366801608192954, "learning_rate": 9.39738163759981e-06, "loss": 0.6431, "step": 8218 }, { "epoch": 0.24215193789313966, "grad_norm": 1.552475644262411, "learning_rate": 9.397136865787299e-06, "loss": 0.4501, "step": 8219 }, { "epoch": 0.24218140035060323, "grad_norm": 1.6619944687343524, "learning_rate": 9.396892047463215e-06, "loss": 0.4017, "step": 8220 }, { "epoch": 0.24221086280806683, "grad_norm": 1.5344942638586658, "learning_rate": 9.396647182630147e-06, "loss": 0.462, "step": 8221 }, { "epoch": 0.2422403252655304, "grad_norm": 1.427587582671734, "learning_rate": 9.396402271290683e-06, "loss": 0.4738, "step": 8222 }, { "epoch": 0.24226978772299398, "grad_norm": 1.9748564297396474, "learning_rate": 9.396157313447418e-06, "loss": 0.5255, "step": 8223 }, { "epoch": 0.24229925018045756, "grad_norm": 1.563255288136597, "learning_rate": 9.39591230910294e-06, "loss": 0.5128, "step": 8224 }, { "epoch": 0.24232871263792113, "grad_norm": 1.4263734106069146, "learning_rate": 9.395667258259841e-06, "loss": 0.4317, "step": 8225 }, { "epoch": 0.2423581750953847, "grad_norm": 1.7149038438848379, "learning_rate": 9.395422160920714e-06, "loss": 0.5967, "step": 8226 }, { "epoch": 0.24238763755284828, "grad_norm": 1.6159802052176047, "learning_rate": 9.39517701708815e-06, "loss": 0.63, "step": 8227 }, { "epoch": 0.24241710001031186, "grad_norm": 1.8916062355595757, "learning_rate": 9.394931826764743e-06, "loss": 0.3752, "step": 8228 }, { "epoch": 0.24244656246777543, "grad_norm": 1.4717994601809912, "learning_rate": 9.394686589953086e-06, "loss": 0.4891, "step": 8229 }, { "epoch": 0.242476024925239, "grad_norm": 1.5552441611714303, "learning_rate": 9.394441306655775e-06, "loss": 0.4178, "step": 8230 }, { "epoch": 0.24250548738270258, "grad_norm": 1.478602537178466, "learning_rate": 9.394195976875402e-06, "loss": 0.4728, "step": 8231 }, { "epoch": 0.24253494984016616, "grad_norm": 1.5338900086967413, "learning_rate": 9.393950600614564e-06, "loss": 0.4136, "step": 8232 }, { "epoch": 0.24256441229762973, "grad_norm": 1.6814006577917475, "learning_rate": 9.393705177875857e-06, "loss": 0.5752, "step": 8233 }, { "epoch": 0.24259387475509334, "grad_norm": 1.7095932592291958, "learning_rate": 9.393459708661872e-06, "loss": 0.589, "step": 8234 }, { "epoch": 0.2426233372125569, "grad_norm": 1.4663280714303337, "learning_rate": 9.393214192975212e-06, "loss": 0.4197, "step": 8235 }, { "epoch": 0.24265279967002049, "grad_norm": 1.5410523030329195, "learning_rate": 9.39296863081847e-06, "loss": 0.4364, "step": 8236 }, { "epoch": 0.24268226212748406, "grad_norm": 1.4398416175112627, "learning_rate": 9.392723022194246e-06, "loss": 0.4329, "step": 8237 }, { "epoch": 0.24271172458494764, "grad_norm": 1.7157998588796575, "learning_rate": 9.392477367105135e-06, "loss": 0.5551, "step": 8238 }, { "epoch": 0.2427411870424112, "grad_norm": 1.6766038028291181, "learning_rate": 9.392231665553737e-06, "loss": 0.4592, "step": 8239 }, { "epoch": 0.24277064949987479, "grad_norm": 1.4834993837501473, "learning_rate": 9.391985917542651e-06, "loss": 0.4223, "step": 8240 }, { "epoch": 0.24280011195733836, "grad_norm": 1.547109519097727, "learning_rate": 9.391740123074478e-06, "loss": 0.5708, "step": 8241 }, { "epoch": 0.24282957441480194, "grad_norm": 1.5792783944269013, "learning_rate": 9.391494282151815e-06, "loss": 0.4979, "step": 8242 }, { "epoch": 0.2428590368722655, "grad_norm": 1.6587331753756949, "learning_rate": 9.391248394777265e-06, "loss": 0.5812, "step": 8243 }, { "epoch": 0.24288849932972909, "grad_norm": 1.813076307038269, "learning_rate": 9.391002460953427e-06, "loss": 0.6772, "step": 8244 }, { "epoch": 0.24291796178719266, "grad_norm": 1.478940360566123, "learning_rate": 9.390756480682901e-06, "loss": 0.52, "step": 8245 }, { "epoch": 0.24294742424465623, "grad_norm": 1.738906545582052, "learning_rate": 9.390510453968294e-06, "loss": 0.5766, "step": 8246 }, { "epoch": 0.24297688670211984, "grad_norm": 1.508467288783078, "learning_rate": 9.390264380812207e-06, "loss": 0.462, "step": 8247 }, { "epoch": 0.2430063491595834, "grad_norm": 1.558088124696946, "learning_rate": 9.390018261217237e-06, "loss": 0.6097, "step": 8248 }, { "epoch": 0.243035811617047, "grad_norm": 1.6139157521625402, "learning_rate": 9.389772095185994e-06, "loss": 0.6003, "step": 8249 }, { "epoch": 0.24306527407451056, "grad_norm": 1.5087734532453752, "learning_rate": 9.38952588272108e-06, "loss": 0.52, "step": 8250 }, { "epoch": 0.24309473653197414, "grad_norm": 1.5576022102035352, "learning_rate": 9.389279623825098e-06, "loss": 0.4718, "step": 8251 }, { "epoch": 0.2431241989894377, "grad_norm": 1.4861522954555901, "learning_rate": 9.389033318500654e-06, "loss": 0.4359, "step": 8252 }, { "epoch": 0.2431536614469013, "grad_norm": 1.7777830489593844, "learning_rate": 9.388786966750354e-06, "loss": 0.4656, "step": 8253 }, { "epoch": 0.24318312390436486, "grad_norm": 1.736888829692971, "learning_rate": 9.388540568576803e-06, "loss": 0.5695, "step": 8254 }, { "epoch": 0.24321258636182844, "grad_norm": 1.5277214503417749, "learning_rate": 9.388294123982606e-06, "loss": 0.4713, "step": 8255 }, { "epoch": 0.243242048819292, "grad_norm": 1.6187262751874292, "learning_rate": 9.388047632970372e-06, "loss": 0.5021, "step": 8256 }, { "epoch": 0.2432715112767556, "grad_norm": 1.8918389887484766, "learning_rate": 9.387801095542707e-06, "loss": 0.6847, "step": 8257 }, { "epoch": 0.24330097373421916, "grad_norm": 1.5166128039208453, "learning_rate": 9.38755451170222e-06, "loss": 0.5266, "step": 8258 }, { "epoch": 0.24333043619168274, "grad_norm": 1.6528652121672058, "learning_rate": 9.387307881451518e-06, "loss": 0.4756, "step": 8259 }, { "epoch": 0.24335989864914634, "grad_norm": 1.490097362090964, "learning_rate": 9.38706120479321e-06, "loss": 0.3516, "step": 8260 }, { "epoch": 0.2433893611066099, "grad_norm": 1.6698774574939952, "learning_rate": 9.386814481729905e-06, "loss": 0.5895, "step": 8261 }, { "epoch": 0.2434188235640735, "grad_norm": 1.5810994999076116, "learning_rate": 9.386567712264216e-06, "loss": 0.5801, "step": 8262 }, { "epoch": 0.24344828602153706, "grad_norm": 1.5560240642383014, "learning_rate": 9.386320896398749e-06, "loss": 0.5236, "step": 8263 }, { "epoch": 0.24347774847900064, "grad_norm": 1.6973056311216168, "learning_rate": 9.386074034136114e-06, "loss": 0.5447, "step": 8264 }, { "epoch": 0.2435072109364642, "grad_norm": 1.5643631452771674, "learning_rate": 9.385827125478929e-06, "loss": 0.477, "step": 8265 }, { "epoch": 0.2435366733939278, "grad_norm": 1.6611723199885093, "learning_rate": 9.385580170429797e-06, "loss": 0.4973, "step": 8266 }, { "epoch": 0.24356613585139136, "grad_norm": 1.74110288220799, "learning_rate": 9.385333168991337e-06, "loss": 0.6266, "step": 8267 }, { "epoch": 0.24359559830885494, "grad_norm": 1.6033886268313318, "learning_rate": 9.385086121166158e-06, "loss": 0.4966, "step": 8268 }, { "epoch": 0.2436250607663185, "grad_norm": 2.004018224531048, "learning_rate": 9.384839026956876e-06, "loss": 0.575, "step": 8269 }, { "epoch": 0.2436545232237821, "grad_norm": 1.5996197697766814, "learning_rate": 9.384591886366102e-06, "loss": 0.4802, "step": 8270 }, { "epoch": 0.24368398568124566, "grad_norm": 1.4399714172620315, "learning_rate": 9.384344699396451e-06, "loss": 0.5323, "step": 8271 }, { "epoch": 0.24371344813870924, "grad_norm": 1.5881046464110808, "learning_rate": 9.384097466050539e-06, "loss": 0.4935, "step": 8272 }, { "epoch": 0.24374291059617284, "grad_norm": 1.7520496839558677, "learning_rate": 9.38385018633098e-06, "loss": 0.4443, "step": 8273 }, { "epoch": 0.24377237305363642, "grad_norm": 1.6381050653540246, "learning_rate": 9.38360286024039e-06, "loss": 0.5302, "step": 8274 }, { "epoch": 0.2438018355111, "grad_norm": 1.7308242082537542, "learning_rate": 9.383355487781384e-06, "loss": 0.5831, "step": 8275 }, { "epoch": 0.24383129796856357, "grad_norm": 1.42977192730514, "learning_rate": 9.38310806895658e-06, "loss": 0.4044, "step": 8276 }, { "epoch": 0.24386076042602714, "grad_norm": 1.6702407885364308, "learning_rate": 9.382860603768593e-06, "loss": 0.5727, "step": 8277 }, { "epoch": 0.24389022288349071, "grad_norm": 1.7384438105029862, "learning_rate": 9.382613092220044e-06, "loss": 0.6958, "step": 8278 }, { "epoch": 0.2439196853409543, "grad_norm": 1.46687771592112, "learning_rate": 9.38236553431355e-06, "loss": 0.4832, "step": 8279 }, { "epoch": 0.24394914779841786, "grad_norm": 1.6329674022367489, "learning_rate": 9.382117930051727e-06, "loss": 0.6396, "step": 8280 }, { "epoch": 0.24397861025588144, "grad_norm": 1.6293520907913808, "learning_rate": 9.381870279437198e-06, "loss": 0.4622, "step": 8281 }, { "epoch": 0.24400807271334501, "grad_norm": 1.425485066275591, "learning_rate": 9.381622582472578e-06, "loss": 0.4564, "step": 8282 }, { "epoch": 0.2440375351708086, "grad_norm": 1.5984979530870047, "learning_rate": 9.381374839160491e-06, "loss": 0.5299, "step": 8283 }, { "epoch": 0.24406699762827216, "grad_norm": 1.4049720031874162, "learning_rate": 9.381127049503558e-06, "loss": 0.4398, "step": 8284 }, { "epoch": 0.24409646008573574, "grad_norm": 1.6338347103196473, "learning_rate": 9.380879213504396e-06, "loss": 0.6038, "step": 8285 }, { "epoch": 0.24412592254319934, "grad_norm": 1.615445517866959, "learning_rate": 9.38063133116563e-06, "loss": 0.4676, "step": 8286 }, { "epoch": 0.24415538500066292, "grad_norm": 1.6045288131905424, "learning_rate": 9.38038340248988e-06, "loss": 0.4944, "step": 8287 }, { "epoch": 0.2441848474581265, "grad_norm": 1.4960560779071004, "learning_rate": 9.380135427479769e-06, "loss": 0.495, "step": 8288 }, { "epoch": 0.24421430991559007, "grad_norm": 1.6615271651374595, "learning_rate": 9.379887406137922e-06, "loss": 0.6175, "step": 8289 }, { "epoch": 0.24424377237305364, "grad_norm": 1.4804749751674446, "learning_rate": 9.37963933846696e-06, "loss": 0.4195, "step": 8290 }, { "epoch": 0.24427323483051722, "grad_norm": 1.4949896292409368, "learning_rate": 9.379391224469507e-06, "loss": 0.4261, "step": 8291 }, { "epoch": 0.2443026972879808, "grad_norm": 1.773274973593002, "learning_rate": 9.379143064148188e-06, "loss": 0.5011, "step": 8292 }, { "epoch": 0.24433215974544437, "grad_norm": 1.5087257664846305, "learning_rate": 9.37889485750563e-06, "loss": 0.4815, "step": 8293 }, { "epoch": 0.24436162220290794, "grad_norm": 1.8623601306883497, "learning_rate": 9.378646604544456e-06, "loss": 0.5576, "step": 8294 }, { "epoch": 0.24439108466037152, "grad_norm": 1.526061091114744, "learning_rate": 9.378398305267292e-06, "loss": 0.4301, "step": 8295 }, { "epoch": 0.2444205471178351, "grad_norm": 1.7790144107271562, "learning_rate": 9.378149959676765e-06, "loss": 0.5091, "step": 8296 }, { "epoch": 0.24445000957529867, "grad_norm": 1.7310241782356817, "learning_rate": 9.377901567775503e-06, "loss": 0.4927, "step": 8297 }, { "epoch": 0.24447947203276224, "grad_norm": 1.803646156006399, "learning_rate": 9.377653129566134e-06, "loss": 0.5224, "step": 8298 }, { "epoch": 0.24450893449022584, "grad_norm": 1.78684895803287, "learning_rate": 9.37740464505128e-06, "loss": 0.5175, "step": 8299 }, { "epoch": 0.24453839694768942, "grad_norm": 1.6605839626786931, "learning_rate": 9.377156114233576e-06, "loss": 0.5114, "step": 8300 }, { "epoch": 0.244567859405153, "grad_norm": 1.6766529322819048, "learning_rate": 9.37690753711565e-06, "loss": 0.5773, "step": 8301 }, { "epoch": 0.24459732186261657, "grad_norm": 1.7426783996606627, "learning_rate": 9.376658913700129e-06, "loss": 0.5096, "step": 8302 }, { "epoch": 0.24462678432008014, "grad_norm": 1.5798368315111635, "learning_rate": 9.376410243989643e-06, "loss": 0.5426, "step": 8303 }, { "epoch": 0.24465624677754372, "grad_norm": 1.6800815490798895, "learning_rate": 9.376161527986824e-06, "loss": 0.4939, "step": 8304 }, { "epoch": 0.2446857092350073, "grad_norm": 1.602179806688203, "learning_rate": 9.375912765694302e-06, "loss": 0.5354, "step": 8305 }, { "epoch": 0.24471517169247087, "grad_norm": 1.5790288526709169, "learning_rate": 9.375663957114707e-06, "loss": 0.5405, "step": 8306 }, { "epoch": 0.24474463414993444, "grad_norm": 1.7491757155860035, "learning_rate": 9.375415102250673e-06, "loss": 0.4351, "step": 8307 }, { "epoch": 0.24477409660739802, "grad_norm": 1.5758168130928363, "learning_rate": 9.375166201104832e-06, "loss": 0.512, "step": 8308 }, { "epoch": 0.2448035590648616, "grad_norm": 1.440329830915166, "learning_rate": 9.374917253679816e-06, "loss": 0.4773, "step": 8309 }, { "epoch": 0.24483302152232517, "grad_norm": 1.893294688282277, "learning_rate": 9.374668259978258e-06, "loss": 0.672, "step": 8310 }, { "epoch": 0.24486248397978874, "grad_norm": 1.6404294504167467, "learning_rate": 9.374419220002795e-06, "loss": 0.4841, "step": 8311 }, { "epoch": 0.24489194643725234, "grad_norm": 1.533408102802798, "learning_rate": 9.374170133756056e-06, "loss": 0.4818, "step": 8312 }, { "epoch": 0.24492140889471592, "grad_norm": 1.5822002150779897, "learning_rate": 9.37392100124068e-06, "loss": 0.4997, "step": 8313 }, { "epoch": 0.2449508713521795, "grad_norm": 1.5013362241840886, "learning_rate": 9.373671822459299e-06, "loss": 0.4839, "step": 8314 }, { "epoch": 0.24498033380964307, "grad_norm": 1.4353505487004825, "learning_rate": 9.373422597414553e-06, "loss": 0.4389, "step": 8315 }, { "epoch": 0.24500979626710664, "grad_norm": 1.6777078012688533, "learning_rate": 9.373173326109072e-06, "loss": 0.4622, "step": 8316 }, { "epoch": 0.24503925872457022, "grad_norm": 1.5494577029004628, "learning_rate": 9.372924008545499e-06, "loss": 0.475, "step": 8317 }, { "epoch": 0.2450687211820338, "grad_norm": 1.5447142399019933, "learning_rate": 9.372674644726468e-06, "loss": 0.4704, "step": 8318 }, { "epoch": 0.24509818363949737, "grad_norm": 1.6845438635346197, "learning_rate": 9.372425234654617e-06, "loss": 0.5762, "step": 8319 }, { "epoch": 0.24512764609696094, "grad_norm": 1.1900555515357525, "learning_rate": 9.372175778332583e-06, "loss": 0.2913, "step": 8320 }, { "epoch": 0.24515710855442452, "grad_norm": 1.84248656943054, "learning_rate": 9.371926275763006e-06, "loss": 0.4893, "step": 8321 }, { "epoch": 0.2451865710118881, "grad_norm": 1.5202045398578916, "learning_rate": 9.371676726948527e-06, "loss": 0.5216, "step": 8322 }, { "epoch": 0.24521603346935167, "grad_norm": 1.7068626500077824, "learning_rate": 9.371427131891783e-06, "loss": 0.5004, "step": 8323 }, { "epoch": 0.24524549592681524, "grad_norm": 1.596104481782928, "learning_rate": 9.371177490595416e-06, "loss": 0.4984, "step": 8324 }, { "epoch": 0.24527495838427885, "grad_norm": 1.5654750051231645, "learning_rate": 9.370927803062067e-06, "loss": 0.5259, "step": 8325 }, { "epoch": 0.24530442084174242, "grad_norm": 1.4564443472010504, "learning_rate": 9.370678069294373e-06, "loss": 0.4966, "step": 8326 }, { "epoch": 0.245333883299206, "grad_norm": 1.7601410279052323, "learning_rate": 9.37042828929498e-06, "loss": 0.5307, "step": 8327 }, { "epoch": 0.24536334575666957, "grad_norm": 1.6368881033676201, "learning_rate": 9.370178463066526e-06, "loss": 0.4497, "step": 8328 }, { "epoch": 0.24539280821413315, "grad_norm": 1.8084645758504811, "learning_rate": 9.369928590611659e-06, "loss": 0.601, "step": 8329 }, { "epoch": 0.24542227067159672, "grad_norm": 1.6205802325228262, "learning_rate": 9.369678671933018e-06, "loss": 0.489, "step": 8330 }, { "epoch": 0.2454517331290603, "grad_norm": 1.455483415939387, "learning_rate": 9.369428707033248e-06, "loss": 0.3554, "step": 8331 }, { "epoch": 0.24548119558652387, "grad_norm": 1.4870838955819972, "learning_rate": 9.369178695914992e-06, "loss": 0.4087, "step": 8332 }, { "epoch": 0.24551065804398745, "grad_norm": 1.7001419458091518, "learning_rate": 9.368928638580896e-06, "loss": 0.5989, "step": 8333 }, { "epoch": 0.24554012050145102, "grad_norm": 1.661642572977957, "learning_rate": 9.368678535033605e-06, "loss": 0.5375, "step": 8334 }, { "epoch": 0.2455695829589146, "grad_norm": 1.5249277190706667, "learning_rate": 9.368428385275762e-06, "loss": 0.4917, "step": 8335 }, { "epoch": 0.24559904541637817, "grad_norm": 2.1706953331861416, "learning_rate": 9.368178189310015e-06, "loss": 0.4465, "step": 8336 }, { "epoch": 0.24562850787384174, "grad_norm": 1.586255218879907, "learning_rate": 9.367927947139012e-06, "loss": 0.5391, "step": 8337 }, { "epoch": 0.24565797033130535, "grad_norm": 1.3963105739404247, "learning_rate": 9.367677658765398e-06, "loss": 0.3791, "step": 8338 }, { "epoch": 0.24568743278876892, "grad_norm": 1.4154475379952485, "learning_rate": 9.36742732419182e-06, "loss": 0.4212, "step": 8339 }, { "epoch": 0.2457168952462325, "grad_norm": 1.6791860684832505, "learning_rate": 9.367176943420928e-06, "loss": 0.4268, "step": 8340 }, { "epoch": 0.24574635770369607, "grad_norm": 1.2730223895554889, "learning_rate": 9.366926516455368e-06, "loss": 0.4025, "step": 8341 }, { "epoch": 0.24577582016115965, "grad_norm": 1.499075352930719, "learning_rate": 9.36667604329779e-06, "loss": 0.5027, "step": 8342 }, { "epoch": 0.24580528261862322, "grad_norm": 1.5231597598084343, "learning_rate": 9.366425523950845e-06, "loss": 0.5354, "step": 8343 }, { "epoch": 0.2458347450760868, "grad_norm": 1.5927881755356206, "learning_rate": 9.366174958417181e-06, "loss": 0.5695, "step": 8344 }, { "epoch": 0.24586420753355037, "grad_norm": 1.6453591708946353, "learning_rate": 9.365924346699447e-06, "loss": 0.5317, "step": 8345 }, { "epoch": 0.24589366999101395, "grad_norm": 1.4617551082767584, "learning_rate": 9.365673688800298e-06, "loss": 0.5316, "step": 8346 }, { "epoch": 0.24592313244847752, "grad_norm": 1.686694356159644, "learning_rate": 9.365422984722383e-06, "loss": 0.5727, "step": 8347 }, { "epoch": 0.2459525949059411, "grad_norm": 1.4557681768211768, "learning_rate": 9.365172234468354e-06, "loss": 0.4984, "step": 8348 }, { "epoch": 0.24598205736340467, "grad_norm": 1.4674074828092416, "learning_rate": 9.36492143804086e-06, "loss": 0.4757, "step": 8349 }, { "epoch": 0.24601151982086825, "grad_norm": 1.3417938504132347, "learning_rate": 9.364670595442562e-06, "loss": 0.3955, "step": 8350 }, { "epoch": 0.24604098227833185, "grad_norm": 1.7230787207192113, "learning_rate": 9.364419706676108e-06, "loss": 0.5397, "step": 8351 }, { "epoch": 0.24607044473579542, "grad_norm": 1.3797059989307277, "learning_rate": 9.364168771744151e-06, "loss": 0.4721, "step": 8352 }, { "epoch": 0.246099907193259, "grad_norm": 1.475643366586036, "learning_rate": 9.363917790649347e-06, "loss": 0.5275, "step": 8353 }, { "epoch": 0.24612936965072257, "grad_norm": 1.3991188029222523, "learning_rate": 9.363666763394351e-06, "loss": 0.3817, "step": 8354 }, { "epoch": 0.24615883210818615, "grad_norm": 1.6051615114073974, "learning_rate": 9.363415689981818e-06, "loss": 0.5072, "step": 8355 }, { "epoch": 0.24618829456564972, "grad_norm": 1.607815244775462, "learning_rate": 9.363164570414403e-06, "loss": 0.5577, "step": 8356 }, { "epoch": 0.2462177570231133, "grad_norm": 1.828905469453142, "learning_rate": 9.362913404694763e-06, "loss": 0.5948, "step": 8357 }, { "epoch": 0.24624721948057687, "grad_norm": 1.692404751744672, "learning_rate": 9.362662192825555e-06, "loss": 0.5776, "step": 8358 }, { "epoch": 0.24627668193804045, "grad_norm": 1.5432175867468074, "learning_rate": 9.362410934809436e-06, "loss": 0.7214, "step": 8359 }, { "epoch": 0.24630614439550402, "grad_norm": 1.7490305688525696, "learning_rate": 9.362159630649063e-06, "loss": 0.5415, "step": 8360 }, { "epoch": 0.2463356068529676, "grad_norm": 1.5372074007193175, "learning_rate": 9.361908280347095e-06, "loss": 0.5622, "step": 8361 }, { "epoch": 0.24636506931043117, "grad_norm": 1.8429240856743954, "learning_rate": 9.361656883906192e-06, "loss": 0.4235, "step": 8362 }, { "epoch": 0.24639453176789475, "grad_norm": 1.3844461135050372, "learning_rate": 9.361405441329011e-06, "loss": 0.4977, "step": 8363 }, { "epoch": 0.24642399422535835, "grad_norm": 1.5717969998058279, "learning_rate": 9.361153952618209e-06, "loss": 0.4304, "step": 8364 }, { "epoch": 0.24645345668282193, "grad_norm": 1.5210629730926153, "learning_rate": 9.360902417776454e-06, "loss": 0.4627, "step": 8365 }, { "epoch": 0.2464829191402855, "grad_norm": 1.6620281634993115, "learning_rate": 9.3606508368064e-06, "loss": 0.5373, "step": 8366 }, { "epoch": 0.24651238159774908, "grad_norm": 1.6148102545822929, "learning_rate": 9.36039920971071e-06, "loss": 0.421, "step": 8367 }, { "epoch": 0.24654184405521265, "grad_norm": 1.3969624993870111, "learning_rate": 9.360147536492048e-06, "loss": 0.3975, "step": 8368 }, { "epoch": 0.24657130651267622, "grad_norm": 1.663854832539962, "learning_rate": 9.359895817153074e-06, "loss": 0.4202, "step": 8369 }, { "epoch": 0.2466007689701398, "grad_norm": 1.3834626361490248, "learning_rate": 9.35964405169645e-06, "loss": 0.3959, "step": 8370 }, { "epoch": 0.24663023142760337, "grad_norm": 1.3136184132304705, "learning_rate": 9.35939224012484e-06, "loss": 0.3887, "step": 8371 }, { "epoch": 0.24665969388506695, "grad_norm": 1.5835025741417479, "learning_rate": 9.359140382440908e-06, "loss": 0.4794, "step": 8372 }, { "epoch": 0.24668915634253052, "grad_norm": 1.5281853907250846, "learning_rate": 9.358888478647317e-06, "loss": 0.4117, "step": 8373 }, { "epoch": 0.2467186187999941, "grad_norm": 1.5076063989299853, "learning_rate": 9.358636528746732e-06, "loss": 0.4461, "step": 8374 }, { "epoch": 0.24674808125745767, "grad_norm": 1.6782000405445303, "learning_rate": 9.358384532741817e-06, "loss": 0.5941, "step": 8375 }, { "epoch": 0.24677754371492125, "grad_norm": 1.4830684033555874, "learning_rate": 9.35813249063524e-06, "loss": 0.4027, "step": 8376 }, { "epoch": 0.24680700617238485, "grad_norm": 1.5110054701213145, "learning_rate": 9.357880402429666e-06, "loss": 0.5165, "step": 8377 }, { "epoch": 0.24683646862984843, "grad_norm": 1.5657451288237192, "learning_rate": 9.357628268127761e-06, "loss": 0.4378, "step": 8378 }, { "epoch": 0.246865931087312, "grad_norm": 1.4730326847836812, "learning_rate": 9.35737608773219e-06, "loss": 0.4476, "step": 8379 }, { "epoch": 0.24689539354477558, "grad_norm": 1.468186110706745, "learning_rate": 9.357123861245625e-06, "loss": 0.4646, "step": 8380 }, { "epoch": 0.24692485600223915, "grad_norm": 1.785542867188127, "learning_rate": 9.356871588670733e-06, "loss": 0.6012, "step": 8381 }, { "epoch": 0.24695431845970273, "grad_norm": 1.6726172170257443, "learning_rate": 9.356619270010178e-06, "loss": 0.5235, "step": 8382 }, { "epoch": 0.2469837809171663, "grad_norm": 1.5390599224647665, "learning_rate": 9.356366905266635e-06, "loss": 0.4357, "step": 8383 }, { "epoch": 0.24701324337462988, "grad_norm": 1.4770909356293174, "learning_rate": 9.356114494442767e-06, "loss": 0.5263, "step": 8384 }, { "epoch": 0.24704270583209345, "grad_norm": 1.493010163477772, "learning_rate": 9.35586203754125e-06, "loss": 0.5502, "step": 8385 }, { "epoch": 0.24707216828955703, "grad_norm": 1.5164946441841192, "learning_rate": 9.355609534564752e-06, "loss": 0.516, "step": 8386 }, { "epoch": 0.2471016307470206, "grad_norm": 1.6317968127134495, "learning_rate": 9.355356985515943e-06, "loss": 0.5958, "step": 8387 }, { "epoch": 0.24713109320448418, "grad_norm": 1.498081888360898, "learning_rate": 9.355104390397494e-06, "loss": 0.4587, "step": 8388 }, { "epoch": 0.24716055566194775, "grad_norm": 1.7094229770955416, "learning_rate": 9.35485174921208e-06, "loss": 0.5588, "step": 8389 }, { "epoch": 0.24719001811941135, "grad_norm": 1.6106699460853604, "learning_rate": 9.35459906196237e-06, "loss": 0.5031, "step": 8390 }, { "epoch": 0.24721948057687493, "grad_norm": 1.7097934436545779, "learning_rate": 9.35434632865104e-06, "loss": 0.5591, "step": 8391 }, { "epoch": 0.2472489430343385, "grad_norm": 1.4337283439429702, "learning_rate": 9.354093549280761e-06, "loss": 0.5058, "step": 8392 }, { "epoch": 0.24727840549180208, "grad_norm": 1.6288153496650624, "learning_rate": 9.353840723854205e-06, "loss": 0.4645, "step": 8393 }, { "epoch": 0.24730786794926565, "grad_norm": 1.4998014497042567, "learning_rate": 9.353587852374052e-06, "loss": 0.5024, "step": 8394 }, { "epoch": 0.24733733040672923, "grad_norm": 1.5557146906471686, "learning_rate": 9.353334934842971e-06, "loss": 0.4435, "step": 8395 }, { "epoch": 0.2473667928641928, "grad_norm": 1.8881145167334892, "learning_rate": 9.353081971263641e-06, "loss": 0.5287, "step": 8396 }, { "epoch": 0.24739625532165638, "grad_norm": 1.4985100685087887, "learning_rate": 9.352828961638737e-06, "loss": 0.3441, "step": 8397 }, { "epoch": 0.24742571777911995, "grad_norm": 1.4394039709166873, "learning_rate": 9.352575905970934e-06, "loss": 0.4727, "step": 8398 }, { "epoch": 0.24745518023658353, "grad_norm": 1.7662986648000298, "learning_rate": 9.35232280426291e-06, "loss": 0.5695, "step": 8399 }, { "epoch": 0.2474846426940471, "grad_norm": 1.6227488827123133, "learning_rate": 9.352069656517341e-06, "loss": 0.695, "step": 8400 }, { "epoch": 0.24751410515151068, "grad_norm": 1.4836313326167243, "learning_rate": 9.351816462736907e-06, "loss": 0.4363, "step": 8401 }, { "epoch": 0.24754356760897425, "grad_norm": 1.6693497934248804, "learning_rate": 9.351563222924283e-06, "loss": 0.4863, "step": 8402 }, { "epoch": 0.24757303006643785, "grad_norm": 1.5663546388599896, "learning_rate": 9.35130993708215e-06, "loss": 0.5562, "step": 8403 }, { "epoch": 0.24760249252390143, "grad_norm": 1.549837370870661, "learning_rate": 9.351056605213186e-06, "loss": 0.5091, "step": 8404 }, { "epoch": 0.247631954981365, "grad_norm": 1.6236577341033167, "learning_rate": 9.350803227320074e-06, "loss": 0.5142, "step": 8405 }, { "epoch": 0.24766141743882858, "grad_norm": 1.8612833978302779, "learning_rate": 9.350549803405489e-06, "loss": 0.6291, "step": 8406 }, { "epoch": 0.24769087989629215, "grad_norm": 1.5927138009154866, "learning_rate": 9.350296333472113e-06, "loss": 0.4882, "step": 8407 }, { "epoch": 0.24772034235375573, "grad_norm": 1.4811351982671659, "learning_rate": 9.350042817522631e-06, "loss": 0.4034, "step": 8408 }, { "epoch": 0.2477498048112193, "grad_norm": 1.5973387338741196, "learning_rate": 9.34978925555972e-06, "loss": 0.33, "step": 8409 }, { "epoch": 0.24777926726868288, "grad_norm": 1.6384282479429124, "learning_rate": 9.349535647586063e-06, "loss": 0.4128, "step": 8410 }, { "epoch": 0.24780872972614645, "grad_norm": 1.588933679377785, "learning_rate": 9.349281993604347e-06, "loss": 0.49, "step": 8411 }, { "epoch": 0.24783819218361003, "grad_norm": 1.730355146198057, "learning_rate": 9.349028293617249e-06, "loss": 0.5929, "step": 8412 }, { "epoch": 0.2478676546410736, "grad_norm": 1.659492246634229, "learning_rate": 9.348774547627454e-06, "loss": 0.4376, "step": 8413 }, { "epoch": 0.24789711709853718, "grad_norm": 1.613745552142544, "learning_rate": 9.348520755637648e-06, "loss": 0.4755, "step": 8414 }, { "epoch": 0.24792657955600075, "grad_norm": 1.6145615215094116, "learning_rate": 9.348266917650516e-06, "loss": 0.5428, "step": 8415 }, { "epoch": 0.24795604201346436, "grad_norm": 1.9006169658694416, "learning_rate": 9.348013033668741e-06, "loss": 0.633, "step": 8416 }, { "epoch": 0.24798550447092793, "grad_norm": 1.5122856028087102, "learning_rate": 9.34775910369501e-06, "loss": 0.4765, "step": 8417 }, { "epoch": 0.2480149669283915, "grad_norm": 1.436757577340162, "learning_rate": 9.347505127732008e-06, "loss": 0.5004, "step": 8418 }, { "epoch": 0.24804442938585508, "grad_norm": 1.5554113777837622, "learning_rate": 9.347251105782422e-06, "loss": 0.5966, "step": 8419 }, { "epoch": 0.24807389184331866, "grad_norm": 1.4140904493030892, "learning_rate": 9.346997037848938e-06, "loss": 0.3998, "step": 8420 }, { "epoch": 0.24810335430078223, "grad_norm": 1.6069948293921548, "learning_rate": 9.346742923934242e-06, "loss": 0.6202, "step": 8421 }, { "epoch": 0.2481328167582458, "grad_norm": 1.5267982131775246, "learning_rate": 9.346488764041027e-06, "loss": 0.4328, "step": 8422 }, { "epoch": 0.24816227921570938, "grad_norm": 1.658879954851769, "learning_rate": 9.346234558171978e-06, "loss": 0.3762, "step": 8423 }, { "epoch": 0.24819174167317296, "grad_norm": 1.6731203468703553, "learning_rate": 9.345980306329782e-06, "loss": 0.4924, "step": 8424 }, { "epoch": 0.24822120413063653, "grad_norm": 1.438907549870995, "learning_rate": 9.345726008517134e-06, "loss": 0.3781, "step": 8425 }, { "epoch": 0.2482506665881001, "grad_norm": 1.5311630899665007, "learning_rate": 9.34547166473672e-06, "loss": 0.5141, "step": 8426 }, { "epoch": 0.24828012904556368, "grad_norm": 1.6002734798586251, "learning_rate": 9.34521727499123e-06, "loss": 0.5442, "step": 8427 }, { "epoch": 0.24830959150302725, "grad_norm": 1.5743909810106118, "learning_rate": 9.344962839283355e-06, "loss": 0.5973, "step": 8428 }, { "epoch": 0.24833905396049086, "grad_norm": 1.533579102267951, "learning_rate": 9.34470835761579e-06, "loss": 0.4861, "step": 8429 }, { "epoch": 0.24836851641795443, "grad_norm": 1.555803824024943, "learning_rate": 9.344453829991222e-06, "loss": 0.4565, "step": 8430 }, { "epoch": 0.248397978875418, "grad_norm": 1.6287156483861134, "learning_rate": 9.344199256412347e-06, "loss": 0.5613, "step": 8431 }, { "epoch": 0.24842744133288158, "grad_norm": 1.4556942085835025, "learning_rate": 9.343944636881855e-06, "loss": 0.3404, "step": 8432 }, { "epoch": 0.24845690379034516, "grad_norm": 1.4761558858264172, "learning_rate": 9.34368997140244e-06, "loss": 0.4071, "step": 8433 }, { "epoch": 0.24848636624780873, "grad_norm": 1.7685618485235104, "learning_rate": 9.343435259976799e-06, "loss": 0.6233, "step": 8434 }, { "epoch": 0.2485158287052723, "grad_norm": 1.5676184346378637, "learning_rate": 9.34318050260762e-06, "loss": 0.5278, "step": 8435 }, { "epoch": 0.24854529116273588, "grad_norm": 1.6594120343165315, "learning_rate": 9.342925699297604e-06, "loss": 0.5812, "step": 8436 }, { "epoch": 0.24857475362019946, "grad_norm": 1.7890505511958612, "learning_rate": 9.342670850049442e-06, "loss": 0.5432, "step": 8437 }, { "epoch": 0.24860421607766303, "grad_norm": 1.5782491884995575, "learning_rate": 9.342415954865832e-06, "loss": 0.375, "step": 8438 }, { "epoch": 0.2486336785351266, "grad_norm": 1.5720964316452364, "learning_rate": 9.34216101374947e-06, "loss": 0.4702, "step": 8439 }, { "epoch": 0.24866314099259018, "grad_norm": 1.5128652064010233, "learning_rate": 9.341906026703052e-06, "loss": 0.5076, "step": 8440 }, { "epoch": 0.24869260345005376, "grad_norm": 1.609247805686754, "learning_rate": 9.341650993729273e-06, "loss": 0.4581, "step": 8441 }, { "epoch": 0.24872206590751736, "grad_norm": 1.618258608988337, "learning_rate": 9.341395914830835e-06, "loss": 0.4763, "step": 8442 }, { "epoch": 0.24875152836498093, "grad_norm": 1.5051544977409599, "learning_rate": 9.341140790010432e-06, "loss": 0.6136, "step": 8443 }, { "epoch": 0.2487809908224445, "grad_norm": 1.6418430489727769, "learning_rate": 9.340885619270768e-06, "loss": 0.3218, "step": 8444 }, { "epoch": 0.24881045327990808, "grad_norm": 1.4288371323236186, "learning_rate": 9.340630402614537e-06, "loss": 0.3596, "step": 8445 }, { "epoch": 0.24883991573737166, "grad_norm": 1.4820625503551592, "learning_rate": 9.34037514004444e-06, "loss": 0.5171, "step": 8446 }, { "epoch": 0.24886937819483523, "grad_norm": 1.5558904499670296, "learning_rate": 9.340119831563179e-06, "loss": 0.5645, "step": 8447 }, { "epoch": 0.2488988406522988, "grad_norm": 1.5643944967960033, "learning_rate": 9.339864477173451e-06, "loss": 0.4943, "step": 8448 }, { "epoch": 0.24892830310976238, "grad_norm": 1.5786738858631548, "learning_rate": 9.33960907687796e-06, "loss": 0.4627, "step": 8449 }, { "epoch": 0.24895776556722596, "grad_norm": 1.7472195379894628, "learning_rate": 9.339353630679408e-06, "loss": 0.5151, "step": 8450 }, { "epoch": 0.24898722802468953, "grad_norm": 1.4491526821984524, "learning_rate": 9.339098138580495e-06, "loss": 0.4851, "step": 8451 }, { "epoch": 0.2490166904821531, "grad_norm": 1.7439788404588616, "learning_rate": 9.338842600583923e-06, "loss": 0.6354, "step": 8452 }, { "epoch": 0.24904615293961668, "grad_norm": 1.7066008528127468, "learning_rate": 9.338587016692397e-06, "loss": 0.5667, "step": 8453 }, { "epoch": 0.24907561539708026, "grad_norm": 1.4941045340020767, "learning_rate": 9.338331386908621e-06, "loss": 0.4914, "step": 8454 }, { "epoch": 0.24910507785454386, "grad_norm": 1.5561868001520107, "learning_rate": 9.338075711235297e-06, "loss": 0.5124, "step": 8455 }, { "epoch": 0.24913454031200744, "grad_norm": 1.6080394664901303, "learning_rate": 9.33781998967513e-06, "loss": 0.5265, "step": 8456 }, { "epoch": 0.249164002769471, "grad_norm": 1.5390464535140758, "learning_rate": 9.337564222230827e-06, "loss": 0.501, "step": 8457 }, { "epoch": 0.24919346522693459, "grad_norm": 1.945080614128046, "learning_rate": 9.337308408905087e-06, "loss": 0.5135, "step": 8458 }, { "epoch": 0.24922292768439816, "grad_norm": 1.5942779413068264, "learning_rate": 9.337052549700625e-06, "loss": 0.4998, "step": 8459 }, { "epoch": 0.24925239014186173, "grad_norm": 1.7156719989931677, "learning_rate": 9.336796644620141e-06, "loss": 0.5403, "step": 8460 }, { "epoch": 0.2492818525993253, "grad_norm": 1.5964920262341482, "learning_rate": 9.336540693666343e-06, "loss": 0.4827, "step": 8461 }, { "epoch": 0.24931131505678888, "grad_norm": 1.7337481935517005, "learning_rate": 9.336284696841943e-06, "loss": 0.5001, "step": 8462 }, { "epoch": 0.24934077751425246, "grad_norm": 1.7843001245889045, "learning_rate": 9.33602865414964e-06, "loss": 0.5435, "step": 8463 }, { "epoch": 0.24937023997171603, "grad_norm": 1.6332632822899402, "learning_rate": 9.335772565592149e-06, "loss": 0.535, "step": 8464 }, { "epoch": 0.2493997024291796, "grad_norm": 1.6906265775340028, "learning_rate": 9.335516431172179e-06, "loss": 0.5901, "step": 8465 }, { "epoch": 0.24942916488664318, "grad_norm": 1.5218162739882466, "learning_rate": 9.335260250892435e-06, "loss": 0.5764, "step": 8466 }, { "epoch": 0.24945862734410676, "grad_norm": 1.89132049934812, "learning_rate": 9.335004024755631e-06, "loss": 0.5256, "step": 8467 }, { "epoch": 0.24948808980157036, "grad_norm": 1.697745735767173, "learning_rate": 9.334747752764475e-06, "loss": 0.5366, "step": 8468 }, { "epoch": 0.24951755225903394, "grad_norm": 1.6205601807389094, "learning_rate": 9.334491434921678e-06, "loss": 0.4726, "step": 8469 }, { "epoch": 0.2495470147164975, "grad_norm": 1.371383501752957, "learning_rate": 9.334235071229952e-06, "loss": 0.4294, "step": 8470 }, { "epoch": 0.2495764771739611, "grad_norm": 1.7255926128676657, "learning_rate": 9.333978661692008e-06, "loss": 0.4767, "step": 8471 }, { "epoch": 0.24960593963142466, "grad_norm": 1.7928965252194489, "learning_rate": 9.33372220631056e-06, "loss": 0.5875, "step": 8472 }, { "epoch": 0.24963540208888824, "grad_norm": 1.721700042602955, "learning_rate": 9.333465705088317e-06, "loss": 0.4093, "step": 8473 }, { "epoch": 0.2496648645463518, "grad_norm": 1.6120859096312008, "learning_rate": 9.333209158027996e-06, "loss": 0.4974, "step": 8474 }, { "epoch": 0.2496943270038154, "grad_norm": 1.8241855730414775, "learning_rate": 9.332952565132307e-06, "loss": 0.6012, "step": 8475 }, { "epoch": 0.24972378946127896, "grad_norm": 1.4276380554066892, "learning_rate": 9.332695926403969e-06, "loss": 0.521, "step": 8476 }, { "epoch": 0.24975325191874254, "grad_norm": 1.7122909775713124, "learning_rate": 9.332439241845691e-06, "loss": 0.5177, "step": 8477 }, { "epoch": 0.2497827143762061, "grad_norm": 1.5307370643207499, "learning_rate": 9.332182511460196e-06, "loss": 0.4411, "step": 8478 }, { "epoch": 0.24981217683366969, "grad_norm": 1.5223107338168074, "learning_rate": 9.331925735250188e-06, "loss": 0.3675, "step": 8479 }, { "epoch": 0.24984163929113326, "grad_norm": 1.419710897560564, "learning_rate": 9.331668913218394e-06, "loss": 0.4375, "step": 8480 }, { "epoch": 0.24987110174859686, "grad_norm": 1.6570578956856838, "learning_rate": 9.331412045367526e-06, "loss": 0.5838, "step": 8481 }, { "epoch": 0.24990056420606044, "grad_norm": 1.458507022930848, "learning_rate": 9.331155131700301e-06, "loss": 0.4776, "step": 8482 }, { "epoch": 0.249930026663524, "grad_norm": 1.7545779593979551, "learning_rate": 9.330898172219439e-06, "loss": 0.5755, "step": 8483 }, { "epoch": 0.2499594891209876, "grad_norm": 1.398258754623477, "learning_rate": 9.330641166927652e-06, "loss": 0.5017, "step": 8484 }, { "epoch": 0.24998895157845116, "grad_norm": 1.8215174858929577, "learning_rate": 9.330384115827664e-06, "loss": 0.7108, "step": 8485 }, { "epoch": 0.25001841403591474, "grad_norm": 1.916868983597844, "learning_rate": 9.330127018922195e-06, "loss": 0.706, "step": 8486 }, { "epoch": 0.25004787649337834, "grad_norm": 1.651936796268687, "learning_rate": 9.32986987621396e-06, "loss": 0.5732, "step": 8487 }, { "epoch": 0.2500773389508419, "grad_norm": 1.616313326389689, "learning_rate": 9.32961268770568e-06, "loss": 0.5221, "step": 8488 }, { "epoch": 0.2501068014083055, "grad_norm": 1.4951794822206799, "learning_rate": 9.329355453400077e-06, "loss": 0.508, "step": 8489 }, { "epoch": 0.25013626386576904, "grad_norm": 1.5038958166224792, "learning_rate": 9.329098173299873e-06, "loss": 0.4998, "step": 8490 }, { "epoch": 0.25016572632323264, "grad_norm": 1.9431009441966385, "learning_rate": 9.328840847407787e-06, "loss": 0.5661, "step": 8491 }, { "epoch": 0.2501951887806962, "grad_norm": 1.5592164163274158, "learning_rate": 9.328583475726542e-06, "loss": 0.6016, "step": 8492 }, { "epoch": 0.2502246512381598, "grad_norm": 1.5803518697423038, "learning_rate": 9.32832605825886e-06, "loss": 0.5149, "step": 8493 }, { "epoch": 0.25025411369562334, "grad_norm": 1.583258644051019, "learning_rate": 9.328068595007464e-06, "loss": 0.521, "step": 8494 }, { "epoch": 0.25028357615308694, "grad_norm": 1.511171354515986, "learning_rate": 9.327811085975077e-06, "loss": 0.5047, "step": 8495 }, { "epoch": 0.2503130386105505, "grad_norm": 1.4491934254697874, "learning_rate": 9.327553531164424e-06, "loss": 0.5, "step": 8496 }, { "epoch": 0.2503425010680141, "grad_norm": 1.6796129986212536, "learning_rate": 9.327295930578227e-06, "loss": 0.4802, "step": 8497 }, { "epoch": 0.25037196352547764, "grad_norm": 1.5272535631012416, "learning_rate": 9.327038284219215e-06, "loss": 0.5751, "step": 8498 }, { "epoch": 0.25040142598294124, "grad_norm": 1.7148382222098286, "learning_rate": 9.326780592090108e-06, "loss": 0.5269, "step": 8499 }, { "epoch": 0.25043088844040484, "grad_norm": 1.521100019524873, "learning_rate": 9.326522854193636e-06, "loss": 0.4712, "step": 8500 }, { "epoch": 0.2504603508978684, "grad_norm": 1.4985112120309816, "learning_rate": 9.326265070532525e-06, "loss": 0.4163, "step": 8501 }, { "epoch": 0.250489813355332, "grad_norm": 1.5556172058140265, "learning_rate": 9.326007241109498e-06, "loss": 0.5368, "step": 8502 }, { "epoch": 0.25051927581279554, "grad_norm": 1.650191654444347, "learning_rate": 9.325749365927287e-06, "loss": 0.5366, "step": 8503 }, { "epoch": 0.25054873827025914, "grad_norm": 1.7623154173077282, "learning_rate": 9.325491444988618e-06, "loss": 0.5241, "step": 8504 }, { "epoch": 0.2505782007277227, "grad_norm": 1.6270925557128655, "learning_rate": 9.325233478296216e-06, "loss": 0.4209, "step": 8505 }, { "epoch": 0.2506076631851863, "grad_norm": 1.566060179268635, "learning_rate": 9.324975465852813e-06, "loss": 0.4209, "step": 8506 }, { "epoch": 0.25063712564264984, "grad_norm": 1.8009035081414995, "learning_rate": 9.324717407661139e-06, "loss": 0.6398, "step": 8507 }, { "epoch": 0.25066658810011344, "grad_norm": 1.5104417894816995, "learning_rate": 9.32445930372392e-06, "loss": 0.4532, "step": 8508 }, { "epoch": 0.250696050557577, "grad_norm": 1.7968432198346855, "learning_rate": 9.324201154043891e-06, "loss": 0.5773, "step": 8509 }, { "epoch": 0.2507255130150406, "grad_norm": 1.6059270479576297, "learning_rate": 9.323942958623779e-06, "loss": 0.609, "step": 8510 }, { "epoch": 0.25075497547250414, "grad_norm": 1.507310219301052, "learning_rate": 9.323684717466317e-06, "loss": 0.388, "step": 8511 }, { "epoch": 0.25078443792996774, "grad_norm": 1.6157782313589262, "learning_rate": 9.323426430574234e-06, "loss": 0.5, "step": 8512 }, { "epoch": 0.25081390038743134, "grad_norm": 1.4346686533331827, "learning_rate": 9.323168097950267e-06, "loss": 0.4304, "step": 8513 }, { "epoch": 0.2508433628448949, "grad_norm": 1.638740522055684, "learning_rate": 9.322909719597142e-06, "loss": 0.4638, "step": 8514 }, { "epoch": 0.2508728253023585, "grad_norm": 1.42967632968632, "learning_rate": 9.322651295517597e-06, "loss": 0.3568, "step": 8515 }, { "epoch": 0.25090228775982204, "grad_norm": 1.814081110261512, "learning_rate": 9.322392825714363e-06, "loss": 0.608, "step": 8516 }, { "epoch": 0.25093175021728564, "grad_norm": 1.50204579964445, "learning_rate": 9.322134310190177e-06, "loss": 0.3986, "step": 8517 }, { "epoch": 0.2509612126747492, "grad_norm": 1.5646867723818028, "learning_rate": 9.321875748947768e-06, "loss": 0.5151, "step": 8518 }, { "epoch": 0.2509906751322128, "grad_norm": 1.5691513215094555, "learning_rate": 9.321617141989877e-06, "loss": 0.419, "step": 8519 }, { "epoch": 0.25102013758967634, "grad_norm": 1.829857192777803, "learning_rate": 9.321358489319236e-06, "loss": 0.5373, "step": 8520 }, { "epoch": 0.25104960004713994, "grad_norm": 1.4484306231616517, "learning_rate": 9.321099790938582e-06, "loss": 0.4878, "step": 8521 }, { "epoch": 0.2510790625046035, "grad_norm": 1.5099682584749952, "learning_rate": 9.320841046850652e-06, "loss": 0.543, "step": 8522 }, { "epoch": 0.2511085249620671, "grad_norm": 1.7904323541990954, "learning_rate": 9.320582257058182e-06, "loss": 0.7477, "step": 8523 }, { "epoch": 0.25113798741953064, "grad_norm": 1.5529974913237872, "learning_rate": 9.32032342156391e-06, "loss": 0.4681, "step": 8524 }, { "epoch": 0.25116744987699424, "grad_norm": 1.9309984807908704, "learning_rate": 9.320064540370573e-06, "loss": 0.5819, "step": 8525 }, { "epoch": 0.25119691233445784, "grad_norm": 1.6019172836853077, "learning_rate": 9.319805613480909e-06, "loss": 0.529, "step": 8526 }, { "epoch": 0.2512263747919214, "grad_norm": 1.668200807620291, "learning_rate": 9.31954664089766e-06, "loss": 0.5519, "step": 8527 }, { "epoch": 0.251255837249385, "grad_norm": 1.6001752923051509, "learning_rate": 9.31928762262356e-06, "loss": 0.4342, "step": 8528 }, { "epoch": 0.25128529970684854, "grad_norm": 1.5895010294164753, "learning_rate": 9.319028558661355e-06, "loss": 0.4142, "step": 8529 }, { "epoch": 0.25131476216431214, "grad_norm": 1.7052227328246166, "learning_rate": 9.31876944901378e-06, "loss": 0.3877, "step": 8530 }, { "epoch": 0.2513442246217757, "grad_norm": 1.4274774142898765, "learning_rate": 9.318510293683578e-06, "loss": 0.445, "step": 8531 }, { "epoch": 0.2513736870792393, "grad_norm": 1.475611551535529, "learning_rate": 9.318251092673491e-06, "loss": 0.4051, "step": 8532 }, { "epoch": 0.25140314953670284, "grad_norm": 1.6401944779644173, "learning_rate": 9.31799184598626e-06, "loss": 0.492, "step": 8533 }, { "epoch": 0.25143261199416644, "grad_norm": 1.6090680635817265, "learning_rate": 9.317732553624627e-06, "loss": 0.5695, "step": 8534 }, { "epoch": 0.25146207445163, "grad_norm": 1.755403495811788, "learning_rate": 9.317473215591335e-06, "loss": 0.6626, "step": 8535 }, { "epoch": 0.2514915369090936, "grad_norm": 1.522724247466391, "learning_rate": 9.317213831889126e-06, "loss": 0.3884, "step": 8536 }, { "epoch": 0.25152099936655714, "grad_norm": 1.658442122442066, "learning_rate": 9.316954402520748e-06, "loss": 0.413, "step": 8537 }, { "epoch": 0.25155046182402074, "grad_norm": 1.732999144287802, "learning_rate": 9.316694927488938e-06, "loss": 0.5752, "step": 8538 }, { "epoch": 0.25157992428148435, "grad_norm": 1.6409290784892099, "learning_rate": 9.316435406796447e-06, "loss": 0.4019, "step": 8539 }, { "epoch": 0.2516093867389479, "grad_norm": 1.399836891223383, "learning_rate": 9.316175840446018e-06, "loss": 0.5295, "step": 8540 }, { "epoch": 0.2516388491964115, "grad_norm": 1.5600293633418638, "learning_rate": 9.315916228440395e-06, "loss": 0.4802, "step": 8541 }, { "epoch": 0.25166831165387504, "grad_norm": 1.6731775120093184, "learning_rate": 9.315656570782326e-06, "loss": 0.5734, "step": 8542 }, { "epoch": 0.25169777411133865, "grad_norm": 1.4844088677089915, "learning_rate": 9.315396867474557e-06, "loss": 0.5429, "step": 8543 }, { "epoch": 0.2517272365688022, "grad_norm": 1.397582672147216, "learning_rate": 9.315137118519834e-06, "loss": 0.4239, "step": 8544 }, { "epoch": 0.2517566990262658, "grad_norm": 1.579663435813411, "learning_rate": 9.314877323920908e-06, "loss": 0.5913, "step": 8545 }, { "epoch": 0.25178616148372934, "grad_norm": 1.3873035950137274, "learning_rate": 9.314617483680523e-06, "loss": 0.3744, "step": 8546 }, { "epoch": 0.25181562394119295, "grad_norm": 1.588431778627802, "learning_rate": 9.314357597801428e-06, "loss": 0.4917, "step": 8547 }, { "epoch": 0.2518450863986565, "grad_norm": 1.4912649794830506, "learning_rate": 9.314097666286377e-06, "loss": 0.3864, "step": 8548 }, { "epoch": 0.2518745488561201, "grad_norm": 1.6394962762185188, "learning_rate": 9.313837689138112e-06, "loss": 0.5128, "step": 8549 }, { "epoch": 0.25190401131358364, "grad_norm": 1.7542746390853239, "learning_rate": 9.313577666359389e-06, "loss": 0.6505, "step": 8550 }, { "epoch": 0.25193347377104724, "grad_norm": 1.7038736688996534, "learning_rate": 9.313317597952956e-06, "loss": 0.4598, "step": 8551 }, { "epoch": 0.25196293622851085, "grad_norm": 1.619722679117845, "learning_rate": 9.31305748392156e-06, "loss": 0.491, "step": 8552 }, { "epoch": 0.2519923986859744, "grad_norm": 1.6334421825766783, "learning_rate": 9.312797324267961e-06, "loss": 0.5535, "step": 8553 }, { "epoch": 0.252021861143438, "grad_norm": 1.606287198547655, "learning_rate": 9.312537118994906e-06, "loss": 0.5516, "step": 8554 }, { "epoch": 0.25205132360090154, "grad_norm": 1.6001155685453852, "learning_rate": 9.312276868105145e-06, "loss": 0.4645, "step": 8555 }, { "epoch": 0.25208078605836515, "grad_norm": 1.602003259407875, "learning_rate": 9.312016571601435e-06, "loss": 0.4493, "step": 8556 }, { "epoch": 0.2521102485158287, "grad_norm": 1.7007851841284733, "learning_rate": 9.311756229486529e-06, "loss": 0.6204, "step": 8557 }, { "epoch": 0.2521397109732923, "grad_norm": 1.52748670762586, "learning_rate": 9.311495841763178e-06, "loss": 0.3669, "step": 8558 }, { "epoch": 0.25216917343075584, "grad_norm": 1.6135735108349745, "learning_rate": 9.311235408434138e-06, "loss": 0.6092, "step": 8559 }, { "epoch": 0.25219863588821945, "grad_norm": 1.584608729110099, "learning_rate": 9.310974929502164e-06, "loss": 0.5664, "step": 8560 }, { "epoch": 0.252228098345683, "grad_norm": 1.4455159979330727, "learning_rate": 9.310714404970012e-06, "loss": 0.4957, "step": 8561 }, { "epoch": 0.2522575608031466, "grad_norm": 1.5031531524456512, "learning_rate": 9.310453834840435e-06, "loss": 0.4687, "step": 8562 }, { "epoch": 0.25228702326061014, "grad_norm": 1.616058109879776, "learning_rate": 9.310193219116192e-06, "loss": 0.6145, "step": 8563 }, { "epoch": 0.25231648571807375, "grad_norm": 1.4722094589397665, "learning_rate": 9.309932557800039e-06, "loss": 0.4603, "step": 8564 }, { "epoch": 0.25234594817553735, "grad_norm": 1.5596067490302528, "learning_rate": 9.30967185089473e-06, "loss": 0.4281, "step": 8565 }, { "epoch": 0.2523754106330009, "grad_norm": 1.6556212990778687, "learning_rate": 9.309411098403029e-06, "loss": 0.4814, "step": 8566 }, { "epoch": 0.2524048730904645, "grad_norm": 1.691289454495541, "learning_rate": 9.30915030032769e-06, "loss": 0.6086, "step": 8567 }, { "epoch": 0.25243433554792805, "grad_norm": 1.7370209939812322, "learning_rate": 9.308889456671472e-06, "loss": 0.5461, "step": 8568 }, { "epoch": 0.25246379800539165, "grad_norm": 1.728097818359109, "learning_rate": 9.308628567437135e-06, "loss": 0.5105, "step": 8569 }, { "epoch": 0.2524932604628552, "grad_norm": 1.7387358365846275, "learning_rate": 9.308367632627438e-06, "loss": 0.5007, "step": 8570 }, { "epoch": 0.2525227229203188, "grad_norm": 1.6541095589674626, "learning_rate": 9.30810665224514e-06, "loss": 0.5424, "step": 8571 }, { "epoch": 0.25255218537778235, "grad_norm": 1.8052452060679731, "learning_rate": 9.307845626293004e-06, "loss": 0.4729, "step": 8572 }, { "epoch": 0.25258164783524595, "grad_norm": 1.454825824917293, "learning_rate": 9.30758455477379e-06, "loss": 0.3783, "step": 8573 }, { "epoch": 0.2526111102927095, "grad_norm": 1.5164473746173894, "learning_rate": 9.307323437690258e-06, "loss": 0.4449, "step": 8574 }, { "epoch": 0.2526405727501731, "grad_norm": 1.6756262287427859, "learning_rate": 9.307062275045173e-06, "loss": 0.5564, "step": 8575 }, { "epoch": 0.25267003520763665, "grad_norm": 1.552691979417572, "learning_rate": 9.306801066841294e-06, "loss": 0.3994, "step": 8576 }, { "epoch": 0.25269949766510025, "grad_norm": 1.6223080015081957, "learning_rate": 9.306539813081388e-06, "loss": 0.5392, "step": 8577 }, { "epoch": 0.25272896012256385, "grad_norm": 1.4293787482568505, "learning_rate": 9.306278513768214e-06, "loss": 0.3455, "step": 8578 }, { "epoch": 0.2527584225800274, "grad_norm": 1.4473182606845711, "learning_rate": 9.30601716890454e-06, "loss": 0.4326, "step": 8579 }, { "epoch": 0.252787885037491, "grad_norm": 1.502652204083097, "learning_rate": 9.305755778493128e-06, "loss": 0.6073, "step": 8580 }, { "epoch": 0.25281734749495455, "grad_norm": 1.4248406546943286, "learning_rate": 9.305494342536745e-06, "loss": 0.4253, "step": 8581 }, { "epoch": 0.25284680995241815, "grad_norm": 1.6687358489779862, "learning_rate": 9.305232861038154e-06, "loss": 0.6053, "step": 8582 }, { "epoch": 0.2528762724098817, "grad_norm": 1.7496312642507525, "learning_rate": 9.304971334000121e-06, "loss": 0.5503, "step": 8583 }, { "epoch": 0.2529057348673453, "grad_norm": 1.6188996102494426, "learning_rate": 9.304709761425415e-06, "loss": 0.473, "step": 8584 }, { "epoch": 0.25293519732480885, "grad_norm": 1.508048975241397, "learning_rate": 9.304448143316797e-06, "loss": 0.4179, "step": 8585 }, { "epoch": 0.25296465978227245, "grad_norm": 1.4783225679812355, "learning_rate": 9.304186479677043e-06, "loss": 0.5108, "step": 8586 }, { "epoch": 0.252994122239736, "grad_norm": 1.758442052139326, "learning_rate": 9.303924770508914e-06, "loss": 0.6023, "step": 8587 }, { "epoch": 0.2530235846971996, "grad_norm": 1.506639656273484, "learning_rate": 9.30366301581518e-06, "loss": 0.4344, "step": 8588 }, { "epoch": 0.25305304715466315, "grad_norm": 1.5599318523876742, "learning_rate": 9.303401215598614e-06, "loss": 0.4638, "step": 8589 }, { "epoch": 0.25308250961212675, "grad_norm": 1.6406216322541616, "learning_rate": 9.303139369861977e-06, "loss": 0.6578, "step": 8590 }, { "epoch": 0.25311197206959035, "grad_norm": 1.683656832537571, "learning_rate": 9.302877478608044e-06, "loss": 0.5067, "step": 8591 }, { "epoch": 0.2531414345270539, "grad_norm": 1.9068337016218344, "learning_rate": 9.302615541839586e-06, "loss": 0.6052, "step": 8592 }, { "epoch": 0.2531708969845175, "grad_norm": 1.6252231082051298, "learning_rate": 9.302353559559371e-06, "loss": 0.5458, "step": 8593 }, { "epoch": 0.25320035944198105, "grad_norm": 1.4002943196949655, "learning_rate": 9.302091531770172e-06, "loss": 0.3677, "step": 8594 }, { "epoch": 0.25322982189944465, "grad_norm": 1.4806383303215287, "learning_rate": 9.301829458474759e-06, "loss": 0.5561, "step": 8595 }, { "epoch": 0.2532592843569082, "grad_norm": 1.5194055862130713, "learning_rate": 9.301567339675905e-06, "loss": 0.655, "step": 8596 }, { "epoch": 0.2532887468143718, "grad_norm": 1.6658123023729485, "learning_rate": 9.301305175376383e-06, "loss": 0.5001, "step": 8597 }, { "epoch": 0.25331820927183535, "grad_norm": 1.550137319744802, "learning_rate": 9.301042965578965e-06, "loss": 0.4752, "step": 8598 }, { "epoch": 0.25334767172929895, "grad_norm": 1.6325677581859823, "learning_rate": 9.300780710286426e-06, "loss": 0.5896, "step": 8599 }, { "epoch": 0.2533771341867625, "grad_norm": 1.608729902153307, "learning_rate": 9.30051840950154e-06, "loss": 0.5426, "step": 8600 }, { "epoch": 0.2534065966442261, "grad_norm": 1.6063890304523276, "learning_rate": 9.30025606322708e-06, "loss": 0.4984, "step": 8601 }, { "epoch": 0.25343605910168965, "grad_norm": 1.6414694349300427, "learning_rate": 9.299993671465821e-06, "loss": 0.465, "step": 8602 }, { "epoch": 0.25346552155915325, "grad_norm": 1.5967468399110167, "learning_rate": 9.29973123422054e-06, "loss": 0.4459, "step": 8603 }, { "epoch": 0.25349498401661685, "grad_norm": 1.5561333473117094, "learning_rate": 9.299468751494015e-06, "loss": 0.4451, "step": 8604 }, { "epoch": 0.2535244464740804, "grad_norm": 1.5775140208923981, "learning_rate": 9.299206223289017e-06, "loss": 0.4978, "step": 8605 }, { "epoch": 0.253553908931544, "grad_norm": 1.538832849584989, "learning_rate": 9.298943649608327e-06, "loss": 0.4647, "step": 8606 }, { "epoch": 0.25358337138900755, "grad_norm": 1.5735708210456205, "learning_rate": 9.29868103045472e-06, "loss": 0.6361, "step": 8607 }, { "epoch": 0.25361283384647115, "grad_norm": 1.6943203616565443, "learning_rate": 9.298418365830977e-06, "loss": 0.5271, "step": 8608 }, { "epoch": 0.2536422963039347, "grad_norm": 1.6132453989141513, "learning_rate": 9.298155655739872e-06, "loss": 0.6205, "step": 8609 }, { "epoch": 0.2536717587613983, "grad_norm": 1.5767036822303073, "learning_rate": 9.297892900184188e-06, "loss": 0.4391, "step": 8610 }, { "epoch": 0.25370122121886185, "grad_norm": 1.7374641140723905, "learning_rate": 9.297630099166703e-06, "loss": 0.4145, "step": 8611 }, { "epoch": 0.25373068367632545, "grad_norm": 1.5895315068730733, "learning_rate": 9.297367252690194e-06, "loss": 0.537, "step": 8612 }, { "epoch": 0.253760146133789, "grad_norm": 1.6740521117408143, "learning_rate": 9.297104360757447e-06, "loss": 0.5828, "step": 8613 }, { "epoch": 0.2537896085912526, "grad_norm": 1.6151400691981108, "learning_rate": 9.296841423371239e-06, "loss": 0.4764, "step": 8614 }, { "epoch": 0.25381907104871615, "grad_norm": 1.4925764496297098, "learning_rate": 9.296578440534351e-06, "loss": 0.4555, "step": 8615 }, { "epoch": 0.25384853350617975, "grad_norm": 1.6056708846637309, "learning_rate": 9.296315412249567e-06, "loss": 0.5322, "step": 8616 }, { "epoch": 0.25387799596364335, "grad_norm": 1.5759085409743119, "learning_rate": 9.296052338519666e-06, "loss": 0.509, "step": 8617 }, { "epoch": 0.2539074584211069, "grad_norm": 1.6579353579936227, "learning_rate": 9.295789219347435e-06, "loss": 0.6324, "step": 8618 }, { "epoch": 0.2539369208785705, "grad_norm": 1.7430902112561415, "learning_rate": 9.295526054735652e-06, "loss": 0.6279, "step": 8619 }, { "epoch": 0.25396638333603405, "grad_norm": 1.6408137297723668, "learning_rate": 9.295262844687106e-06, "loss": 0.4357, "step": 8620 }, { "epoch": 0.25399584579349765, "grad_norm": 1.441476009607914, "learning_rate": 9.294999589204577e-06, "loss": 0.3879, "step": 8621 }, { "epoch": 0.2540253082509612, "grad_norm": 1.7111212290802218, "learning_rate": 9.294736288290853e-06, "loss": 0.5656, "step": 8622 }, { "epoch": 0.2540547707084248, "grad_norm": 1.8373769226954704, "learning_rate": 9.294472941948716e-06, "loss": 0.6538, "step": 8623 }, { "epoch": 0.25408423316588835, "grad_norm": 1.4949772636729477, "learning_rate": 9.294209550180953e-06, "loss": 0.4813, "step": 8624 }, { "epoch": 0.25411369562335195, "grad_norm": 1.4772684783389276, "learning_rate": 9.293946112990351e-06, "loss": 0.3756, "step": 8625 }, { "epoch": 0.2541431580808155, "grad_norm": 1.778374482070684, "learning_rate": 9.293682630379695e-06, "loss": 0.4498, "step": 8626 }, { "epoch": 0.2541726205382791, "grad_norm": 1.5408369249935794, "learning_rate": 9.293419102351774e-06, "loss": 0.5147, "step": 8627 }, { "epoch": 0.25420208299574265, "grad_norm": 1.5748375178852099, "learning_rate": 9.293155528909373e-06, "loss": 0.4011, "step": 8628 }, { "epoch": 0.25423154545320625, "grad_norm": 1.5612375462880377, "learning_rate": 9.292891910055279e-06, "loss": 0.5191, "step": 8629 }, { "epoch": 0.25426100791066986, "grad_norm": 1.5289887560404671, "learning_rate": 9.292628245792286e-06, "loss": 0.5224, "step": 8630 }, { "epoch": 0.2542904703681334, "grad_norm": 1.5471506142617961, "learning_rate": 9.29236453612318e-06, "loss": 0.4398, "step": 8631 }, { "epoch": 0.254319932825597, "grad_norm": 1.4482122098071568, "learning_rate": 9.292100781050746e-06, "loss": 0.5033, "step": 8632 }, { "epoch": 0.25434939528306055, "grad_norm": 1.6909627556844025, "learning_rate": 9.291836980577781e-06, "loss": 0.4745, "step": 8633 }, { "epoch": 0.25437885774052416, "grad_norm": 1.6928542361591945, "learning_rate": 9.291573134707072e-06, "loss": 0.4147, "step": 8634 }, { "epoch": 0.2544083201979877, "grad_norm": 1.5829686662706193, "learning_rate": 9.291309243441411e-06, "loss": 0.5173, "step": 8635 }, { "epoch": 0.2544377826554513, "grad_norm": 1.5858947053924404, "learning_rate": 9.291045306783588e-06, "loss": 0.5654, "step": 8636 }, { "epoch": 0.25446724511291485, "grad_norm": 1.5622966233916393, "learning_rate": 9.290781324736395e-06, "loss": 0.4886, "step": 8637 }, { "epoch": 0.25449670757037846, "grad_norm": 1.5318536961769247, "learning_rate": 9.290517297302625e-06, "loss": 0.4291, "step": 8638 }, { "epoch": 0.254526170027842, "grad_norm": 1.6809256683848162, "learning_rate": 9.29025322448507e-06, "loss": 0.4935, "step": 8639 }, { "epoch": 0.2545556324853056, "grad_norm": 1.4394718409730487, "learning_rate": 9.289989106286525e-06, "loss": 0.4949, "step": 8640 }, { "epoch": 0.25458509494276915, "grad_norm": 1.5214314767528567, "learning_rate": 9.289724942709782e-06, "loss": 0.4695, "step": 8641 }, { "epoch": 0.25461455740023275, "grad_norm": 1.796875131737262, "learning_rate": 9.289460733757637e-06, "loss": 0.5946, "step": 8642 }, { "epoch": 0.25464401985769636, "grad_norm": 1.8142695632035049, "learning_rate": 9.289196479432883e-06, "loss": 0.4807, "step": 8643 }, { "epoch": 0.2546734823151599, "grad_norm": 1.9515242505988093, "learning_rate": 9.288932179738315e-06, "loss": 0.6691, "step": 8644 }, { "epoch": 0.2547029447726235, "grad_norm": 1.665811256964107, "learning_rate": 9.28866783467673e-06, "loss": 0.5172, "step": 8645 }, { "epoch": 0.25473240723008705, "grad_norm": 1.6606952212460677, "learning_rate": 9.288403444250924e-06, "loss": 0.5378, "step": 8646 }, { "epoch": 0.25476186968755066, "grad_norm": 1.5403897575725984, "learning_rate": 9.288139008463692e-06, "loss": 0.5559, "step": 8647 }, { "epoch": 0.2547913321450142, "grad_norm": 1.5462368696445092, "learning_rate": 9.287874527317836e-06, "loss": 0.4711, "step": 8648 }, { "epoch": 0.2548207946024778, "grad_norm": 1.470144200948184, "learning_rate": 9.287610000816146e-06, "loss": 0.4302, "step": 8649 }, { "epoch": 0.25485025705994135, "grad_norm": 1.488701440336026, "learning_rate": 9.287345428961425e-06, "loss": 0.3931, "step": 8650 }, { "epoch": 0.25487971951740496, "grad_norm": 1.6578226816950168, "learning_rate": 9.287080811756473e-06, "loss": 0.5442, "step": 8651 }, { "epoch": 0.2549091819748685, "grad_norm": 1.5560318969480254, "learning_rate": 9.286816149204084e-06, "loss": 0.5421, "step": 8652 }, { "epoch": 0.2549386444323321, "grad_norm": 1.5697824670394476, "learning_rate": 9.286551441307062e-06, "loss": 0.4694, "step": 8653 }, { "epoch": 0.25496810688979565, "grad_norm": 1.6986026058734747, "learning_rate": 9.286286688068205e-06, "loss": 0.5293, "step": 8654 }, { "epoch": 0.25499756934725926, "grad_norm": 1.5995576366617008, "learning_rate": 9.286021889490312e-06, "loss": 0.4961, "step": 8655 }, { "epoch": 0.25502703180472286, "grad_norm": 1.7597806305807766, "learning_rate": 9.285757045576187e-06, "loss": 0.6664, "step": 8656 }, { "epoch": 0.2550564942621864, "grad_norm": 1.5979041917485743, "learning_rate": 9.28549215632863e-06, "loss": 0.4451, "step": 8657 }, { "epoch": 0.25508595671965, "grad_norm": 1.6058510680270022, "learning_rate": 9.285227221750443e-06, "loss": 0.4335, "step": 8658 }, { "epoch": 0.25511541917711356, "grad_norm": 1.5917985760455078, "learning_rate": 9.284962241844428e-06, "loss": 0.5424, "step": 8659 }, { "epoch": 0.25514488163457716, "grad_norm": 1.5741734247584185, "learning_rate": 9.284697216613389e-06, "loss": 0.388, "step": 8660 }, { "epoch": 0.2551743440920407, "grad_norm": 1.4996842745676453, "learning_rate": 9.284432146060128e-06, "loss": 0.3929, "step": 8661 }, { "epoch": 0.2552038065495043, "grad_norm": 1.5614660828537021, "learning_rate": 9.28416703018745e-06, "loss": 0.4057, "step": 8662 }, { "epoch": 0.25523326900696786, "grad_norm": 1.5753651282590044, "learning_rate": 9.283901868998159e-06, "loss": 0.501, "step": 8663 }, { "epoch": 0.25526273146443146, "grad_norm": 1.6674838472297826, "learning_rate": 9.283636662495058e-06, "loss": 0.4993, "step": 8664 }, { "epoch": 0.255292193921895, "grad_norm": 1.6285152789764636, "learning_rate": 9.283371410680955e-06, "loss": 0.4568, "step": 8665 }, { "epoch": 0.2553216563793586, "grad_norm": 1.5122474166086284, "learning_rate": 9.283106113558655e-06, "loss": 0.5049, "step": 8666 }, { "epoch": 0.25535111883682216, "grad_norm": 1.6175991619150552, "learning_rate": 9.282840771130963e-06, "loss": 0.4852, "step": 8667 }, { "epoch": 0.25538058129428576, "grad_norm": 1.6608783292037406, "learning_rate": 9.282575383400686e-06, "loss": 0.5623, "step": 8668 }, { "epoch": 0.25541004375174936, "grad_norm": 1.5258238227738683, "learning_rate": 9.282309950370634e-06, "loss": 0.4323, "step": 8669 }, { "epoch": 0.2554395062092129, "grad_norm": 1.6479451182980458, "learning_rate": 9.282044472043612e-06, "loss": 0.5291, "step": 8670 }, { "epoch": 0.2554689686666765, "grad_norm": 1.4094491396197804, "learning_rate": 9.281778948422428e-06, "loss": 0.507, "step": 8671 }, { "epoch": 0.25549843112414006, "grad_norm": 1.6085836665798585, "learning_rate": 9.28151337950989e-06, "loss": 0.5033, "step": 8672 }, { "epoch": 0.25552789358160366, "grad_norm": 1.5866691205502532, "learning_rate": 9.28124776530881e-06, "loss": 0.523, "step": 8673 }, { "epoch": 0.2555573560390672, "grad_norm": 1.652143462112378, "learning_rate": 9.280982105821993e-06, "loss": 0.5596, "step": 8674 }, { "epoch": 0.2555868184965308, "grad_norm": 1.5350724396753999, "learning_rate": 9.280716401052255e-06, "loss": 0.3613, "step": 8675 }, { "epoch": 0.25561628095399436, "grad_norm": 1.767758246203915, "learning_rate": 9.280450651002403e-06, "loss": 0.5414, "step": 8676 }, { "epoch": 0.25564574341145796, "grad_norm": 1.4891090879383233, "learning_rate": 9.280184855675249e-06, "loss": 0.5057, "step": 8677 }, { "epoch": 0.2556752058689215, "grad_norm": 1.6071752632016691, "learning_rate": 9.279919015073603e-06, "loss": 0.4812, "step": 8678 }, { "epoch": 0.2557046683263851, "grad_norm": 1.5369091204458458, "learning_rate": 9.279653129200278e-06, "loss": 0.4745, "step": 8679 }, { "epoch": 0.25573413078384866, "grad_norm": 1.5380693473016236, "learning_rate": 9.279387198058088e-06, "loss": 0.4426, "step": 8680 }, { "epoch": 0.25576359324131226, "grad_norm": 1.489643328110203, "learning_rate": 9.279121221649842e-06, "loss": 0.4957, "step": 8681 }, { "epoch": 0.25579305569877586, "grad_norm": 1.5241487392954707, "learning_rate": 9.278855199978355e-06, "loss": 0.3103, "step": 8682 }, { "epoch": 0.2558225181562394, "grad_norm": 1.4816194663348494, "learning_rate": 9.278589133046444e-06, "loss": 0.4342, "step": 8683 }, { "epoch": 0.255851980613703, "grad_norm": 1.7676146708432994, "learning_rate": 9.278323020856923e-06, "loss": 0.5772, "step": 8684 }, { "epoch": 0.25588144307116656, "grad_norm": 1.5402191189139907, "learning_rate": 9.2780568634126e-06, "loss": 0.3097, "step": 8685 }, { "epoch": 0.25591090552863016, "grad_norm": 1.682412066039458, "learning_rate": 9.2777906607163e-06, "loss": 0.5446, "step": 8686 }, { "epoch": 0.2559403679860937, "grad_norm": 1.531590706756256, "learning_rate": 9.27752441277083e-06, "loss": 0.5484, "step": 8687 }, { "epoch": 0.2559698304435573, "grad_norm": 1.7755635445504467, "learning_rate": 9.277258119579014e-06, "loss": 0.5514, "step": 8688 }, { "epoch": 0.25599929290102086, "grad_norm": 1.4991451287169006, "learning_rate": 9.276991781143662e-06, "loss": 0.4543, "step": 8689 }, { "epoch": 0.25602875535848446, "grad_norm": 1.547632791691179, "learning_rate": 9.276725397467597e-06, "loss": 0.3696, "step": 8690 }, { "epoch": 0.256058217815948, "grad_norm": 1.5867902312962459, "learning_rate": 9.276458968553634e-06, "loss": 0.5342, "step": 8691 }, { "epoch": 0.2560876802734116, "grad_norm": 1.4578036833955006, "learning_rate": 9.27619249440459e-06, "loss": 0.4182, "step": 8692 }, { "epoch": 0.25611714273087516, "grad_norm": 1.5392246311374196, "learning_rate": 9.275925975023285e-06, "loss": 0.5392, "step": 8693 }, { "epoch": 0.25614660518833876, "grad_norm": 1.5693891254630337, "learning_rate": 9.275659410412538e-06, "loss": 0.5801, "step": 8694 }, { "epoch": 0.25617606764580236, "grad_norm": 1.5950755488147692, "learning_rate": 9.27539280057517e-06, "loss": 0.5306, "step": 8695 }, { "epoch": 0.2562055301032659, "grad_norm": 1.425630088780366, "learning_rate": 9.275126145513999e-06, "loss": 0.4923, "step": 8696 }, { "epoch": 0.2562349925607295, "grad_norm": 1.5529794138101631, "learning_rate": 9.274859445231848e-06, "loss": 0.4405, "step": 8697 }, { "epoch": 0.25626445501819306, "grad_norm": 1.9174361440733538, "learning_rate": 9.274592699731535e-06, "loss": 0.6634, "step": 8698 }, { "epoch": 0.25629391747565666, "grad_norm": 1.7396248001483439, "learning_rate": 9.274325909015883e-06, "loss": 0.6831, "step": 8699 }, { "epoch": 0.2563233799331202, "grad_norm": 1.568583285129387, "learning_rate": 9.274059073087714e-06, "loss": 0.5009, "step": 8700 }, { "epoch": 0.2563528423905838, "grad_norm": 1.5913469715206794, "learning_rate": 9.273792191949852e-06, "loss": 0.4939, "step": 8701 }, { "epoch": 0.25638230484804736, "grad_norm": 1.6669763139097364, "learning_rate": 9.273525265605118e-06, "loss": 0.5786, "step": 8702 }, { "epoch": 0.25641176730551096, "grad_norm": 1.6180188570652914, "learning_rate": 9.273258294056336e-06, "loss": 0.5786, "step": 8703 }, { "epoch": 0.2564412297629745, "grad_norm": 1.6677341712848528, "learning_rate": 9.27299127730633e-06, "loss": 0.5244, "step": 8704 }, { "epoch": 0.2564706922204381, "grad_norm": 1.4934534562021615, "learning_rate": 9.272724215357926e-06, "loss": 0.5192, "step": 8705 }, { "epoch": 0.25650015467790166, "grad_norm": 1.5339753876954252, "learning_rate": 9.272457108213947e-06, "loss": 0.586, "step": 8706 }, { "epoch": 0.25652961713536526, "grad_norm": 1.4057323953920493, "learning_rate": 9.272189955877215e-06, "loss": 0.3814, "step": 8707 }, { "epoch": 0.25655907959282886, "grad_norm": 1.4172534967014407, "learning_rate": 9.271922758350565e-06, "loss": 0.4511, "step": 8708 }, { "epoch": 0.2565885420502924, "grad_norm": 1.4386217833407047, "learning_rate": 9.271655515636816e-06, "loss": 0.462, "step": 8709 }, { "epoch": 0.256618004507756, "grad_norm": 1.6575823331570028, "learning_rate": 9.271388227738795e-06, "loss": 0.5416, "step": 8710 }, { "epoch": 0.25664746696521956, "grad_norm": 1.6468748707562393, "learning_rate": 9.271120894659333e-06, "loss": 0.4931, "step": 8711 }, { "epoch": 0.25667692942268316, "grad_norm": 1.5433847346349803, "learning_rate": 9.270853516401256e-06, "loss": 0.4114, "step": 8712 }, { "epoch": 0.2567063918801467, "grad_norm": 1.552967449200447, "learning_rate": 9.270586092967391e-06, "loss": 0.422, "step": 8713 }, { "epoch": 0.2567358543376103, "grad_norm": 1.344010828354368, "learning_rate": 9.270318624360568e-06, "loss": 0.4192, "step": 8714 }, { "epoch": 0.25676531679507386, "grad_norm": 1.5001006131412098, "learning_rate": 9.270051110583616e-06, "loss": 0.4373, "step": 8715 }, { "epoch": 0.25679477925253746, "grad_norm": 1.5296486773630233, "learning_rate": 9.269783551639364e-06, "loss": 0.5157, "step": 8716 }, { "epoch": 0.256824241710001, "grad_norm": 1.575442019837139, "learning_rate": 9.269515947530644e-06, "loss": 0.6595, "step": 8717 }, { "epoch": 0.2568537041674646, "grad_norm": 1.6390138127562612, "learning_rate": 9.269248298260285e-06, "loss": 0.5732, "step": 8718 }, { "epoch": 0.25688316662492816, "grad_norm": 1.6846993009785196, "learning_rate": 9.268980603831119e-06, "loss": 0.5466, "step": 8719 }, { "epoch": 0.25691262908239176, "grad_norm": 1.6702791991522592, "learning_rate": 9.268712864245977e-06, "loss": 0.4996, "step": 8720 }, { "epoch": 0.25694209153985537, "grad_norm": 1.6894392623766041, "learning_rate": 9.26844507950769e-06, "loss": 0.4703, "step": 8721 }, { "epoch": 0.2569715539973189, "grad_norm": 1.6083993881438967, "learning_rate": 9.268177249619094e-06, "loss": 0.4429, "step": 8722 }, { "epoch": 0.2570010164547825, "grad_norm": 1.3423304311529565, "learning_rate": 9.267909374583019e-06, "loss": 0.379, "step": 8723 }, { "epoch": 0.25703047891224606, "grad_norm": 1.4689820141121506, "learning_rate": 9.2676414544023e-06, "loss": 0.3999, "step": 8724 }, { "epoch": 0.25705994136970967, "grad_norm": 1.629697322371692, "learning_rate": 9.26737348907977e-06, "loss": 0.4215, "step": 8725 }, { "epoch": 0.2570894038271732, "grad_norm": 1.731765588803327, "learning_rate": 9.267105478618262e-06, "loss": 0.558, "step": 8726 }, { "epoch": 0.2571188662846368, "grad_norm": 1.7294022117662136, "learning_rate": 9.266837423020614e-06, "loss": 0.6513, "step": 8727 }, { "epoch": 0.25714832874210036, "grad_norm": 1.4894813394667257, "learning_rate": 9.26656932228966e-06, "loss": 0.4472, "step": 8728 }, { "epoch": 0.25717779119956397, "grad_norm": 1.5602249938233743, "learning_rate": 9.266301176428236e-06, "loss": 0.5155, "step": 8729 }, { "epoch": 0.2572072536570275, "grad_norm": 1.9711473868007479, "learning_rate": 9.26603298543918e-06, "loss": 0.5576, "step": 8730 }, { "epoch": 0.2572367161144911, "grad_norm": 1.591047720597855, "learning_rate": 9.265764749325325e-06, "loss": 0.4171, "step": 8731 }, { "epoch": 0.25726617857195466, "grad_norm": 1.6378948770076602, "learning_rate": 9.26549646808951e-06, "loss": 0.4417, "step": 8732 }, { "epoch": 0.25729564102941826, "grad_norm": 1.630235516199999, "learning_rate": 9.265228141734574e-06, "loss": 0.6434, "step": 8733 }, { "epoch": 0.25732510348688187, "grad_norm": 1.79448859539601, "learning_rate": 9.264959770263356e-06, "loss": 0.5988, "step": 8734 }, { "epoch": 0.2573545659443454, "grad_norm": 1.8048410009761737, "learning_rate": 9.264691353678692e-06, "loss": 0.5703, "step": 8735 }, { "epoch": 0.257384028401809, "grad_norm": 1.6033970077843218, "learning_rate": 9.264422891983423e-06, "loss": 0.55, "step": 8736 }, { "epoch": 0.25741349085927256, "grad_norm": 1.6329457592428815, "learning_rate": 9.264154385180389e-06, "loss": 0.5873, "step": 8737 }, { "epoch": 0.25744295331673617, "grad_norm": 1.545216846031565, "learning_rate": 9.263885833272428e-06, "loss": 0.4282, "step": 8738 }, { "epoch": 0.2574724157741997, "grad_norm": 1.701866185411699, "learning_rate": 9.263617236262384e-06, "loss": 0.6084, "step": 8739 }, { "epoch": 0.2575018782316633, "grad_norm": 1.5160784546199364, "learning_rate": 9.263348594153095e-06, "loss": 0.5096, "step": 8740 }, { "epoch": 0.25753134068912686, "grad_norm": 1.5795676216452754, "learning_rate": 9.263079906947403e-06, "loss": 0.4023, "step": 8741 }, { "epoch": 0.25756080314659047, "grad_norm": 1.475876851971862, "learning_rate": 9.262811174648152e-06, "loss": 0.4586, "step": 8742 }, { "epoch": 0.257590265604054, "grad_norm": 1.606365616964312, "learning_rate": 9.262542397258183e-06, "loss": 0.4931, "step": 8743 }, { "epoch": 0.2576197280615176, "grad_norm": 1.483072175170111, "learning_rate": 9.262273574780341e-06, "loss": 0.5206, "step": 8744 }, { "epoch": 0.25764919051898116, "grad_norm": 1.5359919466137832, "learning_rate": 9.262004707217467e-06, "loss": 0.5116, "step": 8745 }, { "epoch": 0.25767865297644477, "grad_norm": 1.4693778449808323, "learning_rate": 9.261735794572404e-06, "loss": 0.4913, "step": 8746 }, { "epoch": 0.25770811543390837, "grad_norm": 1.5504928473020836, "learning_rate": 9.261466836848003e-06, "loss": 0.4448, "step": 8747 }, { "epoch": 0.2577375778913719, "grad_norm": 1.513374937423411, "learning_rate": 9.261197834047101e-06, "loss": 0.4708, "step": 8748 }, { "epoch": 0.2577670403488355, "grad_norm": 1.5368983142866186, "learning_rate": 9.260928786172549e-06, "loss": 0.4416, "step": 8749 }, { "epoch": 0.25779650280629907, "grad_norm": 1.667201373389854, "learning_rate": 9.260659693227189e-06, "loss": 0.5677, "step": 8750 }, { "epoch": 0.25782596526376267, "grad_norm": 1.6219944691793302, "learning_rate": 9.26039055521387e-06, "loss": 0.465, "step": 8751 }, { "epoch": 0.2578554277212262, "grad_norm": 1.645350909212595, "learning_rate": 9.260121372135439e-06, "loss": 0.4364, "step": 8752 }, { "epoch": 0.2578848901786898, "grad_norm": 1.7773337088673453, "learning_rate": 9.259852143994742e-06, "loss": 0.6357, "step": 8753 }, { "epoch": 0.25791435263615337, "grad_norm": 1.6753555161444, "learning_rate": 9.259582870794626e-06, "loss": 0.6306, "step": 8754 }, { "epoch": 0.25794381509361697, "grad_norm": 1.63633100705161, "learning_rate": 9.259313552537943e-06, "loss": 0.5364, "step": 8755 }, { "epoch": 0.2579732775510805, "grad_norm": 1.4992416887781699, "learning_rate": 9.259044189227537e-06, "loss": 0.4391, "step": 8756 }, { "epoch": 0.2580027400085441, "grad_norm": 1.587572105964051, "learning_rate": 9.258774780866261e-06, "loss": 0.4839, "step": 8757 }, { "epoch": 0.25803220246600767, "grad_norm": 1.568605750953781, "learning_rate": 9.258505327456964e-06, "loss": 0.4292, "step": 8758 }, { "epoch": 0.25806166492347127, "grad_norm": 1.634545831995683, "learning_rate": 9.258235829002494e-06, "loss": 0.4903, "step": 8759 }, { "epoch": 0.25809112738093487, "grad_norm": 1.6332200250783977, "learning_rate": 9.257966285505701e-06, "loss": 0.5066, "step": 8760 }, { "epoch": 0.2581205898383984, "grad_norm": 1.6244193201913624, "learning_rate": 9.257696696969443e-06, "loss": 0.669, "step": 8761 }, { "epoch": 0.258150052295862, "grad_norm": 1.8662542263173683, "learning_rate": 9.257427063396563e-06, "loss": 0.7215, "step": 8762 }, { "epoch": 0.25817951475332557, "grad_norm": 1.62393361196683, "learning_rate": 9.257157384789918e-06, "loss": 0.452, "step": 8763 }, { "epoch": 0.25820897721078917, "grad_norm": 1.6221378207562007, "learning_rate": 9.256887661152362e-06, "loss": 0.4418, "step": 8764 }, { "epoch": 0.2582384396682527, "grad_norm": 1.545700684930428, "learning_rate": 9.256617892486743e-06, "loss": 0.5401, "step": 8765 }, { "epoch": 0.2582679021257163, "grad_norm": 1.95167053160615, "learning_rate": 9.256348078795919e-06, "loss": 0.6587, "step": 8766 }, { "epoch": 0.25829736458317987, "grad_norm": 1.5695785848156452, "learning_rate": 9.25607822008274e-06, "loss": 0.5976, "step": 8767 }, { "epoch": 0.25832682704064347, "grad_norm": 1.5556282159822519, "learning_rate": 9.255808316350064e-06, "loss": 0.6439, "step": 8768 }, { "epoch": 0.258356289498107, "grad_norm": 1.5754936450739534, "learning_rate": 9.255538367600744e-06, "loss": 0.5667, "step": 8769 }, { "epoch": 0.2583857519555706, "grad_norm": 1.873474837411692, "learning_rate": 9.255268373837637e-06, "loss": 0.493, "step": 8770 }, { "epoch": 0.25841521441303417, "grad_norm": 1.3900797389029225, "learning_rate": 9.2549983350636e-06, "loss": 0.406, "step": 8771 }, { "epoch": 0.25844467687049777, "grad_norm": 1.6121181977320664, "learning_rate": 9.254728251281483e-06, "loss": 0.5651, "step": 8772 }, { "epoch": 0.25847413932796137, "grad_norm": 1.6145234990584463, "learning_rate": 9.25445812249415e-06, "loss": 0.4675, "step": 8773 }, { "epoch": 0.2585036017854249, "grad_norm": 1.7528807752745346, "learning_rate": 9.254187948704455e-06, "loss": 0.5893, "step": 8774 }, { "epoch": 0.2585330642428885, "grad_norm": 1.5222297571509897, "learning_rate": 9.253917729915255e-06, "loss": 0.4505, "step": 8775 }, { "epoch": 0.25856252670035207, "grad_norm": 1.7060287083282755, "learning_rate": 9.253647466129411e-06, "loss": 0.535, "step": 8776 }, { "epoch": 0.25859198915781567, "grad_norm": 1.782836949923474, "learning_rate": 9.253377157349779e-06, "loss": 0.5908, "step": 8777 }, { "epoch": 0.2586214516152792, "grad_norm": 1.5102861049266858, "learning_rate": 9.253106803579221e-06, "loss": 0.4143, "step": 8778 }, { "epoch": 0.2586509140727428, "grad_norm": 1.8488308489038356, "learning_rate": 9.252836404820595e-06, "loss": 0.4162, "step": 8779 }, { "epoch": 0.25868037653020637, "grad_norm": 1.5784964428177901, "learning_rate": 9.252565961076763e-06, "loss": 0.4909, "step": 8780 }, { "epoch": 0.25870983898766997, "grad_norm": 1.592576342331179, "learning_rate": 9.252295472350584e-06, "loss": 0.4456, "step": 8781 }, { "epoch": 0.2587393014451335, "grad_norm": 1.6554567337668056, "learning_rate": 9.252024938644918e-06, "loss": 0.4973, "step": 8782 }, { "epoch": 0.2587687639025971, "grad_norm": 1.626523950387938, "learning_rate": 9.251754359962629e-06, "loss": 0.4288, "step": 8783 }, { "epoch": 0.25879822636006067, "grad_norm": 1.623756858732205, "learning_rate": 9.251483736306577e-06, "loss": 0.6303, "step": 8784 }, { "epoch": 0.25882768881752427, "grad_norm": 1.4380767906374867, "learning_rate": 9.251213067679626e-06, "loss": 0.5193, "step": 8785 }, { "epoch": 0.2588571512749879, "grad_norm": 1.7883119567245298, "learning_rate": 9.25094235408464e-06, "loss": 0.4839, "step": 8786 }, { "epoch": 0.2588866137324514, "grad_norm": 1.6279711821889105, "learning_rate": 9.250671595524481e-06, "loss": 0.5711, "step": 8787 }, { "epoch": 0.258916076189915, "grad_norm": 1.6251308936000641, "learning_rate": 9.250400792002012e-06, "loss": 0.5328, "step": 8788 }, { "epoch": 0.25894553864737857, "grad_norm": 1.5260373776949696, "learning_rate": 9.250129943520099e-06, "loss": 0.5005, "step": 8789 }, { "epoch": 0.2589750011048422, "grad_norm": 1.5312208157191574, "learning_rate": 9.24985905008161e-06, "loss": 0.513, "step": 8790 }, { "epoch": 0.2590044635623057, "grad_norm": 1.62309080744805, "learning_rate": 9.249588111689403e-06, "loss": 0.4637, "step": 8791 }, { "epoch": 0.2590339260197693, "grad_norm": 1.4148827156352506, "learning_rate": 9.249317128346348e-06, "loss": 0.4485, "step": 8792 }, { "epoch": 0.25906338847723287, "grad_norm": 1.9518704453573368, "learning_rate": 9.249046100055314e-06, "loss": 0.5857, "step": 8793 }, { "epoch": 0.2590928509346965, "grad_norm": 1.617004896877772, "learning_rate": 9.248775026819166e-06, "loss": 0.5194, "step": 8794 }, { "epoch": 0.25912231339216, "grad_norm": 1.6080896446656852, "learning_rate": 9.248503908640768e-06, "loss": 0.4602, "step": 8795 }, { "epoch": 0.2591517758496236, "grad_norm": 1.760058086137084, "learning_rate": 9.24823274552299e-06, "loss": 0.5459, "step": 8796 }, { "epoch": 0.25918123830708717, "grad_norm": 1.567032009249296, "learning_rate": 9.247961537468704e-06, "loss": 0.5705, "step": 8797 }, { "epoch": 0.25921070076455077, "grad_norm": 1.469270997599796, "learning_rate": 9.247690284480773e-06, "loss": 0.4739, "step": 8798 }, { "epoch": 0.2592401632220144, "grad_norm": 1.6579552809147216, "learning_rate": 9.247418986562069e-06, "loss": 0.376, "step": 8799 }, { "epoch": 0.2592696256794779, "grad_norm": 1.6270180632970423, "learning_rate": 9.247147643715462e-06, "loss": 0.4857, "step": 8800 }, { "epoch": 0.2592990881369415, "grad_norm": 1.484024761432434, "learning_rate": 9.246876255943821e-06, "loss": 0.6104, "step": 8801 }, { "epoch": 0.25932855059440507, "grad_norm": 1.6716747238293979, "learning_rate": 9.246604823250017e-06, "loss": 0.5761, "step": 8802 }, { "epoch": 0.2593580130518687, "grad_norm": 1.4653716359298776, "learning_rate": 9.246333345636922e-06, "loss": 0.453, "step": 8803 }, { "epoch": 0.2593874755093322, "grad_norm": 1.4509579745746377, "learning_rate": 9.246061823107407e-06, "loss": 0.4359, "step": 8804 }, { "epoch": 0.2594169379667958, "grad_norm": 1.56856828520623, "learning_rate": 9.245790255664344e-06, "loss": 0.513, "step": 8805 }, { "epoch": 0.25944640042425937, "grad_norm": 1.6039405678506695, "learning_rate": 9.245518643310604e-06, "loss": 0.4517, "step": 8806 }, { "epoch": 0.259475862881723, "grad_norm": 1.446131024375765, "learning_rate": 9.245246986049065e-06, "loss": 0.4884, "step": 8807 }, { "epoch": 0.2595053253391865, "grad_norm": 1.615561855254925, "learning_rate": 9.244975283882596e-06, "loss": 0.5104, "step": 8808 }, { "epoch": 0.2595347877966501, "grad_norm": 1.642285542968565, "learning_rate": 9.24470353681407e-06, "loss": 0.4178, "step": 8809 }, { "epoch": 0.25956425025411367, "grad_norm": 1.8757218761060166, "learning_rate": 9.244431744846365e-06, "loss": 0.5689, "step": 8810 }, { "epoch": 0.2595937127115773, "grad_norm": 1.640341878777351, "learning_rate": 9.244159907982355e-06, "loss": 0.5242, "step": 8811 }, { "epoch": 0.2596231751690409, "grad_norm": 1.539871472241484, "learning_rate": 9.243888026224914e-06, "loss": 0.4528, "step": 8812 }, { "epoch": 0.2596526376265044, "grad_norm": 1.6232015650319036, "learning_rate": 9.24361609957692e-06, "loss": 0.6503, "step": 8813 }, { "epoch": 0.259682100083968, "grad_norm": 1.603913197951585, "learning_rate": 9.243344128041248e-06, "loss": 0.5297, "step": 8814 }, { "epoch": 0.2597115625414316, "grad_norm": 1.6107183058708416, "learning_rate": 9.243072111620775e-06, "loss": 0.4269, "step": 8815 }, { "epoch": 0.2597410249988952, "grad_norm": 1.4672135383460823, "learning_rate": 9.242800050318377e-06, "loss": 0.4437, "step": 8816 }, { "epoch": 0.2597704874563587, "grad_norm": 1.452051414936469, "learning_rate": 9.242527944136934e-06, "loss": 0.5158, "step": 8817 }, { "epoch": 0.2597999499138223, "grad_norm": 1.8544787683818262, "learning_rate": 9.242255793079323e-06, "loss": 0.6056, "step": 8818 }, { "epoch": 0.2598294123712859, "grad_norm": 1.5538386102841968, "learning_rate": 9.241983597148423e-06, "loss": 0.4001, "step": 8819 }, { "epoch": 0.2598588748287495, "grad_norm": 1.4905146148784232, "learning_rate": 9.241711356347115e-06, "loss": 0.4767, "step": 8820 }, { "epoch": 0.259888337286213, "grad_norm": 1.7624851524160543, "learning_rate": 9.241439070678275e-06, "loss": 0.4369, "step": 8821 }, { "epoch": 0.2599177997436766, "grad_norm": 1.6376311684342857, "learning_rate": 9.241166740144786e-06, "loss": 0.4634, "step": 8822 }, { "epoch": 0.2599472622011402, "grad_norm": 1.7228323476797638, "learning_rate": 9.240894364749528e-06, "loss": 0.4722, "step": 8823 }, { "epoch": 0.2599767246586038, "grad_norm": 1.7461796066054467, "learning_rate": 9.24062194449538e-06, "loss": 0.4857, "step": 8824 }, { "epoch": 0.2600061871160674, "grad_norm": 1.6463424408363871, "learning_rate": 9.24034947938523e-06, "loss": 0.6245, "step": 8825 }, { "epoch": 0.2600356495735309, "grad_norm": 1.778443242208855, "learning_rate": 9.24007696942195e-06, "loss": 0.6347, "step": 8826 }, { "epoch": 0.2600651120309945, "grad_norm": 1.432503976105953, "learning_rate": 9.239804414608432e-06, "loss": 0.5097, "step": 8827 }, { "epoch": 0.2600945744884581, "grad_norm": 1.5538789164671973, "learning_rate": 9.239531814947553e-06, "loss": 0.5724, "step": 8828 }, { "epoch": 0.2601240369459217, "grad_norm": 1.576211377936848, "learning_rate": 9.239259170442198e-06, "loss": 0.5104, "step": 8829 }, { "epoch": 0.2601534994033852, "grad_norm": 1.476862548248019, "learning_rate": 9.238986481095253e-06, "loss": 0.5026, "step": 8830 }, { "epoch": 0.2601829618608488, "grad_norm": 1.5009910093824028, "learning_rate": 9.2387137469096e-06, "loss": 0.43, "step": 8831 }, { "epoch": 0.2602124243183124, "grad_norm": 1.744913084067811, "learning_rate": 9.238440967888125e-06, "loss": 0.4365, "step": 8832 }, { "epoch": 0.260241886775776, "grad_norm": 1.5389158954690965, "learning_rate": 9.238168144033714e-06, "loss": 0.5402, "step": 8833 }, { "epoch": 0.2602713492332395, "grad_norm": 1.683775969666612, "learning_rate": 9.237895275349252e-06, "loss": 0.3251, "step": 8834 }, { "epoch": 0.2603008116907031, "grad_norm": 1.553911516594926, "learning_rate": 9.237622361837625e-06, "loss": 0.5796, "step": 8835 }, { "epoch": 0.2603302741481667, "grad_norm": 1.5146938200632492, "learning_rate": 9.237349403501718e-06, "loss": 0.4444, "step": 8836 }, { "epoch": 0.2603597366056303, "grad_norm": 1.8422578825612779, "learning_rate": 9.237076400344423e-06, "loss": 0.5239, "step": 8837 }, { "epoch": 0.2603891990630939, "grad_norm": 1.5392762443387151, "learning_rate": 9.236803352368622e-06, "loss": 0.5501, "step": 8838 }, { "epoch": 0.2604186615205574, "grad_norm": 1.4297734385811185, "learning_rate": 9.23653025957721e-06, "loss": 0.4172, "step": 8839 }, { "epoch": 0.26044812397802103, "grad_norm": 1.5147654628804204, "learning_rate": 9.23625712197307e-06, "loss": 0.5231, "step": 8840 }, { "epoch": 0.2604775864354846, "grad_norm": 1.4407453826486396, "learning_rate": 9.235983939559094e-06, "loss": 0.4492, "step": 8841 }, { "epoch": 0.2605070488929482, "grad_norm": 1.448572346633708, "learning_rate": 9.23571071233817e-06, "loss": 0.4566, "step": 8842 }, { "epoch": 0.2605365113504117, "grad_norm": 1.5462763694011996, "learning_rate": 9.23543744031319e-06, "loss": 0.5379, "step": 8843 }, { "epoch": 0.26056597380787533, "grad_norm": 1.4914206158399737, "learning_rate": 9.235164123487044e-06, "loss": 0.4928, "step": 8844 }, { "epoch": 0.2605954362653389, "grad_norm": 1.5129646528870664, "learning_rate": 9.23489076186262e-06, "loss": 0.4821, "step": 8845 }, { "epoch": 0.2606248987228025, "grad_norm": 1.5944578412615942, "learning_rate": 9.234617355442814e-06, "loss": 0.5858, "step": 8846 }, { "epoch": 0.260654361180266, "grad_norm": 1.8564832581378854, "learning_rate": 9.234343904230516e-06, "loss": 0.4671, "step": 8847 }, { "epoch": 0.26068382363772963, "grad_norm": 1.3463531851949588, "learning_rate": 9.23407040822862e-06, "loss": 0.419, "step": 8848 }, { "epoch": 0.2607132860951932, "grad_norm": 1.5674791239935773, "learning_rate": 9.233796867440014e-06, "loss": 0.4336, "step": 8849 }, { "epoch": 0.2607427485526568, "grad_norm": 1.8132188695966072, "learning_rate": 9.233523281867598e-06, "loss": 0.5429, "step": 8850 }, { "epoch": 0.2607722110101204, "grad_norm": 1.6021769251203475, "learning_rate": 9.233249651514262e-06, "loss": 0.4966, "step": 8851 }, { "epoch": 0.2608016734675839, "grad_norm": 1.479868850433081, "learning_rate": 9.232975976382903e-06, "loss": 0.5184, "step": 8852 }, { "epoch": 0.26083113592504753, "grad_norm": 1.6949027190447852, "learning_rate": 9.23270225647641e-06, "loss": 0.468, "step": 8853 }, { "epoch": 0.2608605983825111, "grad_norm": 1.611506953661677, "learning_rate": 9.232428491797687e-06, "loss": 0.4545, "step": 8854 }, { "epoch": 0.2608900608399747, "grad_norm": 1.4960892439473021, "learning_rate": 9.232154682349624e-06, "loss": 0.5673, "step": 8855 }, { "epoch": 0.2609195232974382, "grad_norm": 1.5727550322047534, "learning_rate": 9.231880828135118e-06, "loss": 0.4576, "step": 8856 }, { "epoch": 0.26094898575490183, "grad_norm": 1.7946277945721365, "learning_rate": 9.231606929157067e-06, "loss": 0.6409, "step": 8857 }, { "epoch": 0.2609784482123654, "grad_norm": 1.791912887141729, "learning_rate": 9.231332985418366e-06, "loss": 0.4916, "step": 8858 }, { "epoch": 0.261007910669829, "grad_norm": 1.577157980142509, "learning_rate": 9.231058996921916e-06, "loss": 0.4548, "step": 8859 }, { "epoch": 0.2610373731272925, "grad_norm": 1.54805558341131, "learning_rate": 9.230784963670612e-06, "loss": 0.5362, "step": 8860 }, { "epoch": 0.26106683558475613, "grad_norm": 1.7796324027540658, "learning_rate": 9.230510885667355e-06, "loss": 0.5658, "step": 8861 }, { "epoch": 0.2610962980422197, "grad_norm": 1.704554265528681, "learning_rate": 9.23023676291504e-06, "loss": 0.5376, "step": 8862 }, { "epoch": 0.2611257604996833, "grad_norm": 1.487773315731938, "learning_rate": 9.229962595416575e-06, "loss": 0.4553, "step": 8863 }, { "epoch": 0.2611552229571469, "grad_norm": 1.6789221316850846, "learning_rate": 9.22968838317485e-06, "loss": 0.5175, "step": 8864 }, { "epoch": 0.26118468541461043, "grad_norm": 1.5085716495708585, "learning_rate": 9.229414126192773e-06, "loss": 0.3867, "step": 8865 }, { "epoch": 0.26121414787207403, "grad_norm": 1.7740476763479571, "learning_rate": 9.229139824473242e-06, "loss": 0.6332, "step": 8866 }, { "epoch": 0.2612436103295376, "grad_norm": 2.072428204949089, "learning_rate": 9.22886547801916e-06, "loss": 0.7005, "step": 8867 }, { "epoch": 0.2612730727870012, "grad_norm": 1.63149699617431, "learning_rate": 9.228591086833425e-06, "loss": 0.448, "step": 8868 }, { "epoch": 0.26130253524446473, "grad_norm": 1.666072729412183, "learning_rate": 9.228316650918943e-06, "loss": 0.6103, "step": 8869 }, { "epoch": 0.26133199770192833, "grad_norm": 1.9060905943980357, "learning_rate": 9.228042170278617e-06, "loss": 0.6644, "step": 8870 }, { "epoch": 0.2613614601593919, "grad_norm": 1.6908684334856585, "learning_rate": 9.227767644915348e-06, "loss": 0.5209, "step": 8871 }, { "epoch": 0.2613909226168555, "grad_norm": 1.5659225926982865, "learning_rate": 9.227493074832044e-06, "loss": 0.4962, "step": 8872 }, { "epoch": 0.26142038507431903, "grad_norm": 1.4687935438058186, "learning_rate": 9.227218460031604e-06, "loss": 0.4861, "step": 8873 }, { "epoch": 0.26144984753178263, "grad_norm": 1.8085584637931054, "learning_rate": 9.226943800516936e-06, "loss": 0.5471, "step": 8874 }, { "epoch": 0.2614793099892462, "grad_norm": 1.4713936462393455, "learning_rate": 9.226669096290947e-06, "loss": 0.5235, "step": 8875 }, { "epoch": 0.2615087724467098, "grad_norm": 1.497248275795905, "learning_rate": 9.226394347356538e-06, "loss": 0.3538, "step": 8876 }, { "epoch": 0.2615382349041734, "grad_norm": 1.522959279153954, "learning_rate": 9.226119553716618e-06, "loss": 0.5254, "step": 8877 }, { "epoch": 0.26156769736163693, "grad_norm": 1.6043297968962553, "learning_rate": 9.225844715374094e-06, "loss": 0.4025, "step": 8878 }, { "epoch": 0.26159715981910053, "grad_norm": 1.5729588458618748, "learning_rate": 9.225569832331871e-06, "loss": 0.5114, "step": 8879 }, { "epoch": 0.2616266222765641, "grad_norm": 1.540933014709698, "learning_rate": 9.22529490459286e-06, "loss": 0.4042, "step": 8880 }, { "epoch": 0.2616560847340277, "grad_norm": 1.7017602726632872, "learning_rate": 9.225019932159966e-06, "loss": 0.6219, "step": 8881 }, { "epoch": 0.26168554719149123, "grad_norm": 1.5860427776200692, "learning_rate": 9.224744915036098e-06, "loss": 0.4794, "step": 8882 }, { "epoch": 0.26171500964895483, "grad_norm": 1.46764946631592, "learning_rate": 9.224469853224167e-06, "loss": 0.41, "step": 8883 }, { "epoch": 0.2617444721064184, "grad_norm": 1.5004365684006102, "learning_rate": 9.224194746727083e-06, "loss": 0.4833, "step": 8884 }, { "epoch": 0.261773934563882, "grad_norm": 1.5661407151173854, "learning_rate": 9.223919595547752e-06, "loss": 0.446, "step": 8885 }, { "epoch": 0.26180339702134553, "grad_norm": 1.830627183991903, "learning_rate": 9.223644399689087e-06, "loss": 0.531, "step": 8886 }, { "epoch": 0.26183285947880913, "grad_norm": 1.8039797114765717, "learning_rate": 9.223369159153998e-06, "loss": 0.6179, "step": 8887 }, { "epoch": 0.2618623219362727, "grad_norm": 1.5584274411358334, "learning_rate": 9.223093873945399e-06, "loss": 0.4798, "step": 8888 }, { "epoch": 0.2618917843937363, "grad_norm": 1.7663292486299187, "learning_rate": 9.2228185440662e-06, "loss": 0.4881, "step": 8889 }, { "epoch": 0.2619212468511999, "grad_norm": 1.6439436389182498, "learning_rate": 9.222543169519312e-06, "loss": 0.5746, "step": 8890 }, { "epoch": 0.26195070930866343, "grad_norm": 1.4832154544048228, "learning_rate": 9.222267750307649e-06, "loss": 0.5456, "step": 8891 }, { "epoch": 0.26198017176612703, "grad_norm": 1.5784318192779063, "learning_rate": 9.221992286434126e-06, "loss": 0.557, "step": 8892 }, { "epoch": 0.2620096342235906, "grad_norm": 1.5862984953822485, "learning_rate": 9.221716777901654e-06, "loss": 0.513, "step": 8893 }, { "epoch": 0.2620390966810542, "grad_norm": 1.4520458613896567, "learning_rate": 9.22144122471315e-06, "loss": 0.506, "step": 8894 }, { "epoch": 0.26206855913851773, "grad_norm": 1.6121361189461123, "learning_rate": 9.221165626871527e-06, "loss": 0.5469, "step": 8895 }, { "epoch": 0.26209802159598133, "grad_norm": 1.6397814760258131, "learning_rate": 9.220889984379699e-06, "loss": 0.4823, "step": 8896 }, { "epoch": 0.2621274840534449, "grad_norm": 1.4385683931550046, "learning_rate": 9.220614297240584e-06, "loss": 0.5344, "step": 8897 }, { "epoch": 0.2621569465109085, "grad_norm": 1.5679087122931976, "learning_rate": 9.220338565457096e-06, "loss": 0.607, "step": 8898 }, { "epoch": 0.26218640896837203, "grad_norm": 1.638227175006115, "learning_rate": 9.220062789032156e-06, "loss": 0.3971, "step": 8899 }, { "epoch": 0.26221587142583563, "grad_norm": 1.4864166329627821, "learning_rate": 9.219786967968675e-06, "loss": 0.543, "step": 8900 }, { "epoch": 0.2622453338832992, "grad_norm": 1.5169807043344836, "learning_rate": 9.219511102269574e-06, "loss": 0.3569, "step": 8901 }, { "epoch": 0.2622747963407628, "grad_norm": 1.7072256722629344, "learning_rate": 9.219235191937771e-06, "loss": 0.5905, "step": 8902 }, { "epoch": 0.2623042587982264, "grad_norm": 1.446251137641611, "learning_rate": 9.218959236976185e-06, "loss": 0.3385, "step": 8903 }, { "epoch": 0.26233372125568993, "grad_norm": 1.4637752144292369, "learning_rate": 9.218683237387731e-06, "loss": 0.4708, "step": 8904 }, { "epoch": 0.26236318371315354, "grad_norm": 1.7932675097555173, "learning_rate": 9.218407193175336e-06, "loss": 0.5992, "step": 8905 }, { "epoch": 0.2623926461706171, "grad_norm": 1.5541034565386782, "learning_rate": 9.218131104341912e-06, "loss": 0.453, "step": 8906 }, { "epoch": 0.2624221086280807, "grad_norm": 1.7520294536334629, "learning_rate": 9.217854970890383e-06, "loss": 0.3817, "step": 8907 }, { "epoch": 0.26245157108554423, "grad_norm": 1.586522687166222, "learning_rate": 9.21757879282367e-06, "loss": 0.4883, "step": 8908 }, { "epoch": 0.26248103354300784, "grad_norm": 1.4887005699385876, "learning_rate": 9.217302570144693e-06, "loss": 0.386, "step": 8909 }, { "epoch": 0.2625104960004714, "grad_norm": 1.5615607819447277, "learning_rate": 9.217026302856376e-06, "loss": 0.5116, "step": 8910 }, { "epoch": 0.262539958457935, "grad_norm": 1.7236779433819078, "learning_rate": 9.21674999096164e-06, "loss": 0.4049, "step": 8911 }, { "epoch": 0.26256942091539853, "grad_norm": 1.5524691263445256, "learning_rate": 9.216473634463407e-06, "loss": 0.5247, "step": 8912 }, { "epoch": 0.26259888337286214, "grad_norm": 1.7475627606053281, "learning_rate": 9.2161972333646e-06, "loss": 0.6763, "step": 8913 }, { "epoch": 0.2626283458303257, "grad_norm": 1.6105455221876197, "learning_rate": 9.215920787668144e-06, "loss": 0.5109, "step": 8914 }, { "epoch": 0.2626578082877893, "grad_norm": 1.6008817977695744, "learning_rate": 9.215644297376965e-06, "loss": 0.5245, "step": 8915 }, { "epoch": 0.2626872707452529, "grad_norm": 1.7961518742168334, "learning_rate": 9.215367762493983e-06, "loss": 0.584, "step": 8916 }, { "epoch": 0.26271673320271643, "grad_norm": 1.7502671744481, "learning_rate": 9.215091183022127e-06, "loss": 0.5688, "step": 8917 }, { "epoch": 0.26274619566018004, "grad_norm": 1.7047436138349523, "learning_rate": 9.214814558964318e-06, "loss": 0.609, "step": 8918 }, { "epoch": 0.2627756581176436, "grad_norm": 1.5861399850660902, "learning_rate": 9.214537890323489e-06, "loss": 0.5478, "step": 8919 }, { "epoch": 0.2628051205751072, "grad_norm": 1.7888257270057024, "learning_rate": 9.21426117710256e-06, "loss": 0.6343, "step": 8920 }, { "epoch": 0.26283458303257073, "grad_norm": 1.5271634064920585, "learning_rate": 9.213984419304463e-06, "loss": 0.5565, "step": 8921 }, { "epoch": 0.26286404549003434, "grad_norm": 1.5947246567244866, "learning_rate": 9.213707616932121e-06, "loss": 0.5167, "step": 8922 }, { "epoch": 0.2628935079474979, "grad_norm": 1.556129762130722, "learning_rate": 9.213430769988464e-06, "loss": 0.4724, "step": 8923 }, { "epoch": 0.2629229704049615, "grad_norm": 1.694366910644868, "learning_rate": 9.213153878476421e-06, "loss": 0.5124, "step": 8924 }, { "epoch": 0.26295243286242503, "grad_norm": 1.4523426648514883, "learning_rate": 9.21287694239892e-06, "loss": 0.3577, "step": 8925 }, { "epoch": 0.26298189531988864, "grad_norm": 1.6219630824581757, "learning_rate": 9.21259996175889e-06, "loss": 0.5603, "step": 8926 }, { "epoch": 0.2630113577773522, "grad_norm": 1.6973269810486684, "learning_rate": 9.212322936559263e-06, "loss": 0.5742, "step": 8927 }, { "epoch": 0.2630408202348158, "grad_norm": 1.568312499305174, "learning_rate": 9.212045866802968e-06, "loss": 0.4914, "step": 8928 }, { "epoch": 0.2630702826922794, "grad_norm": 1.5699271517141957, "learning_rate": 9.211768752492935e-06, "loss": 0.3924, "step": 8929 }, { "epoch": 0.26309974514974294, "grad_norm": 1.660900984014819, "learning_rate": 9.211491593632094e-06, "loss": 0.5931, "step": 8930 }, { "epoch": 0.26312920760720654, "grad_norm": 1.7942782511213384, "learning_rate": 9.21121439022338e-06, "loss": 0.5672, "step": 8931 }, { "epoch": 0.2631586700646701, "grad_norm": 1.4586221064136333, "learning_rate": 9.210937142269724e-06, "loss": 0.4891, "step": 8932 }, { "epoch": 0.2631881325221337, "grad_norm": 1.6488217690561435, "learning_rate": 9.210659849774057e-06, "loss": 0.5202, "step": 8933 }, { "epoch": 0.26321759497959724, "grad_norm": 1.7160532431278128, "learning_rate": 9.210382512739312e-06, "loss": 0.6238, "step": 8934 }, { "epoch": 0.26324705743706084, "grad_norm": 1.651074633739156, "learning_rate": 9.210105131168428e-06, "loss": 0.478, "step": 8935 }, { "epoch": 0.2632765198945244, "grad_norm": 1.5248534728767622, "learning_rate": 9.209827705064333e-06, "loss": 0.465, "step": 8936 }, { "epoch": 0.263305982351988, "grad_norm": 1.3880681376658153, "learning_rate": 9.209550234429962e-06, "loss": 0.4529, "step": 8937 }, { "epoch": 0.26333544480945154, "grad_norm": 1.6681591052673963, "learning_rate": 9.209272719268254e-06, "loss": 0.5167, "step": 8938 }, { "epoch": 0.26336490726691514, "grad_norm": 1.4590605262165484, "learning_rate": 9.20899515958214e-06, "loss": 0.4587, "step": 8939 }, { "epoch": 0.2633943697243787, "grad_norm": 1.5965287435935058, "learning_rate": 9.20871755537456e-06, "loss": 0.5107, "step": 8940 }, { "epoch": 0.2634238321818423, "grad_norm": 1.747450946361797, "learning_rate": 9.208439906648447e-06, "loss": 0.5152, "step": 8941 }, { "epoch": 0.2634532946393059, "grad_norm": 1.546707239106013, "learning_rate": 9.20816221340674e-06, "loss": 0.3377, "step": 8942 }, { "epoch": 0.26348275709676944, "grad_norm": 1.5113572206391725, "learning_rate": 9.207884475652373e-06, "loss": 0.4709, "step": 8943 }, { "epoch": 0.26351221955423304, "grad_norm": 1.6291634618645456, "learning_rate": 9.207606693388289e-06, "loss": 0.471, "step": 8944 }, { "epoch": 0.2635416820116966, "grad_norm": 1.524165671939151, "learning_rate": 9.207328866617424e-06, "loss": 0.5341, "step": 8945 }, { "epoch": 0.2635711444691602, "grad_norm": 1.5217536143060726, "learning_rate": 9.207050995342714e-06, "loss": 0.5035, "step": 8946 }, { "epoch": 0.26360060692662374, "grad_norm": 1.532762760866321, "learning_rate": 9.206773079567103e-06, "loss": 0.59, "step": 8947 }, { "epoch": 0.26363006938408734, "grad_norm": 1.417751847715374, "learning_rate": 9.206495119293527e-06, "loss": 0.3507, "step": 8948 }, { "epoch": 0.2636595318415509, "grad_norm": 1.5871692859815631, "learning_rate": 9.20621711452493e-06, "loss": 0.4569, "step": 8949 }, { "epoch": 0.2636889942990145, "grad_norm": 1.6058091866865973, "learning_rate": 9.205939065264247e-06, "loss": 0.4663, "step": 8950 }, { "epoch": 0.26371845675647804, "grad_norm": 1.678529866663454, "learning_rate": 9.205660971514423e-06, "loss": 0.4409, "step": 8951 }, { "epoch": 0.26374791921394164, "grad_norm": 1.6407109012923597, "learning_rate": 9.2053828332784e-06, "loss": 0.4541, "step": 8952 }, { "epoch": 0.2637773816714052, "grad_norm": 1.4731341266824056, "learning_rate": 9.20510465055912e-06, "loss": 0.3713, "step": 8953 }, { "epoch": 0.2638068441288688, "grad_norm": 1.6705980073796651, "learning_rate": 9.204826423359523e-06, "loss": 0.4061, "step": 8954 }, { "epoch": 0.2638363065863324, "grad_norm": 1.6795653904946932, "learning_rate": 9.204548151682555e-06, "loss": 0.4104, "step": 8955 }, { "epoch": 0.26386576904379594, "grad_norm": 1.720813197151284, "learning_rate": 9.204269835531156e-06, "loss": 0.4994, "step": 8956 }, { "epoch": 0.26389523150125954, "grad_norm": 1.5366686225144341, "learning_rate": 9.203991474908274e-06, "loss": 0.4683, "step": 8957 }, { "epoch": 0.2639246939587231, "grad_norm": 1.6442109837243817, "learning_rate": 9.203713069816848e-06, "loss": 0.6008, "step": 8958 }, { "epoch": 0.2639541564161867, "grad_norm": 1.5012045263000335, "learning_rate": 9.20343462025983e-06, "loss": 0.5337, "step": 8959 }, { "epoch": 0.26398361887365024, "grad_norm": 1.5981322942429599, "learning_rate": 9.20315612624016e-06, "loss": 0.5186, "step": 8960 }, { "epoch": 0.26401308133111384, "grad_norm": 1.8440026306751367, "learning_rate": 9.202877587760785e-06, "loss": 0.5429, "step": 8961 }, { "epoch": 0.2640425437885774, "grad_norm": 1.3927526895932434, "learning_rate": 9.202599004824653e-06, "loss": 0.3878, "step": 8962 }, { "epoch": 0.264072006246041, "grad_norm": 1.5733499936466613, "learning_rate": 9.20232037743471e-06, "loss": 0.5844, "step": 8963 }, { "epoch": 0.26410146870350454, "grad_norm": 1.7976164408384385, "learning_rate": 9.2020417055939e-06, "loss": 0.6326, "step": 8964 }, { "epoch": 0.26413093116096814, "grad_norm": 1.4559795324259872, "learning_rate": 9.201762989305177e-06, "loss": 0.4436, "step": 8965 }, { "epoch": 0.2641603936184317, "grad_norm": 1.7955139295618983, "learning_rate": 9.201484228571483e-06, "loss": 0.5128, "step": 8966 }, { "epoch": 0.2641898560758953, "grad_norm": 1.8518734879752106, "learning_rate": 9.20120542339577e-06, "loss": 0.5109, "step": 8967 }, { "epoch": 0.2642193185333589, "grad_norm": 1.4657714027151072, "learning_rate": 9.200926573780986e-06, "loss": 0.4858, "step": 8968 }, { "epoch": 0.26424878099082244, "grad_norm": 1.4233010673481743, "learning_rate": 9.200647679730081e-06, "loss": 0.3479, "step": 8969 }, { "epoch": 0.26427824344828604, "grad_norm": 1.368788729660028, "learning_rate": 9.200368741246006e-06, "loss": 0.3293, "step": 8970 }, { "epoch": 0.2643077059057496, "grad_norm": 1.676640331405768, "learning_rate": 9.20008975833171e-06, "loss": 0.5262, "step": 8971 }, { "epoch": 0.2643371683632132, "grad_norm": 1.53982167997332, "learning_rate": 9.199810730990144e-06, "loss": 0.3704, "step": 8972 }, { "epoch": 0.26436663082067674, "grad_norm": 1.6056261713653952, "learning_rate": 9.19953165922426e-06, "loss": 0.4575, "step": 8973 }, { "epoch": 0.26439609327814034, "grad_norm": 1.7322318383638207, "learning_rate": 9.199252543037012e-06, "loss": 0.5273, "step": 8974 }, { "epoch": 0.2644255557356039, "grad_norm": 1.5088127880408833, "learning_rate": 9.19897338243135e-06, "loss": 0.5719, "step": 8975 }, { "epoch": 0.2644550181930675, "grad_norm": 1.5895174144861512, "learning_rate": 9.198694177410225e-06, "loss": 0.5556, "step": 8976 }, { "epoch": 0.26448448065053104, "grad_norm": 1.5890907813780888, "learning_rate": 9.198414927976593e-06, "loss": 0.6697, "step": 8977 }, { "epoch": 0.26451394310799464, "grad_norm": 1.6097154261848015, "learning_rate": 9.198135634133408e-06, "loss": 0.5017, "step": 8978 }, { "epoch": 0.2645434055654582, "grad_norm": 1.5769643567393399, "learning_rate": 9.197856295883625e-06, "loss": 0.3615, "step": 8979 }, { "epoch": 0.2645728680229218, "grad_norm": 1.5613037696473946, "learning_rate": 9.197576913230196e-06, "loss": 0.5363, "step": 8980 }, { "epoch": 0.2646023304803854, "grad_norm": 1.5448119916989913, "learning_rate": 9.197297486176077e-06, "loss": 0.5243, "step": 8981 }, { "epoch": 0.26463179293784894, "grad_norm": 1.5469611219358832, "learning_rate": 9.197018014724226e-06, "loss": 0.4209, "step": 8982 }, { "epoch": 0.26466125539531254, "grad_norm": 1.4219768480005741, "learning_rate": 9.196738498877597e-06, "loss": 0.3895, "step": 8983 }, { "epoch": 0.2646907178527761, "grad_norm": 1.7053003150609236, "learning_rate": 9.196458938639146e-06, "loss": 0.4759, "step": 8984 }, { "epoch": 0.2647201803102397, "grad_norm": 1.6845818166576714, "learning_rate": 9.196179334011833e-06, "loss": 0.5535, "step": 8985 }, { "epoch": 0.26474964276770324, "grad_norm": 1.3694835543502115, "learning_rate": 9.195899684998612e-06, "loss": 0.3415, "step": 8986 }, { "epoch": 0.26477910522516684, "grad_norm": 1.6678322268561956, "learning_rate": 9.195619991602444e-06, "loss": 0.523, "step": 8987 }, { "epoch": 0.2648085676826304, "grad_norm": 1.4708408807450686, "learning_rate": 9.195340253826287e-06, "loss": 0.4155, "step": 8988 }, { "epoch": 0.264838030140094, "grad_norm": 1.5684625979289262, "learning_rate": 9.195060471673099e-06, "loss": 0.4169, "step": 8989 }, { "epoch": 0.26486749259755754, "grad_norm": 1.4592414479166462, "learning_rate": 9.19478064514584e-06, "loss": 0.3548, "step": 8990 }, { "epoch": 0.26489695505502114, "grad_norm": 1.6858677066036676, "learning_rate": 9.194500774247469e-06, "loss": 0.5081, "step": 8991 }, { "epoch": 0.2649264175124847, "grad_norm": 1.3815289965995727, "learning_rate": 9.194220858980947e-06, "loss": 0.3672, "step": 8992 }, { "epoch": 0.2649558799699483, "grad_norm": 1.5601405222907776, "learning_rate": 9.193940899349235e-06, "loss": 0.5409, "step": 8993 }, { "epoch": 0.2649853424274119, "grad_norm": 1.8333790176865064, "learning_rate": 9.193660895355295e-06, "loss": 0.4849, "step": 8994 }, { "epoch": 0.26501480488487544, "grad_norm": 1.6297443058063783, "learning_rate": 9.193380847002088e-06, "loss": 0.6568, "step": 8995 }, { "epoch": 0.26504426734233905, "grad_norm": 1.458527344799091, "learning_rate": 9.193100754292575e-06, "loss": 0.4808, "step": 8996 }, { "epoch": 0.2650737297998026, "grad_norm": 1.6626842502403967, "learning_rate": 9.192820617229721e-06, "loss": 0.4514, "step": 8997 }, { "epoch": 0.2651031922572662, "grad_norm": 1.5050255534603783, "learning_rate": 9.19254043581649e-06, "loss": 0.4135, "step": 8998 }, { "epoch": 0.26513265471472974, "grad_norm": 1.5098886307444208, "learning_rate": 9.192260210055842e-06, "loss": 0.4135, "step": 8999 }, { "epoch": 0.26516211717219335, "grad_norm": 1.4997134053657017, "learning_rate": 9.191979939950744e-06, "loss": 0.5281, "step": 9000 }, { "epoch": 0.2651915796296569, "grad_norm": 1.6124491026360617, "learning_rate": 9.191699625504158e-06, "loss": 0.482, "step": 9001 }, { "epoch": 0.2652210420871205, "grad_norm": 1.6772901036534837, "learning_rate": 9.191419266719052e-06, "loss": 0.5279, "step": 9002 }, { "epoch": 0.26525050454458404, "grad_norm": 1.6307338196058816, "learning_rate": 9.191138863598392e-06, "loss": 0.451, "step": 9003 }, { "epoch": 0.26527996700204765, "grad_norm": 1.5704789484080082, "learning_rate": 9.190858416145141e-06, "loss": 0.5161, "step": 9004 }, { "epoch": 0.2653094294595112, "grad_norm": 1.527982180293935, "learning_rate": 9.190577924362268e-06, "loss": 0.4226, "step": 9005 }, { "epoch": 0.2653388919169748, "grad_norm": 1.4592774353309226, "learning_rate": 9.190297388252736e-06, "loss": 0.4022, "step": 9006 }, { "epoch": 0.2653683543744384, "grad_norm": 1.9149917424274, "learning_rate": 9.190016807819517e-06, "loss": 0.6121, "step": 9007 }, { "epoch": 0.26539781683190194, "grad_norm": 1.6290892461739637, "learning_rate": 9.189736183065578e-06, "loss": 0.4393, "step": 9008 }, { "epoch": 0.26542727928936555, "grad_norm": 1.4695077100543117, "learning_rate": 9.189455513993886e-06, "loss": 0.3982, "step": 9009 }, { "epoch": 0.2654567417468291, "grad_norm": 1.6180916399461336, "learning_rate": 9.189174800607409e-06, "loss": 0.5475, "step": 9010 }, { "epoch": 0.2654862042042927, "grad_norm": 1.5647417023125492, "learning_rate": 9.188894042909118e-06, "loss": 0.5103, "step": 9011 }, { "epoch": 0.26551566666175624, "grad_norm": 1.5188866992497247, "learning_rate": 9.188613240901985e-06, "loss": 0.4655, "step": 9012 }, { "epoch": 0.26554512911921985, "grad_norm": 1.5145481111132009, "learning_rate": 9.188332394588974e-06, "loss": 0.4706, "step": 9013 }, { "epoch": 0.2655745915766834, "grad_norm": 1.5506484712872246, "learning_rate": 9.18805150397306e-06, "loss": 0.4689, "step": 9014 }, { "epoch": 0.265604054034147, "grad_norm": 1.5352046248041296, "learning_rate": 9.187770569057217e-06, "loss": 0.5899, "step": 9015 }, { "epoch": 0.26563351649161054, "grad_norm": 1.5451820284521574, "learning_rate": 9.18748958984441e-06, "loss": 0.5436, "step": 9016 }, { "epoch": 0.26566297894907415, "grad_norm": 1.3741008323087998, "learning_rate": 9.187208566337615e-06, "loss": 0.4278, "step": 9017 }, { "epoch": 0.2656924414065377, "grad_norm": 1.6386239732148768, "learning_rate": 9.186927498539804e-06, "loss": 0.5771, "step": 9018 }, { "epoch": 0.2657219038640013, "grad_norm": 1.4940085876767673, "learning_rate": 9.18664638645395e-06, "loss": 0.4975, "step": 9019 }, { "epoch": 0.2657513663214649, "grad_norm": 1.4850683106722717, "learning_rate": 9.186365230083027e-06, "loss": 0.4721, "step": 9020 }, { "epoch": 0.26578082877892845, "grad_norm": 1.8138723515209094, "learning_rate": 9.186084029430007e-06, "loss": 0.5551, "step": 9021 }, { "epoch": 0.26581029123639205, "grad_norm": 1.504879271851693, "learning_rate": 9.185802784497867e-06, "loss": 0.4185, "step": 9022 }, { "epoch": 0.2658397536938556, "grad_norm": 1.6039306543095244, "learning_rate": 9.18552149528958e-06, "loss": 0.4817, "step": 9023 }, { "epoch": 0.2658692161513192, "grad_norm": 1.5780480176584133, "learning_rate": 9.185240161808124e-06, "loss": 0.4955, "step": 9024 }, { "epoch": 0.26589867860878275, "grad_norm": 1.5237467676239889, "learning_rate": 9.18495878405647e-06, "loss": 0.4718, "step": 9025 }, { "epoch": 0.26592814106624635, "grad_norm": 1.5381018484480524, "learning_rate": 9.1846773620376e-06, "loss": 0.4189, "step": 9026 }, { "epoch": 0.2659576035237099, "grad_norm": 1.6300271758396814, "learning_rate": 9.184395895754488e-06, "loss": 0.4776, "step": 9027 }, { "epoch": 0.2659870659811735, "grad_norm": 1.547715627989722, "learning_rate": 9.18411438521011e-06, "loss": 0.5798, "step": 9028 }, { "epoch": 0.26601652843863705, "grad_norm": 1.7222341070577523, "learning_rate": 9.183832830407446e-06, "loss": 0.5902, "step": 9029 }, { "epoch": 0.26604599089610065, "grad_norm": 1.5396344308292893, "learning_rate": 9.183551231349474e-06, "loss": 0.5304, "step": 9030 }, { "epoch": 0.2660754533535642, "grad_norm": 1.2378431472431832, "learning_rate": 9.18326958803917e-06, "loss": 0.362, "step": 9031 }, { "epoch": 0.2661049158110278, "grad_norm": 1.5497357949407025, "learning_rate": 9.182987900479518e-06, "loss": 0.3932, "step": 9032 }, { "epoch": 0.2661343782684914, "grad_norm": 1.678790734942936, "learning_rate": 9.182706168673492e-06, "loss": 0.6424, "step": 9033 }, { "epoch": 0.26616384072595495, "grad_norm": 1.6105604839766618, "learning_rate": 9.182424392624077e-06, "loss": 0.4196, "step": 9034 }, { "epoch": 0.26619330318341855, "grad_norm": 1.6832864476230618, "learning_rate": 9.18214257233425e-06, "loss": 0.4422, "step": 9035 }, { "epoch": 0.2662227656408821, "grad_norm": 1.6296896079111185, "learning_rate": 9.181860707806994e-06, "loss": 0.6512, "step": 9036 }, { "epoch": 0.2662522280983457, "grad_norm": 1.6402741731618737, "learning_rate": 9.18157879904529e-06, "loss": 0.6097, "step": 9037 }, { "epoch": 0.26628169055580925, "grad_norm": 1.5125347091575005, "learning_rate": 9.18129684605212e-06, "loss": 0.5085, "step": 9038 }, { "epoch": 0.26631115301327285, "grad_norm": 1.9279997642722209, "learning_rate": 9.181014848830467e-06, "loss": 0.6604, "step": 9039 }, { "epoch": 0.2663406154707364, "grad_norm": 1.5122106025735047, "learning_rate": 9.180732807383312e-06, "loss": 0.49, "step": 9040 }, { "epoch": 0.2663700779282, "grad_norm": 1.5543271056833123, "learning_rate": 9.18045072171364e-06, "loss": 0.4146, "step": 9041 }, { "epoch": 0.26639954038566355, "grad_norm": 1.5777346222319355, "learning_rate": 9.180168591824434e-06, "loss": 0.5252, "step": 9042 }, { "epoch": 0.26642900284312715, "grad_norm": 1.481819644454871, "learning_rate": 9.179886417718677e-06, "loss": 0.4183, "step": 9043 }, { "epoch": 0.2664584653005907, "grad_norm": 1.6800420106289293, "learning_rate": 9.179604199399359e-06, "loss": 0.603, "step": 9044 }, { "epoch": 0.2664879277580543, "grad_norm": 1.6135295944683876, "learning_rate": 9.17932193686946e-06, "loss": 0.4388, "step": 9045 }, { "epoch": 0.2665173902155179, "grad_norm": 1.5815477096766826, "learning_rate": 9.179039630131965e-06, "loss": 0.4917, "step": 9046 }, { "epoch": 0.26654685267298145, "grad_norm": 1.5485663630050202, "learning_rate": 9.178757279189866e-06, "loss": 0.4137, "step": 9047 }, { "epoch": 0.26657631513044505, "grad_norm": 1.7017653066854068, "learning_rate": 9.178474884046143e-06, "loss": 0.4973, "step": 9048 }, { "epoch": 0.2666057775879086, "grad_norm": 1.8661249242224334, "learning_rate": 9.178192444703786e-06, "loss": 0.5533, "step": 9049 }, { "epoch": 0.2666352400453722, "grad_norm": 1.4746382605982704, "learning_rate": 9.177909961165785e-06, "loss": 0.4547, "step": 9050 }, { "epoch": 0.26666470250283575, "grad_norm": 1.728144924234703, "learning_rate": 9.177627433435123e-06, "loss": 0.6969, "step": 9051 }, { "epoch": 0.26669416496029935, "grad_norm": 1.5264687286963574, "learning_rate": 9.177344861514795e-06, "loss": 0.4274, "step": 9052 }, { "epoch": 0.2667236274177629, "grad_norm": 1.451477963157919, "learning_rate": 9.177062245407783e-06, "loss": 0.4256, "step": 9053 }, { "epoch": 0.2667530898752265, "grad_norm": 1.575365274847276, "learning_rate": 9.176779585117081e-06, "loss": 0.4373, "step": 9054 }, { "epoch": 0.26678255233269005, "grad_norm": 1.3771356019945709, "learning_rate": 9.176496880645676e-06, "loss": 0.432, "step": 9055 }, { "epoch": 0.26681201479015365, "grad_norm": 1.6191985438656444, "learning_rate": 9.176214131996561e-06, "loss": 0.543, "step": 9056 }, { "epoch": 0.2668414772476172, "grad_norm": 1.4016497085122677, "learning_rate": 9.175931339172725e-06, "loss": 0.4666, "step": 9057 }, { "epoch": 0.2668709397050808, "grad_norm": 1.4789412680985727, "learning_rate": 9.175648502177161e-06, "loss": 0.3952, "step": 9058 }, { "epoch": 0.2669004021625444, "grad_norm": 1.5440063930801406, "learning_rate": 9.175365621012859e-06, "loss": 0.4481, "step": 9059 }, { "epoch": 0.26692986462000795, "grad_norm": 1.5626772178479822, "learning_rate": 9.175082695682812e-06, "loss": 0.5429, "step": 9060 }, { "epoch": 0.26695932707747155, "grad_norm": 1.5793785353558698, "learning_rate": 9.174799726190014e-06, "loss": 0.569, "step": 9061 }, { "epoch": 0.2669887895349351, "grad_norm": 1.683872614212988, "learning_rate": 9.174516712537455e-06, "loss": 0.5656, "step": 9062 }, { "epoch": 0.2670182519923987, "grad_norm": 1.6274934501388103, "learning_rate": 9.174233654728131e-06, "loss": 0.4901, "step": 9063 }, { "epoch": 0.26704771444986225, "grad_norm": 1.7403020473252584, "learning_rate": 9.173950552765033e-06, "loss": 0.5426, "step": 9064 }, { "epoch": 0.26707717690732585, "grad_norm": 1.4311644887073232, "learning_rate": 9.173667406651163e-06, "loss": 0.443, "step": 9065 }, { "epoch": 0.2671066393647894, "grad_norm": 1.5194804214950286, "learning_rate": 9.173384216389507e-06, "loss": 0.5566, "step": 9066 }, { "epoch": 0.267136101822253, "grad_norm": 1.4618867847967645, "learning_rate": 9.173100981983067e-06, "loss": 0.4963, "step": 9067 }, { "epoch": 0.26716556427971655, "grad_norm": 1.4352343625244424, "learning_rate": 9.172817703434836e-06, "loss": 0.3607, "step": 9068 }, { "epoch": 0.26719502673718015, "grad_norm": 1.544565101453313, "learning_rate": 9.17253438074781e-06, "loss": 0.5662, "step": 9069 }, { "epoch": 0.2672244891946437, "grad_norm": 1.634457793105362, "learning_rate": 9.172251013924989e-06, "loss": 0.4593, "step": 9070 }, { "epoch": 0.2672539516521073, "grad_norm": 1.5462503688771032, "learning_rate": 9.171967602969368e-06, "loss": 0.3857, "step": 9071 }, { "epoch": 0.2672834141095709, "grad_norm": 1.5455792826690458, "learning_rate": 9.171684147883943e-06, "loss": 0.4543, "step": 9072 }, { "epoch": 0.26731287656703445, "grad_norm": 1.680348552865599, "learning_rate": 9.171400648671717e-06, "loss": 0.6134, "step": 9073 }, { "epoch": 0.26734233902449805, "grad_norm": 1.652008043496443, "learning_rate": 9.171117105335685e-06, "loss": 0.4967, "step": 9074 }, { "epoch": 0.2673718014819616, "grad_norm": 1.3712019352557114, "learning_rate": 9.170833517878848e-06, "loss": 0.4111, "step": 9075 }, { "epoch": 0.2674012639394252, "grad_norm": 1.5979159470887594, "learning_rate": 9.170549886304204e-06, "loss": 0.5175, "step": 9076 }, { "epoch": 0.26743072639688875, "grad_norm": 1.6435356736573634, "learning_rate": 9.170266210614754e-06, "loss": 0.5065, "step": 9077 }, { "epoch": 0.26746018885435235, "grad_norm": 1.7007549790508396, "learning_rate": 9.1699824908135e-06, "loss": 0.4937, "step": 9078 }, { "epoch": 0.2674896513118159, "grad_norm": 1.4828026999173272, "learning_rate": 9.169698726903442e-06, "loss": 0.411, "step": 9079 }, { "epoch": 0.2675191137692795, "grad_norm": 1.672345097383764, "learning_rate": 9.169414918887583e-06, "loss": 0.5769, "step": 9080 }, { "epoch": 0.26754857622674305, "grad_norm": 1.4693956432115443, "learning_rate": 9.169131066768922e-06, "loss": 0.3569, "step": 9081 }, { "epoch": 0.26757803868420665, "grad_norm": 1.5014790847363066, "learning_rate": 9.168847170550463e-06, "loss": 0.4234, "step": 9082 }, { "epoch": 0.2676075011416702, "grad_norm": 1.484262471723984, "learning_rate": 9.16856323023521e-06, "loss": 0.3523, "step": 9083 }, { "epoch": 0.2676369635991338, "grad_norm": 1.6154696276509932, "learning_rate": 9.168279245826165e-06, "loss": 0.5108, "step": 9084 }, { "epoch": 0.2676664260565974, "grad_norm": 1.4824763965589391, "learning_rate": 9.167995217326332e-06, "loss": 0.5383, "step": 9085 }, { "epoch": 0.26769588851406095, "grad_norm": 1.6210353511892632, "learning_rate": 9.167711144738717e-06, "loss": 0.4832, "step": 9086 }, { "epoch": 0.26772535097152456, "grad_norm": 1.6874505020117594, "learning_rate": 9.167427028066322e-06, "loss": 0.5928, "step": 9087 }, { "epoch": 0.2677548134289881, "grad_norm": 1.4902354773643764, "learning_rate": 9.167142867312155e-06, "loss": 0.4217, "step": 9088 }, { "epoch": 0.2677842758864517, "grad_norm": 1.4819894566321778, "learning_rate": 9.16685866247922e-06, "loss": 0.4814, "step": 9089 }, { "epoch": 0.26781373834391525, "grad_norm": 1.4271789506830252, "learning_rate": 9.166574413570527e-06, "loss": 0.4594, "step": 9090 }, { "epoch": 0.26784320080137886, "grad_norm": 1.7518481249460705, "learning_rate": 9.166290120589077e-06, "loss": 0.6763, "step": 9091 }, { "epoch": 0.2678726632588424, "grad_norm": 1.402274874342755, "learning_rate": 9.166005783537879e-06, "loss": 0.4982, "step": 9092 }, { "epoch": 0.267902125716306, "grad_norm": 1.290955661356243, "learning_rate": 9.165721402419943e-06, "loss": 0.4277, "step": 9093 }, { "epoch": 0.26793158817376955, "grad_norm": 1.512832716820004, "learning_rate": 9.165436977238276e-06, "loss": 0.5371, "step": 9094 }, { "epoch": 0.26796105063123316, "grad_norm": 1.6250884969319817, "learning_rate": 9.165152507995884e-06, "loss": 0.5378, "step": 9095 }, { "epoch": 0.2679905130886967, "grad_norm": 1.8153643876913947, "learning_rate": 9.164867994695782e-06, "loss": 0.5824, "step": 9096 }, { "epoch": 0.2680199755461603, "grad_norm": 1.5412719823947942, "learning_rate": 9.164583437340972e-06, "loss": 0.3828, "step": 9097 }, { "epoch": 0.2680494380036239, "grad_norm": 1.5617070859938176, "learning_rate": 9.164298835934467e-06, "loss": 0.6161, "step": 9098 }, { "epoch": 0.26807890046108745, "grad_norm": 1.5097066454892605, "learning_rate": 9.16401419047928e-06, "loss": 0.5372, "step": 9099 }, { "epoch": 0.26810836291855106, "grad_norm": 1.6620465640728983, "learning_rate": 9.16372950097842e-06, "loss": 0.5653, "step": 9100 }, { "epoch": 0.2681378253760146, "grad_norm": 1.8057696682292403, "learning_rate": 9.163444767434898e-06, "loss": 0.4911, "step": 9101 }, { "epoch": 0.2681672878334782, "grad_norm": 1.3771693228116406, "learning_rate": 9.163159989851727e-06, "loss": 0.497, "step": 9102 }, { "epoch": 0.26819675029094175, "grad_norm": 1.4661675414999513, "learning_rate": 9.162875168231917e-06, "loss": 0.2993, "step": 9103 }, { "epoch": 0.26822621274840536, "grad_norm": 1.7232499596724702, "learning_rate": 9.162590302578482e-06, "loss": 0.5858, "step": 9104 }, { "epoch": 0.2682556752058689, "grad_norm": 1.5166179936178354, "learning_rate": 9.162305392894436e-06, "loss": 0.5074, "step": 9105 }, { "epoch": 0.2682851376633325, "grad_norm": 1.6077139361166963, "learning_rate": 9.162020439182793e-06, "loss": 0.5225, "step": 9106 }, { "epoch": 0.26831460012079605, "grad_norm": 1.6186219306527458, "learning_rate": 9.161735441446563e-06, "loss": 0.5375, "step": 9107 }, { "epoch": 0.26834406257825966, "grad_norm": 1.5586431537419896, "learning_rate": 9.161450399688768e-06, "loss": 0.4192, "step": 9108 }, { "epoch": 0.2683735250357232, "grad_norm": 1.4989598390606254, "learning_rate": 9.161165313912417e-06, "loss": 0.474, "step": 9109 }, { "epoch": 0.2684029874931868, "grad_norm": 1.5253668516001557, "learning_rate": 9.160880184120527e-06, "loss": 0.4413, "step": 9110 }, { "epoch": 0.2684324499506504, "grad_norm": 1.5279734629105068, "learning_rate": 9.160595010316114e-06, "loss": 0.497, "step": 9111 }, { "epoch": 0.26846191240811396, "grad_norm": 1.6183870509354927, "learning_rate": 9.160309792502195e-06, "loss": 0.4767, "step": 9112 }, { "epoch": 0.26849137486557756, "grad_norm": 1.6518012964601103, "learning_rate": 9.160024530681789e-06, "loss": 0.5218, "step": 9113 }, { "epoch": 0.2685208373230411, "grad_norm": 1.5267995944185229, "learning_rate": 9.159739224857911e-06, "loss": 0.5659, "step": 9114 }, { "epoch": 0.2685502997805047, "grad_norm": 1.4618305664017563, "learning_rate": 9.159453875033577e-06, "loss": 0.3886, "step": 9115 }, { "epoch": 0.26857976223796826, "grad_norm": 1.3935230892046349, "learning_rate": 9.159168481211809e-06, "loss": 0.4717, "step": 9116 }, { "epoch": 0.26860922469543186, "grad_norm": 1.6604406099093307, "learning_rate": 9.158883043395624e-06, "loss": 0.5758, "step": 9117 }, { "epoch": 0.2686386871528954, "grad_norm": 1.5269134833617033, "learning_rate": 9.158597561588042e-06, "loss": 0.481, "step": 9118 }, { "epoch": 0.268668149610359, "grad_norm": 1.6584548464022228, "learning_rate": 9.158312035792083e-06, "loss": 0.5861, "step": 9119 }, { "epoch": 0.26869761206782256, "grad_norm": 1.390616279261648, "learning_rate": 9.158026466010765e-06, "loss": 0.3503, "step": 9120 }, { "epoch": 0.26872707452528616, "grad_norm": 1.6907730941674908, "learning_rate": 9.15774085224711e-06, "loss": 0.5456, "step": 9121 }, { "epoch": 0.2687565369827497, "grad_norm": 1.5592581502357195, "learning_rate": 9.157455194504142e-06, "loss": 0.4459, "step": 9122 }, { "epoch": 0.2687859994402133, "grad_norm": 1.5761920569897707, "learning_rate": 9.157169492784878e-06, "loss": 0.613, "step": 9123 }, { "epoch": 0.2688154618976769, "grad_norm": 1.5812043047844282, "learning_rate": 9.156883747092342e-06, "loss": 0.5765, "step": 9124 }, { "epoch": 0.26884492435514046, "grad_norm": 1.5238488304607147, "learning_rate": 9.156597957429556e-06, "loss": 0.4132, "step": 9125 }, { "epoch": 0.26887438681260406, "grad_norm": 1.6405596917732321, "learning_rate": 9.156312123799544e-06, "loss": 0.4953, "step": 9126 }, { "epoch": 0.2689038492700676, "grad_norm": 1.6897371669724053, "learning_rate": 9.156026246205332e-06, "loss": 0.5694, "step": 9127 }, { "epoch": 0.2689333117275312, "grad_norm": 1.6331271844453301, "learning_rate": 9.155740324649938e-06, "loss": 0.367, "step": 9128 }, { "epoch": 0.26896277418499476, "grad_norm": 1.6149022941980973, "learning_rate": 9.15545435913639e-06, "loss": 0.5052, "step": 9129 }, { "epoch": 0.26899223664245836, "grad_norm": 1.5954285535757384, "learning_rate": 9.15516834966771e-06, "loss": 0.5166, "step": 9130 }, { "epoch": 0.2690216990999219, "grad_norm": 1.714186024680277, "learning_rate": 9.154882296246929e-06, "loss": 0.5309, "step": 9131 }, { "epoch": 0.2690511615573855, "grad_norm": 1.5266808924511115, "learning_rate": 9.154596198877067e-06, "loss": 0.4022, "step": 9132 }, { "epoch": 0.26908062401484906, "grad_norm": 1.8833828406669122, "learning_rate": 9.154310057561154e-06, "loss": 0.6616, "step": 9133 }, { "epoch": 0.26911008647231266, "grad_norm": 1.5996911850514373, "learning_rate": 9.154023872302216e-06, "loss": 0.5559, "step": 9134 }, { "epoch": 0.2691395489297762, "grad_norm": 1.5589679161654213, "learning_rate": 9.153737643103278e-06, "loss": 0.4808, "step": 9135 }, { "epoch": 0.2691690113872398, "grad_norm": 1.529292735253962, "learning_rate": 9.153451369967368e-06, "loss": 0.4174, "step": 9136 }, { "epoch": 0.2691984738447034, "grad_norm": 1.449792157059758, "learning_rate": 9.153165052897518e-06, "loss": 0.6067, "step": 9137 }, { "epoch": 0.26922793630216696, "grad_norm": 1.5080359088282351, "learning_rate": 9.152878691896752e-06, "loss": 0.4638, "step": 9138 }, { "epoch": 0.26925739875963056, "grad_norm": 1.513730367824789, "learning_rate": 9.1525922869681e-06, "loss": 0.519, "step": 9139 }, { "epoch": 0.2692868612170941, "grad_norm": 1.7478222994566714, "learning_rate": 9.152305838114594e-06, "loss": 0.6598, "step": 9140 }, { "epoch": 0.2693163236745577, "grad_norm": 1.4672488593397477, "learning_rate": 9.152019345339261e-06, "loss": 0.4968, "step": 9141 }, { "epoch": 0.26934578613202126, "grad_norm": 1.5478035495998441, "learning_rate": 9.151732808645135e-06, "loss": 0.4758, "step": 9142 }, { "epoch": 0.26937524858948486, "grad_norm": 1.4871339383477742, "learning_rate": 9.151446228035243e-06, "loss": 0.4158, "step": 9143 }, { "epoch": 0.2694047110469484, "grad_norm": 1.433657868887154, "learning_rate": 9.151159603512617e-06, "loss": 0.4122, "step": 9144 }, { "epoch": 0.269434173504412, "grad_norm": 1.4549646511955303, "learning_rate": 9.15087293508029e-06, "loss": 0.4824, "step": 9145 }, { "epoch": 0.26946363596187556, "grad_norm": 1.5348692174190668, "learning_rate": 9.150586222741296e-06, "loss": 0.546, "step": 9146 }, { "epoch": 0.26949309841933916, "grad_norm": 1.3486648038199356, "learning_rate": 9.150299466498666e-06, "loss": 0.413, "step": 9147 }, { "epoch": 0.2695225608768027, "grad_norm": 1.481061998774869, "learning_rate": 9.15001266635543e-06, "loss": 0.4145, "step": 9148 }, { "epoch": 0.2695520233342663, "grad_norm": 1.51687239968425, "learning_rate": 9.149725822314628e-06, "loss": 0.501, "step": 9149 }, { "epoch": 0.2695814857917299, "grad_norm": 1.5108260924521213, "learning_rate": 9.14943893437929e-06, "loss": 0.3932, "step": 9150 }, { "epoch": 0.26961094824919346, "grad_norm": 1.5646380431448945, "learning_rate": 9.14915200255245e-06, "loss": 0.3885, "step": 9151 }, { "epoch": 0.26964041070665706, "grad_norm": 1.7908957014895845, "learning_rate": 9.148865026837147e-06, "loss": 0.5281, "step": 9152 }, { "epoch": 0.2696698731641206, "grad_norm": 1.4125722170985175, "learning_rate": 9.148578007236413e-06, "loss": 0.4041, "step": 9153 }, { "epoch": 0.2696993356215842, "grad_norm": 1.5081565282874894, "learning_rate": 9.148290943753286e-06, "loss": 0.5056, "step": 9154 }, { "epoch": 0.26972879807904776, "grad_norm": 1.8856087023232577, "learning_rate": 9.1480038363908e-06, "loss": 0.6087, "step": 9155 }, { "epoch": 0.26975826053651136, "grad_norm": 1.448441669574631, "learning_rate": 9.147716685151996e-06, "loss": 0.4097, "step": 9156 }, { "epoch": 0.2697877229939749, "grad_norm": 2.0473209949505975, "learning_rate": 9.147429490039904e-06, "loss": 0.4836, "step": 9157 }, { "epoch": 0.2698171854514385, "grad_norm": 1.6404909968930188, "learning_rate": 9.147142251057571e-06, "loss": 0.4287, "step": 9158 }, { "epoch": 0.26984664790890206, "grad_norm": 1.6309342661445216, "learning_rate": 9.14685496820803e-06, "loss": 0.5799, "step": 9159 }, { "epoch": 0.26987611036636566, "grad_norm": 1.6611179402074192, "learning_rate": 9.146567641494322e-06, "loss": 0.6654, "step": 9160 }, { "epoch": 0.2699055728238292, "grad_norm": 1.444432271485526, "learning_rate": 9.146280270919485e-06, "loss": 0.5088, "step": 9161 }, { "epoch": 0.2699350352812928, "grad_norm": 1.991194733602625, "learning_rate": 9.145992856486558e-06, "loss": 0.681, "step": 9162 }, { "epoch": 0.2699644977387564, "grad_norm": 1.3758902640451909, "learning_rate": 9.145705398198582e-06, "loss": 0.4845, "step": 9163 }, { "epoch": 0.26999396019621996, "grad_norm": 1.468073577042095, "learning_rate": 9.1454178960586e-06, "loss": 0.4425, "step": 9164 }, { "epoch": 0.27002342265368356, "grad_norm": 1.604276392494532, "learning_rate": 9.145130350069647e-06, "loss": 0.6178, "step": 9165 }, { "epoch": 0.2700528851111471, "grad_norm": 1.575621164009716, "learning_rate": 9.144842760234771e-06, "loss": 0.6587, "step": 9166 }, { "epoch": 0.2700823475686107, "grad_norm": 1.4866775056415764, "learning_rate": 9.144555126557011e-06, "loss": 0.4324, "step": 9167 }, { "epoch": 0.27011181002607426, "grad_norm": 1.5816837320276658, "learning_rate": 9.14426744903941e-06, "loss": 0.5174, "step": 9168 }, { "epoch": 0.27014127248353786, "grad_norm": 1.5706300626775522, "learning_rate": 9.14397972768501e-06, "loss": 0.5786, "step": 9169 }, { "epoch": 0.2701707349410014, "grad_norm": 1.5039401009827895, "learning_rate": 9.143691962496857e-06, "loss": 0.4793, "step": 9170 }, { "epoch": 0.270200197398465, "grad_norm": 1.507926101216233, "learning_rate": 9.143404153477993e-06, "loss": 0.4573, "step": 9171 }, { "epoch": 0.27022965985592856, "grad_norm": 1.4196070709884618, "learning_rate": 9.143116300631462e-06, "loss": 0.4161, "step": 9172 }, { "epoch": 0.27025912231339216, "grad_norm": 1.4953936341577405, "learning_rate": 9.142828403960309e-06, "loss": 0.5185, "step": 9173 }, { "epoch": 0.2702885847708557, "grad_norm": 1.2996174901430495, "learning_rate": 9.14254046346758e-06, "loss": 0.3983, "step": 9174 }, { "epoch": 0.2703180472283193, "grad_norm": 1.7070533059826716, "learning_rate": 9.14225247915632e-06, "loss": 0.5518, "step": 9175 }, { "epoch": 0.2703475096857829, "grad_norm": 1.6083372347943297, "learning_rate": 9.141964451029577e-06, "loss": 0.4105, "step": 9176 }, { "epoch": 0.27037697214324646, "grad_norm": 1.5218034574705477, "learning_rate": 9.141676379090395e-06, "loss": 0.4782, "step": 9177 }, { "epoch": 0.27040643460071007, "grad_norm": 1.459167133088061, "learning_rate": 9.141388263341824e-06, "loss": 0.4157, "step": 9178 }, { "epoch": 0.2704358970581736, "grad_norm": 1.4410921620762627, "learning_rate": 9.141100103786908e-06, "loss": 0.4833, "step": 9179 }, { "epoch": 0.2704653595156372, "grad_norm": 1.5697410731934645, "learning_rate": 9.140811900428699e-06, "loss": 0.5918, "step": 9180 }, { "epoch": 0.27049482197310076, "grad_norm": 1.4678860142351469, "learning_rate": 9.140523653270243e-06, "loss": 0.4548, "step": 9181 }, { "epoch": 0.27052428443056437, "grad_norm": 1.6596879177238353, "learning_rate": 9.140235362314591e-06, "loss": 0.5833, "step": 9182 }, { "epoch": 0.2705537468880279, "grad_norm": 1.4234194442743073, "learning_rate": 9.139947027564789e-06, "loss": 0.4782, "step": 9183 }, { "epoch": 0.2705832093454915, "grad_norm": 1.710685102857218, "learning_rate": 9.13965864902389e-06, "loss": 0.6588, "step": 9184 }, { "epoch": 0.27061267180295506, "grad_norm": 1.7561771027304454, "learning_rate": 9.139370226694943e-06, "loss": 0.4993, "step": 9185 }, { "epoch": 0.27064213426041867, "grad_norm": 1.538188504310721, "learning_rate": 9.139081760580999e-06, "loss": 0.4583, "step": 9186 }, { "epoch": 0.2706715967178822, "grad_norm": 1.5731878790185214, "learning_rate": 9.13879325068511e-06, "loss": 0.5293, "step": 9187 }, { "epoch": 0.2707010591753458, "grad_norm": 1.661768146459231, "learning_rate": 9.138504697010329e-06, "loss": 0.5978, "step": 9188 }, { "epoch": 0.2707305216328094, "grad_norm": 1.6617737391679417, "learning_rate": 9.138216099559704e-06, "loss": 0.5115, "step": 9189 }, { "epoch": 0.27075998409027296, "grad_norm": 1.5157134889887385, "learning_rate": 9.13792745833629e-06, "loss": 0.5438, "step": 9190 }, { "epoch": 0.27078944654773657, "grad_norm": 1.5880263127875567, "learning_rate": 9.137638773343142e-06, "loss": 0.4885, "step": 9191 }, { "epoch": 0.2708189090052001, "grad_norm": 1.5829741295488846, "learning_rate": 9.137350044583313e-06, "loss": 0.4972, "step": 9192 }, { "epoch": 0.2708483714626637, "grad_norm": 1.7003907066830921, "learning_rate": 9.137061272059853e-06, "loss": 0.4812, "step": 9193 }, { "epoch": 0.27087783392012726, "grad_norm": 1.7880106650587908, "learning_rate": 9.136772455775822e-06, "loss": 0.5956, "step": 9194 }, { "epoch": 0.27090729637759087, "grad_norm": 1.4825520144685196, "learning_rate": 9.136483595734274e-06, "loss": 0.5282, "step": 9195 }, { "epoch": 0.2709367588350544, "grad_norm": 1.5378553485360162, "learning_rate": 9.136194691938261e-06, "loss": 0.5916, "step": 9196 }, { "epoch": 0.270966221292518, "grad_norm": 1.7078351396870899, "learning_rate": 9.13590574439084e-06, "loss": 0.4263, "step": 9197 }, { "epoch": 0.27099568374998156, "grad_norm": 1.6573524287689616, "learning_rate": 9.13561675309507e-06, "loss": 0.444, "step": 9198 }, { "epoch": 0.27102514620744517, "grad_norm": 1.6838466340398748, "learning_rate": 9.135327718054006e-06, "loss": 0.5496, "step": 9199 }, { "epoch": 0.2710546086649087, "grad_norm": 1.5068874035038768, "learning_rate": 9.135038639270707e-06, "loss": 0.4899, "step": 9200 }, { "epoch": 0.2710840711223723, "grad_norm": 1.2865436647328912, "learning_rate": 9.134749516748228e-06, "loss": 0.3349, "step": 9201 }, { "epoch": 0.2711135335798359, "grad_norm": 1.6170111688563116, "learning_rate": 9.134460350489629e-06, "loss": 0.5703, "step": 9202 }, { "epoch": 0.27114299603729947, "grad_norm": 1.5275979729581166, "learning_rate": 9.134171140497969e-06, "loss": 0.5096, "step": 9203 }, { "epoch": 0.27117245849476307, "grad_norm": 1.5842922425661607, "learning_rate": 9.133881886776306e-06, "loss": 0.4921, "step": 9204 }, { "epoch": 0.2712019209522266, "grad_norm": 1.5449369243939695, "learning_rate": 9.1335925893277e-06, "loss": 0.6149, "step": 9205 }, { "epoch": 0.2712313834096902, "grad_norm": 1.7062308907049017, "learning_rate": 9.133303248155211e-06, "loss": 0.5279, "step": 9206 }, { "epoch": 0.27126084586715377, "grad_norm": 1.4825880983672894, "learning_rate": 9.133013863261901e-06, "loss": 0.4447, "step": 9207 }, { "epoch": 0.27129030832461737, "grad_norm": 1.3542848130660272, "learning_rate": 9.13272443465083e-06, "loss": 0.5349, "step": 9208 }, { "epoch": 0.2713197707820809, "grad_norm": 1.6535145715207866, "learning_rate": 9.13243496232506e-06, "loss": 0.3907, "step": 9209 }, { "epoch": 0.2713492332395445, "grad_norm": 1.5205443962338545, "learning_rate": 9.13214544628765e-06, "loss": 0.549, "step": 9210 }, { "epoch": 0.27137869569700807, "grad_norm": 1.7502182006821942, "learning_rate": 9.131855886541667e-06, "loss": 0.4969, "step": 9211 }, { "epoch": 0.27140815815447167, "grad_norm": 1.6883202348296713, "learning_rate": 9.13156628309017e-06, "loss": 0.4515, "step": 9212 }, { "epoch": 0.2714376206119352, "grad_norm": 1.5679640393450671, "learning_rate": 9.131276635936226e-06, "loss": 0.4718, "step": 9213 }, { "epoch": 0.2714670830693988, "grad_norm": 1.4678653308535137, "learning_rate": 9.130986945082896e-06, "loss": 0.4735, "step": 9214 }, { "epoch": 0.2714965455268624, "grad_norm": 1.7475697858387007, "learning_rate": 9.130697210533243e-06, "loss": 0.5767, "step": 9215 }, { "epoch": 0.27152600798432597, "grad_norm": 1.5047745475965204, "learning_rate": 9.130407432290336e-06, "loss": 0.4796, "step": 9216 }, { "epoch": 0.27155547044178957, "grad_norm": 1.597914517559421, "learning_rate": 9.130117610357238e-06, "loss": 0.5597, "step": 9217 }, { "epoch": 0.2715849328992531, "grad_norm": 1.6567625260107972, "learning_rate": 9.129827744737014e-06, "loss": 0.5383, "step": 9218 }, { "epoch": 0.2716143953567167, "grad_norm": 1.3265299572026477, "learning_rate": 9.12953783543273e-06, "loss": 0.3326, "step": 9219 }, { "epoch": 0.27164385781418027, "grad_norm": 1.7281451033371196, "learning_rate": 9.129247882447453e-06, "loss": 0.4763, "step": 9220 }, { "epoch": 0.27167332027164387, "grad_norm": 1.5940324927890217, "learning_rate": 9.128957885784253e-06, "loss": 0.4743, "step": 9221 }, { "epoch": 0.2717027827291074, "grad_norm": 1.5815260561833404, "learning_rate": 9.128667845446193e-06, "loss": 0.5263, "step": 9222 }, { "epoch": 0.271732245186571, "grad_norm": 1.6624551037047588, "learning_rate": 9.128377761436342e-06, "loss": 0.5268, "step": 9223 }, { "epoch": 0.27176170764403457, "grad_norm": 1.6289460837497096, "learning_rate": 9.128087633757772e-06, "loss": 0.5315, "step": 9224 }, { "epoch": 0.27179117010149817, "grad_norm": 1.4569618701598914, "learning_rate": 9.127797462413546e-06, "loss": 0.5036, "step": 9225 }, { "epoch": 0.2718206325589617, "grad_norm": 1.5286956583322429, "learning_rate": 9.127507247406738e-06, "loss": 0.4217, "step": 9226 }, { "epoch": 0.2718500950164253, "grad_norm": 1.5704034856147067, "learning_rate": 9.127216988740416e-06, "loss": 0.5128, "step": 9227 }, { "epoch": 0.2718795574738889, "grad_norm": 1.698463687546235, "learning_rate": 9.12692668641765e-06, "loss": 0.6602, "step": 9228 }, { "epoch": 0.27190901993135247, "grad_norm": 1.538922955944737, "learning_rate": 9.126636340441512e-06, "loss": 0.6023, "step": 9229 }, { "epoch": 0.27193848238881607, "grad_norm": 1.7254759496403596, "learning_rate": 9.126345950815072e-06, "loss": 0.4494, "step": 9230 }, { "epoch": 0.2719679448462796, "grad_norm": 1.4793881506568116, "learning_rate": 9.126055517541402e-06, "loss": 0.4267, "step": 9231 }, { "epoch": 0.2719974073037432, "grad_norm": 1.4239148110476243, "learning_rate": 9.125765040623574e-06, "loss": 0.3765, "step": 9232 }, { "epoch": 0.27202686976120677, "grad_norm": 1.6447924605236914, "learning_rate": 9.125474520064662e-06, "loss": 0.4649, "step": 9233 }, { "epoch": 0.27205633221867037, "grad_norm": 1.5413809539451555, "learning_rate": 9.125183955867735e-06, "loss": 0.4647, "step": 9234 }, { "epoch": 0.2720857946761339, "grad_norm": 1.7248784728496112, "learning_rate": 9.124893348035872e-06, "loss": 0.6287, "step": 9235 }, { "epoch": 0.2721152571335975, "grad_norm": 1.7845287388752404, "learning_rate": 9.124602696572145e-06, "loss": 0.6469, "step": 9236 }, { "epoch": 0.27214471959106107, "grad_norm": 1.4666395237216914, "learning_rate": 9.124312001479625e-06, "loss": 0.44, "step": 9237 }, { "epoch": 0.27217418204852467, "grad_norm": 1.5083293657410028, "learning_rate": 9.12402126276139e-06, "loss": 0.457, "step": 9238 }, { "epoch": 0.2722036445059882, "grad_norm": 1.7330623893909491, "learning_rate": 9.123730480420515e-06, "loss": 0.422, "step": 9239 }, { "epoch": 0.2722331069634518, "grad_norm": 1.5236827595520894, "learning_rate": 9.123439654460078e-06, "loss": 0.5471, "step": 9240 }, { "epoch": 0.2722625694209154, "grad_norm": 1.5809050458310876, "learning_rate": 9.123148784883152e-06, "loss": 0.4761, "step": 9241 }, { "epoch": 0.27229203187837897, "grad_norm": 1.577885295028954, "learning_rate": 9.122857871692813e-06, "loss": 0.4663, "step": 9242 }, { "epoch": 0.2723214943358426, "grad_norm": 1.5562659923870343, "learning_rate": 9.122566914892141e-06, "loss": 0.3194, "step": 9243 }, { "epoch": 0.2723509567933061, "grad_norm": 1.653695939208206, "learning_rate": 9.122275914484213e-06, "loss": 0.5115, "step": 9244 }, { "epoch": 0.2723804192507697, "grad_norm": 1.555628385309677, "learning_rate": 9.121984870472107e-06, "loss": 0.4576, "step": 9245 }, { "epoch": 0.27240988170823327, "grad_norm": 1.5785557841786968, "learning_rate": 9.1216937828589e-06, "loss": 0.4293, "step": 9246 }, { "epoch": 0.2724393441656969, "grad_norm": 1.864653238561383, "learning_rate": 9.121402651647674e-06, "loss": 0.606, "step": 9247 }, { "epoch": 0.2724688066231604, "grad_norm": 1.58041469298579, "learning_rate": 9.121111476841504e-06, "loss": 0.485, "step": 9248 }, { "epoch": 0.272498269080624, "grad_norm": 1.7068208058806607, "learning_rate": 9.120820258443475e-06, "loss": 0.762, "step": 9249 }, { "epoch": 0.27252773153808757, "grad_norm": 1.491280919358025, "learning_rate": 9.120528996456664e-06, "loss": 0.518, "step": 9250 }, { "epoch": 0.2725571939955512, "grad_norm": 1.5798452699336023, "learning_rate": 9.120237690884154e-06, "loss": 0.5763, "step": 9251 }, { "epoch": 0.2725866564530147, "grad_norm": 1.6512327357564096, "learning_rate": 9.119946341729025e-06, "loss": 0.5312, "step": 9252 }, { "epoch": 0.2726161189104783, "grad_norm": 1.506430872717618, "learning_rate": 9.119654948994358e-06, "loss": 0.5197, "step": 9253 }, { "epoch": 0.2726455813679419, "grad_norm": 1.919448495154881, "learning_rate": 9.119363512683239e-06, "loss": 0.5344, "step": 9254 }, { "epoch": 0.27267504382540547, "grad_norm": 1.6014136619845656, "learning_rate": 9.119072032798746e-06, "loss": 0.4637, "step": 9255 }, { "epoch": 0.2727045062828691, "grad_norm": 1.557821631496992, "learning_rate": 9.118780509343966e-06, "loss": 0.4852, "step": 9256 }, { "epoch": 0.2727339687403326, "grad_norm": 1.4183026366369909, "learning_rate": 9.118488942321978e-06, "loss": 0.4502, "step": 9257 }, { "epoch": 0.2727634311977962, "grad_norm": 1.58614064722853, "learning_rate": 9.118197331735873e-06, "loss": 0.5547, "step": 9258 }, { "epoch": 0.27279289365525977, "grad_norm": 1.5846442064182853, "learning_rate": 9.117905677588729e-06, "loss": 0.5952, "step": 9259 }, { "epoch": 0.2728223561127234, "grad_norm": 1.5729882783836833, "learning_rate": 9.117613979883636e-06, "loss": 0.5283, "step": 9260 }, { "epoch": 0.2728518185701869, "grad_norm": 1.4033412559714247, "learning_rate": 9.117322238623676e-06, "loss": 0.5441, "step": 9261 }, { "epoch": 0.2728812810276505, "grad_norm": 1.5684368519745848, "learning_rate": 9.117030453811937e-06, "loss": 0.6227, "step": 9262 }, { "epoch": 0.27291074348511407, "grad_norm": 1.4046302025256068, "learning_rate": 9.116738625451503e-06, "loss": 0.3996, "step": 9263 }, { "epoch": 0.2729402059425777, "grad_norm": 1.4592888193283022, "learning_rate": 9.116446753545462e-06, "loss": 0.5003, "step": 9264 }, { "epoch": 0.2729696684000412, "grad_norm": 1.640126882140366, "learning_rate": 9.116154838096902e-06, "loss": 0.4201, "step": 9265 }, { "epoch": 0.2729991308575048, "grad_norm": 1.5645918926522588, "learning_rate": 9.115862879108912e-06, "loss": 0.5759, "step": 9266 }, { "epoch": 0.2730285933149684, "grad_norm": 1.6687844209753466, "learning_rate": 9.11557087658458e-06, "loss": 0.3612, "step": 9267 }, { "epoch": 0.273058055772432, "grad_norm": 1.4605119257561265, "learning_rate": 9.11527883052699e-06, "loss": 0.5551, "step": 9268 }, { "epoch": 0.2730875182298956, "grad_norm": 1.7161565905126264, "learning_rate": 9.114986740939236e-06, "loss": 0.4945, "step": 9269 }, { "epoch": 0.2731169806873591, "grad_norm": 1.5041202716305386, "learning_rate": 9.114694607824408e-06, "loss": 0.4985, "step": 9270 }, { "epoch": 0.2731464431448227, "grad_norm": 1.6343839154132718, "learning_rate": 9.114402431185592e-06, "loss": 0.4635, "step": 9271 }, { "epoch": 0.2731759056022863, "grad_norm": 1.4610804034737492, "learning_rate": 9.114110211025882e-06, "loss": 0.4687, "step": 9272 }, { "epoch": 0.2732053680597499, "grad_norm": 1.552511645775013, "learning_rate": 9.11381794734837e-06, "loss": 0.4881, "step": 9273 }, { "epoch": 0.2732348305172134, "grad_norm": 1.645913342526346, "learning_rate": 9.113525640156143e-06, "loss": 0.5125, "step": 9274 }, { "epoch": 0.273264292974677, "grad_norm": 1.8503188373999173, "learning_rate": 9.113233289452296e-06, "loss": 0.4572, "step": 9275 }, { "epoch": 0.2732937554321406, "grad_norm": 1.5044592323102304, "learning_rate": 9.112940895239921e-06, "loss": 0.5236, "step": 9276 }, { "epoch": 0.2733232178896042, "grad_norm": 1.683863132284807, "learning_rate": 9.11264845752211e-06, "loss": 0.5698, "step": 9277 }, { "epoch": 0.2733526803470677, "grad_norm": 1.6642599662556974, "learning_rate": 9.112355976301957e-06, "loss": 0.5505, "step": 9278 }, { "epoch": 0.2733821428045313, "grad_norm": 1.5912327624223779, "learning_rate": 9.112063451582557e-06, "loss": 0.5038, "step": 9279 }, { "epoch": 0.2734116052619949, "grad_norm": 1.692289057594548, "learning_rate": 9.111770883367003e-06, "loss": 0.5688, "step": 9280 }, { "epoch": 0.2734410677194585, "grad_norm": 1.581327915056174, "learning_rate": 9.111478271658388e-06, "loss": 0.6114, "step": 9281 }, { "epoch": 0.2734705301769221, "grad_norm": 1.5504493760555818, "learning_rate": 9.11118561645981e-06, "loss": 0.5181, "step": 9282 }, { "epoch": 0.2734999926343856, "grad_norm": 1.5034208576768338, "learning_rate": 9.110892917774363e-06, "loss": 0.4933, "step": 9283 }, { "epoch": 0.2735294550918492, "grad_norm": 1.6007693027955199, "learning_rate": 9.110600175605145e-06, "loss": 0.5271, "step": 9284 }, { "epoch": 0.2735589175493128, "grad_norm": 1.6166999569371332, "learning_rate": 9.110307389955248e-06, "loss": 0.5905, "step": 9285 }, { "epoch": 0.2735883800067764, "grad_norm": 1.5573694144449552, "learning_rate": 9.110014560827776e-06, "loss": 0.5076, "step": 9286 }, { "epoch": 0.2736178424642399, "grad_norm": 1.5659616870529227, "learning_rate": 9.109721688225819e-06, "loss": 0.5004, "step": 9287 }, { "epoch": 0.2736473049217035, "grad_norm": 1.589769212669307, "learning_rate": 9.109428772152481e-06, "loss": 0.4379, "step": 9288 }, { "epoch": 0.2736767673791671, "grad_norm": 1.609236542756954, "learning_rate": 9.109135812610856e-06, "loss": 0.4769, "step": 9289 }, { "epoch": 0.2737062298366307, "grad_norm": 1.6619548567956517, "learning_rate": 9.108842809604047e-06, "loss": 0.5723, "step": 9290 }, { "epoch": 0.2737356922940942, "grad_norm": 1.5845987398183792, "learning_rate": 9.108549763135149e-06, "loss": 0.442, "step": 9291 }, { "epoch": 0.2737651547515578, "grad_norm": 1.4514683811678262, "learning_rate": 9.108256673207265e-06, "loss": 0.4621, "step": 9292 }, { "epoch": 0.27379461720902143, "grad_norm": 1.6046191583807008, "learning_rate": 9.107963539823493e-06, "loss": 0.4179, "step": 9293 }, { "epoch": 0.273824079666485, "grad_norm": 1.8245321929590466, "learning_rate": 9.107670362986934e-06, "loss": 0.5732, "step": 9294 }, { "epoch": 0.2738535421239486, "grad_norm": 1.6295051982042366, "learning_rate": 9.10737714270069e-06, "loss": 0.4754, "step": 9295 }, { "epoch": 0.2738830045814121, "grad_norm": 1.3900708940068611, "learning_rate": 9.107083878967865e-06, "loss": 0.4558, "step": 9296 }, { "epoch": 0.27391246703887573, "grad_norm": 1.477754933857292, "learning_rate": 9.106790571791554e-06, "loss": 0.3482, "step": 9297 }, { "epoch": 0.2739419294963393, "grad_norm": 1.5225165172151214, "learning_rate": 9.106497221174868e-06, "loss": 0.5654, "step": 9298 }, { "epoch": 0.2739713919538029, "grad_norm": 1.8850181491047824, "learning_rate": 9.106203827120903e-06, "loss": 0.3397, "step": 9299 }, { "epoch": 0.2740008544112664, "grad_norm": 1.483745052667824, "learning_rate": 9.105910389632766e-06, "loss": 0.4416, "step": 9300 }, { "epoch": 0.27403031686873003, "grad_norm": 1.6052231415181741, "learning_rate": 9.105616908713561e-06, "loss": 0.6235, "step": 9301 }, { "epoch": 0.2740597793261936, "grad_norm": 1.3675039040503891, "learning_rate": 9.105323384366391e-06, "loss": 0.3775, "step": 9302 }, { "epoch": 0.2740892417836572, "grad_norm": 1.6933254413133223, "learning_rate": 9.105029816594362e-06, "loss": 0.4453, "step": 9303 }, { "epoch": 0.2741187042411207, "grad_norm": 1.4676541110568275, "learning_rate": 9.10473620540058e-06, "loss": 0.3914, "step": 9304 }, { "epoch": 0.27414816669858433, "grad_norm": 1.6489368194120533, "learning_rate": 9.104442550788146e-06, "loss": 0.4459, "step": 9305 }, { "epoch": 0.27417762915604793, "grad_norm": 1.5441311264430655, "learning_rate": 9.104148852760172e-06, "loss": 0.5226, "step": 9306 }, { "epoch": 0.2742070916135115, "grad_norm": 1.4270918869770892, "learning_rate": 9.103855111319761e-06, "loss": 0.4076, "step": 9307 }, { "epoch": 0.2742365540709751, "grad_norm": 1.5907510327518932, "learning_rate": 9.103561326470022e-06, "loss": 0.5562, "step": 9308 }, { "epoch": 0.2742660165284386, "grad_norm": 1.5061479005588376, "learning_rate": 9.10326749821406e-06, "loss": 0.4288, "step": 9309 }, { "epoch": 0.27429547898590223, "grad_norm": 1.3330251503052335, "learning_rate": 9.102973626554988e-06, "loss": 0.2878, "step": 9310 }, { "epoch": 0.2743249414433658, "grad_norm": 1.639223130181437, "learning_rate": 9.10267971149591e-06, "loss": 0.4727, "step": 9311 }, { "epoch": 0.2743544039008294, "grad_norm": 1.3607785059538007, "learning_rate": 9.102385753039937e-06, "loss": 0.3889, "step": 9312 }, { "epoch": 0.2743838663582929, "grad_norm": 1.6303698407914236, "learning_rate": 9.102091751190176e-06, "loss": 0.5364, "step": 9313 }, { "epoch": 0.27441332881575653, "grad_norm": 1.6328460946472747, "learning_rate": 9.10179770594974e-06, "loss": 0.5891, "step": 9314 }, { "epoch": 0.2744427912732201, "grad_norm": 1.7497888303235822, "learning_rate": 9.10150361732174e-06, "loss": 0.4473, "step": 9315 }, { "epoch": 0.2744722537306837, "grad_norm": 1.669197112298205, "learning_rate": 9.10120948530928e-06, "loss": 0.505, "step": 9316 }, { "epoch": 0.2745017161881472, "grad_norm": 1.4553939497998063, "learning_rate": 9.100915309915478e-06, "loss": 0.5732, "step": 9317 }, { "epoch": 0.27453117864561083, "grad_norm": 1.6164163334924444, "learning_rate": 9.100621091143445e-06, "loss": 0.5106, "step": 9318 }, { "epoch": 0.27456064110307443, "grad_norm": 1.5315556597042348, "learning_rate": 9.10032682899629e-06, "loss": 0.3883, "step": 9319 }, { "epoch": 0.274590103560538, "grad_norm": 1.705325769250572, "learning_rate": 9.100032523477127e-06, "loss": 0.3972, "step": 9320 }, { "epoch": 0.2746195660180016, "grad_norm": 1.4835607208957806, "learning_rate": 9.099738174589072e-06, "loss": 0.3997, "step": 9321 }, { "epoch": 0.27464902847546513, "grad_norm": 1.4560329687773188, "learning_rate": 9.099443782335235e-06, "loss": 0.4145, "step": 9322 }, { "epoch": 0.27467849093292873, "grad_norm": 1.4864109073161282, "learning_rate": 9.09914934671873e-06, "loss": 0.4312, "step": 9323 }, { "epoch": 0.2747079533903923, "grad_norm": 1.5331109370849363, "learning_rate": 9.098854867742672e-06, "loss": 0.5232, "step": 9324 }, { "epoch": 0.2747374158478559, "grad_norm": 1.8504341242436226, "learning_rate": 9.098560345410178e-06, "loss": 0.5548, "step": 9325 }, { "epoch": 0.27476687830531943, "grad_norm": 1.6535601698875728, "learning_rate": 9.098265779724362e-06, "loss": 0.4419, "step": 9326 }, { "epoch": 0.27479634076278303, "grad_norm": 1.8388782930490737, "learning_rate": 9.097971170688339e-06, "loss": 0.5337, "step": 9327 }, { "epoch": 0.2748258032202466, "grad_norm": 1.6136743154210362, "learning_rate": 9.097676518305226e-06, "loss": 0.594, "step": 9328 }, { "epoch": 0.2748552656777102, "grad_norm": 1.7402611230091838, "learning_rate": 9.097381822578139e-06, "loss": 0.4753, "step": 9329 }, { "epoch": 0.27488472813517373, "grad_norm": 1.6423650460625836, "learning_rate": 9.097087083510195e-06, "loss": 0.5956, "step": 9330 }, { "epoch": 0.27491419059263733, "grad_norm": 1.7073664600923764, "learning_rate": 9.096792301104513e-06, "loss": 0.4764, "step": 9331 }, { "epoch": 0.27494365305010093, "grad_norm": 1.589015518403495, "learning_rate": 9.096497475364213e-06, "loss": 0.5217, "step": 9332 }, { "epoch": 0.2749731155075645, "grad_norm": 1.4936320222404758, "learning_rate": 9.096202606292407e-06, "loss": 0.373, "step": 9333 }, { "epoch": 0.2750025779650281, "grad_norm": 1.442178395468037, "learning_rate": 9.095907693892222e-06, "loss": 0.4554, "step": 9334 }, { "epoch": 0.27503204042249163, "grad_norm": 1.5998471373660283, "learning_rate": 9.095612738166773e-06, "loss": 0.5187, "step": 9335 }, { "epoch": 0.27506150287995523, "grad_norm": 1.839471546005602, "learning_rate": 9.095317739119178e-06, "loss": 0.7039, "step": 9336 }, { "epoch": 0.2750909653374188, "grad_norm": 1.6990707282373245, "learning_rate": 9.095022696752562e-06, "loss": 0.5161, "step": 9337 }, { "epoch": 0.2751204277948824, "grad_norm": 1.6162010594351048, "learning_rate": 9.094727611070044e-06, "loss": 0.5909, "step": 9338 }, { "epoch": 0.27514989025234593, "grad_norm": 1.4013716375277374, "learning_rate": 9.094432482074746e-06, "loss": 0.513, "step": 9339 }, { "epoch": 0.27517935270980953, "grad_norm": 1.5647641170369548, "learning_rate": 9.094137309769786e-06, "loss": 0.4761, "step": 9340 }, { "epoch": 0.2752088151672731, "grad_norm": 1.513561391120322, "learning_rate": 9.093842094158292e-06, "loss": 0.5533, "step": 9341 }, { "epoch": 0.2752382776247367, "grad_norm": 1.551939896124754, "learning_rate": 9.093546835243383e-06, "loss": 0.5682, "step": 9342 }, { "epoch": 0.27526774008220023, "grad_norm": 1.5025117417891756, "learning_rate": 9.093251533028185e-06, "loss": 0.3774, "step": 9343 }, { "epoch": 0.27529720253966383, "grad_norm": 1.4723424988090954, "learning_rate": 9.092956187515817e-06, "loss": 0.459, "step": 9344 }, { "epoch": 0.27532666499712743, "grad_norm": 1.6022186304163226, "learning_rate": 9.092660798709407e-06, "loss": 0.4004, "step": 9345 }, { "epoch": 0.275356127454591, "grad_norm": 1.4825798390163007, "learning_rate": 9.092365366612077e-06, "loss": 0.4909, "step": 9346 }, { "epoch": 0.2753855899120546, "grad_norm": 1.6076010237759824, "learning_rate": 9.092069891226956e-06, "loss": 0.4346, "step": 9347 }, { "epoch": 0.27541505236951813, "grad_norm": 1.5817631935497134, "learning_rate": 9.091774372557164e-06, "loss": 0.4463, "step": 9348 }, { "epoch": 0.27544451482698173, "grad_norm": 1.461230353309368, "learning_rate": 9.091478810605832e-06, "loss": 0.4309, "step": 9349 }, { "epoch": 0.2754739772844453, "grad_norm": 1.6358798148910771, "learning_rate": 9.091183205376083e-06, "loss": 0.5452, "step": 9350 }, { "epoch": 0.2755034397419089, "grad_norm": 1.7110541246021724, "learning_rate": 9.090887556871044e-06, "loss": 0.4786, "step": 9351 }, { "epoch": 0.27553290219937243, "grad_norm": 1.6240104878792505, "learning_rate": 9.090591865093846e-06, "loss": 0.4639, "step": 9352 }, { "epoch": 0.27556236465683603, "grad_norm": 1.7921464547502353, "learning_rate": 9.090296130047612e-06, "loss": 0.6171, "step": 9353 }, { "epoch": 0.2755918271142996, "grad_norm": 1.4412303120916619, "learning_rate": 9.090000351735472e-06, "loss": 0.3908, "step": 9354 }, { "epoch": 0.2756212895717632, "grad_norm": 1.653692088461082, "learning_rate": 9.089704530160554e-06, "loss": 0.3839, "step": 9355 }, { "epoch": 0.27565075202922673, "grad_norm": 1.6072032713714697, "learning_rate": 9.08940866532599e-06, "loss": 0.4411, "step": 9356 }, { "epoch": 0.27568021448669033, "grad_norm": 1.6020133880344876, "learning_rate": 9.089112757234905e-06, "loss": 0.4041, "step": 9357 }, { "epoch": 0.27570967694415394, "grad_norm": 1.4776200939989037, "learning_rate": 9.088816805890433e-06, "loss": 0.4613, "step": 9358 }, { "epoch": 0.2757391394016175, "grad_norm": 1.5103697348698404, "learning_rate": 9.088520811295703e-06, "loss": 0.428, "step": 9359 }, { "epoch": 0.2757686018590811, "grad_norm": 1.5645594478654294, "learning_rate": 9.088224773453844e-06, "loss": 0.5685, "step": 9360 }, { "epoch": 0.27579806431654463, "grad_norm": 1.796236191962056, "learning_rate": 9.08792869236799e-06, "loss": 0.5239, "step": 9361 }, { "epoch": 0.27582752677400824, "grad_norm": 1.7960890588246163, "learning_rate": 9.087632568041276e-06, "loss": 0.587, "step": 9362 }, { "epoch": 0.2758569892314718, "grad_norm": 1.4539817575119705, "learning_rate": 9.087336400476825e-06, "loss": 0.4421, "step": 9363 }, { "epoch": 0.2758864516889354, "grad_norm": 1.4250676670072095, "learning_rate": 9.087040189677778e-06, "loss": 0.3108, "step": 9364 }, { "epoch": 0.27591591414639893, "grad_norm": 1.6094173243738066, "learning_rate": 9.086743935647266e-06, "loss": 0.4771, "step": 9365 }, { "epoch": 0.27594537660386254, "grad_norm": 1.6890046603207696, "learning_rate": 9.08644763838842e-06, "loss": 0.6287, "step": 9366 }, { "epoch": 0.2759748390613261, "grad_norm": 1.5759911450666855, "learning_rate": 9.086151297904378e-06, "loss": 0.3272, "step": 9367 }, { "epoch": 0.2760043015187897, "grad_norm": 1.6006192196431703, "learning_rate": 9.085854914198272e-06, "loss": 0.5897, "step": 9368 }, { "epoch": 0.27603376397625323, "grad_norm": 1.462970217124599, "learning_rate": 9.085558487273238e-06, "loss": 0.4915, "step": 9369 }, { "epoch": 0.27606322643371684, "grad_norm": 1.350141200764263, "learning_rate": 9.08526201713241e-06, "loss": 0.392, "step": 9370 }, { "epoch": 0.27609268889118044, "grad_norm": 1.5372647563920856, "learning_rate": 9.084965503778928e-06, "loss": 0.4417, "step": 9371 }, { "epoch": 0.276122151348644, "grad_norm": 1.5428499803584093, "learning_rate": 9.084668947215924e-06, "loss": 0.4524, "step": 9372 }, { "epoch": 0.2761516138061076, "grad_norm": 1.5188852332106022, "learning_rate": 9.08437234744654e-06, "loss": 0.4818, "step": 9373 }, { "epoch": 0.27618107626357113, "grad_norm": 1.9887627340003164, "learning_rate": 9.084075704473907e-06, "loss": 0.448, "step": 9374 }, { "epoch": 0.27621053872103474, "grad_norm": 1.6877950029630222, "learning_rate": 9.083779018301167e-06, "loss": 0.4408, "step": 9375 }, { "epoch": 0.2762400011784983, "grad_norm": 1.459800423908908, "learning_rate": 9.083482288931456e-06, "loss": 0.5144, "step": 9376 }, { "epoch": 0.2762694636359619, "grad_norm": 1.7253302477536994, "learning_rate": 9.083185516367914e-06, "loss": 0.5289, "step": 9377 }, { "epoch": 0.27629892609342543, "grad_norm": 1.6320715132014072, "learning_rate": 9.08288870061368e-06, "loss": 0.5578, "step": 9378 }, { "epoch": 0.27632838855088904, "grad_norm": 1.5044757300519604, "learning_rate": 9.082591841671893e-06, "loss": 0.4785, "step": 9379 }, { "epoch": 0.2763578510083526, "grad_norm": 1.576287304638724, "learning_rate": 9.082294939545696e-06, "loss": 0.4503, "step": 9380 }, { "epoch": 0.2763873134658162, "grad_norm": 1.865431553064131, "learning_rate": 9.081997994238226e-06, "loss": 0.5863, "step": 9381 }, { "epoch": 0.27641677592327973, "grad_norm": 1.456642006713268, "learning_rate": 9.081701005752625e-06, "loss": 0.4628, "step": 9382 }, { "epoch": 0.27644623838074334, "grad_norm": 1.7375646892590353, "learning_rate": 9.081403974092035e-06, "loss": 0.583, "step": 9383 }, { "epoch": 0.27647570083820694, "grad_norm": 1.6564366181826589, "learning_rate": 9.081106899259598e-06, "loss": 0.5682, "step": 9384 }, { "epoch": 0.2765051632956705, "grad_norm": 1.6760395260044088, "learning_rate": 9.080809781258454e-06, "loss": 0.5125, "step": 9385 }, { "epoch": 0.2765346257531341, "grad_norm": 1.624912118503286, "learning_rate": 9.08051262009175e-06, "loss": 0.5084, "step": 9386 }, { "epoch": 0.27656408821059764, "grad_norm": 1.722507782574039, "learning_rate": 9.080215415762627e-06, "loss": 0.5515, "step": 9387 }, { "epoch": 0.27659355066806124, "grad_norm": 1.4885385508395095, "learning_rate": 9.079918168274227e-06, "loss": 0.3953, "step": 9388 }, { "epoch": 0.2766230131255248, "grad_norm": 1.4040799290295953, "learning_rate": 9.079620877629698e-06, "loss": 0.4434, "step": 9389 }, { "epoch": 0.2766524755829884, "grad_norm": 1.6489377109339218, "learning_rate": 9.079323543832182e-06, "loss": 0.5966, "step": 9390 }, { "epoch": 0.27668193804045194, "grad_norm": 1.6352757975314083, "learning_rate": 9.079026166884825e-06, "loss": 0.3647, "step": 9391 }, { "epoch": 0.27671140049791554, "grad_norm": 1.522146924738663, "learning_rate": 9.078728746790772e-06, "loss": 0.5219, "step": 9392 }, { "epoch": 0.2767408629553791, "grad_norm": 1.5552110385119307, "learning_rate": 9.078431283553169e-06, "loss": 0.583, "step": 9393 }, { "epoch": 0.2767703254128427, "grad_norm": 1.3295880381541199, "learning_rate": 9.078133777175163e-06, "loss": 0.3524, "step": 9394 }, { "epoch": 0.27679978787030624, "grad_norm": 1.5730797692260727, "learning_rate": 9.077836227659901e-06, "loss": 0.51, "step": 9395 }, { "epoch": 0.27682925032776984, "grad_norm": 1.4259586993358442, "learning_rate": 9.07753863501053e-06, "loss": 0.3889, "step": 9396 }, { "epoch": 0.27685871278523344, "grad_norm": 1.5425447843008058, "learning_rate": 9.077240999230198e-06, "loss": 0.5662, "step": 9397 }, { "epoch": 0.276888175242697, "grad_norm": 1.871923298431527, "learning_rate": 9.076943320322054e-06, "loss": 0.4956, "step": 9398 }, { "epoch": 0.2769176377001606, "grad_norm": 1.425923639731493, "learning_rate": 9.076645598289244e-06, "loss": 0.479, "step": 9399 }, { "epoch": 0.27694710015762414, "grad_norm": 1.6240153843225738, "learning_rate": 9.07634783313492e-06, "loss": 0.5312, "step": 9400 }, { "epoch": 0.27697656261508774, "grad_norm": 1.5961820595409588, "learning_rate": 9.076050024862232e-06, "loss": 0.4487, "step": 9401 }, { "epoch": 0.2770060250725513, "grad_norm": 1.623102739751967, "learning_rate": 9.075752173474329e-06, "loss": 0.5924, "step": 9402 }, { "epoch": 0.2770354875300149, "grad_norm": 1.4450393419847527, "learning_rate": 9.07545427897436e-06, "loss": 0.4572, "step": 9403 }, { "epoch": 0.27706494998747844, "grad_norm": 1.6630166951298884, "learning_rate": 9.075156341365481e-06, "loss": 0.6054, "step": 9404 }, { "epoch": 0.27709441244494204, "grad_norm": 1.6857580917772952, "learning_rate": 9.074858360650837e-06, "loss": 0.4981, "step": 9405 }, { "epoch": 0.2771238749024056, "grad_norm": 1.5371979871597514, "learning_rate": 9.074560336833585e-06, "loss": 0.4047, "step": 9406 }, { "epoch": 0.2771533373598692, "grad_norm": 1.6743557376484477, "learning_rate": 9.074262269916874e-06, "loss": 0.6166, "step": 9407 }, { "epoch": 0.27718279981733274, "grad_norm": 1.5811219126687153, "learning_rate": 9.07396415990386e-06, "loss": 0.4063, "step": 9408 }, { "epoch": 0.27721226227479634, "grad_norm": 1.458771175310609, "learning_rate": 9.073666006797693e-06, "loss": 0.381, "step": 9409 }, { "epoch": 0.27724172473225994, "grad_norm": 1.4513072843753452, "learning_rate": 9.07336781060153e-06, "loss": 0.4687, "step": 9410 }, { "epoch": 0.2772711871897235, "grad_norm": 1.8322129673937875, "learning_rate": 9.073069571318522e-06, "loss": 0.4941, "step": 9411 }, { "epoch": 0.2773006496471871, "grad_norm": 1.5356270151066882, "learning_rate": 9.072771288951827e-06, "loss": 0.4102, "step": 9412 }, { "epoch": 0.27733011210465064, "grad_norm": 1.6256085329602494, "learning_rate": 9.072472963504597e-06, "loss": 0.4996, "step": 9413 }, { "epoch": 0.27735957456211424, "grad_norm": 1.3815930828820342, "learning_rate": 9.07217459497999e-06, "loss": 0.4934, "step": 9414 }, { "epoch": 0.2773890370195778, "grad_norm": 1.4085416029035047, "learning_rate": 9.071876183381162e-06, "loss": 0.4414, "step": 9415 }, { "epoch": 0.2774184994770414, "grad_norm": 1.5160785338229141, "learning_rate": 9.071577728711267e-06, "loss": 0.532, "step": 9416 }, { "epoch": 0.27744796193450494, "grad_norm": 1.8064905848951234, "learning_rate": 9.071279230973464e-06, "loss": 0.8011, "step": 9417 }, { "epoch": 0.27747742439196854, "grad_norm": 1.6179535200391062, "learning_rate": 9.070980690170913e-06, "loss": 0.4729, "step": 9418 }, { "epoch": 0.2775068868494321, "grad_norm": 1.5838701993474023, "learning_rate": 9.070682106306765e-06, "loss": 0.439, "step": 9419 }, { "epoch": 0.2775363493068957, "grad_norm": 1.6718262434419555, "learning_rate": 9.070383479384184e-06, "loss": 0.4655, "step": 9420 }, { "epoch": 0.27756581176435924, "grad_norm": 1.412376195400751, "learning_rate": 9.070084809406327e-06, "loss": 0.394, "step": 9421 }, { "epoch": 0.27759527422182284, "grad_norm": 1.5226100756910996, "learning_rate": 9.069786096376353e-06, "loss": 0.3912, "step": 9422 }, { "epoch": 0.27762473667928644, "grad_norm": 1.4672214969441548, "learning_rate": 9.06948734029742e-06, "loss": 0.5109, "step": 9423 }, { "epoch": 0.27765419913675, "grad_norm": 1.4321036438428036, "learning_rate": 9.069188541172692e-06, "loss": 0.5051, "step": 9424 }, { "epoch": 0.2776836615942136, "grad_norm": 1.6436137718539157, "learning_rate": 9.06888969900533e-06, "loss": 0.4367, "step": 9425 }, { "epoch": 0.27771312405167714, "grad_norm": 1.6861873623828842, "learning_rate": 9.06859081379849e-06, "loss": 0.545, "step": 9426 }, { "epoch": 0.27774258650914074, "grad_norm": 1.5704492171832778, "learning_rate": 9.068291885555338e-06, "loss": 0.5662, "step": 9427 }, { "epoch": 0.2777720489666043, "grad_norm": 1.810672759738546, "learning_rate": 9.067992914279032e-06, "loss": 0.4947, "step": 9428 }, { "epoch": 0.2778015114240679, "grad_norm": 1.622606645609248, "learning_rate": 9.067693899972739e-06, "loss": 0.5098, "step": 9429 }, { "epoch": 0.27783097388153144, "grad_norm": 1.4636496875383902, "learning_rate": 9.067394842639619e-06, "loss": 0.4143, "step": 9430 }, { "epoch": 0.27786043633899504, "grad_norm": 1.6375545872142803, "learning_rate": 9.067095742282834e-06, "loss": 0.4534, "step": 9431 }, { "epoch": 0.2778898987964586, "grad_norm": 1.518273101298484, "learning_rate": 9.066796598905552e-06, "loss": 0.4705, "step": 9432 }, { "epoch": 0.2779193612539222, "grad_norm": 1.4395300437225658, "learning_rate": 9.066497412510934e-06, "loss": 0.4853, "step": 9433 }, { "epoch": 0.27794882371138574, "grad_norm": 1.6119017686347281, "learning_rate": 9.066198183102146e-06, "loss": 0.5028, "step": 9434 }, { "epoch": 0.27797828616884934, "grad_norm": 1.4089621025169674, "learning_rate": 9.065898910682352e-06, "loss": 0.4481, "step": 9435 }, { "epoch": 0.27800774862631294, "grad_norm": 1.55288839488602, "learning_rate": 9.06559959525472e-06, "loss": 0.3898, "step": 9436 }, { "epoch": 0.2780372110837765, "grad_norm": 1.8235605218091924, "learning_rate": 9.065300236822414e-06, "loss": 0.5935, "step": 9437 }, { "epoch": 0.2780666735412401, "grad_norm": 1.8984041027004963, "learning_rate": 9.0650008353886e-06, "loss": 0.4156, "step": 9438 }, { "epoch": 0.27809613599870364, "grad_norm": 1.7187782940423098, "learning_rate": 9.064701390956447e-06, "loss": 0.558, "step": 9439 }, { "epoch": 0.27812559845616724, "grad_norm": 1.919558210294283, "learning_rate": 9.06440190352912e-06, "loss": 0.5853, "step": 9440 }, { "epoch": 0.2781550609136308, "grad_norm": 1.5711021811970998, "learning_rate": 9.064102373109791e-06, "loss": 0.341, "step": 9441 }, { "epoch": 0.2781845233710944, "grad_norm": 1.5964676717122106, "learning_rate": 9.063802799701624e-06, "loss": 0.3915, "step": 9442 }, { "epoch": 0.27821398582855794, "grad_norm": 1.7168888619310954, "learning_rate": 9.06350318330779e-06, "loss": 0.6451, "step": 9443 }, { "epoch": 0.27824344828602154, "grad_norm": 1.823317029003605, "learning_rate": 9.063203523931457e-06, "loss": 0.5177, "step": 9444 }, { "epoch": 0.2782729107434851, "grad_norm": 1.6137806142636004, "learning_rate": 9.062903821575796e-06, "loss": 0.4373, "step": 9445 }, { "epoch": 0.2783023732009487, "grad_norm": 1.7349473405832705, "learning_rate": 9.062604076243978e-06, "loss": 0.4687, "step": 9446 }, { "epoch": 0.27833183565841224, "grad_norm": 1.7504195568167116, "learning_rate": 9.06230428793917e-06, "loss": 0.4867, "step": 9447 }, { "epoch": 0.27836129811587584, "grad_norm": 1.7624320636725515, "learning_rate": 9.062004456664546e-06, "loss": 0.5189, "step": 9448 }, { "epoch": 0.27839076057333945, "grad_norm": 1.6100263103955588, "learning_rate": 9.061704582423276e-06, "loss": 0.572, "step": 9449 }, { "epoch": 0.278420223030803, "grad_norm": 1.5645103249951773, "learning_rate": 9.061404665218533e-06, "loss": 0.4721, "step": 9450 }, { "epoch": 0.2784496854882666, "grad_norm": 1.5755473222392014, "learning_rate": 9.06110470505349e-06, "loss": 0.3102, "step": 9451 }, { "epoch": 0.27847914794573014, "grad_norm": 1.4911551577471216, "learning_rate": 9.060804701931318e-06, "loss": 0.3363, "step": 9452 }, { "epoch": 0.27850861040319375, "grad_norm": 1.5030889105737673, "learning_rate": 9.060504655855194e-06, "loss": 0.4969, "step": 9453 }, { "epoch": 0.2785380728606573, "grad_norm": 1.4220812576505015, "learning_rate": 9.060204566828286e-06, "loss": 0.4667, "step": 9454 }, { "epoch": 0.2785675353181209, "grad_norm": 1.5222758236889617, "learning_rate": 9.059904434853773e-06, "loss": 0.464, "step": 9455 }, { "epoch": 0.27859699777558444, "grad_norm": 1.5250106650485238, "learning_rate": 9.059604259934828e-06, "loss": 0.5622, "step": 9456 }, { "epoch": 0.27862646023304805, "grad_norm": 1.5449680106682373, "learning_rate": 9.059304042074626e-06, "loss": 0.4666, "step": 9457 }, { "epoch": 0.2786559226905116, "grad_norm": 1.5265972315072434, "learning_rate": 9.059003781276344e-06, "loss": 0.4819, "step": 9458 }, { "epoch": 0.2786853851479752, "grad_norm": 1.5312515939239084, "learning_rate": 9.058703477543156e-06, "loss": 0.4516, "step": 9459 }, { "epoch": 0.27871484760543874, "grad_norm": 1.9029205703806575, "learning_rate": 9.05840313087824e-06, "loss": 0.5534, "step": 9460 }, { "epoch": 0.27874431006290235, "grad_norm": 1.7017429397191373, "learning_rate": 9.058102741284773e-06, "loss": 0.5741, "step": 9461 }, { "epoch": 0.27877377252036595, "grad_norm": 1.596192781684016, "learning_rate": 9.05780230876593e-06, "loss": 0.5531, "step": 9462 }, { "epoch": 0.2788032349778295, "grad_norm": 1.8089351697136176, "learning_rate": 9.057501833324893e-06, "loss": 0.5401, "step": 9463 }, { "epoch": 0.2788326974352931, "grad_norm": 1.5186451880627334, "learning_rate": 9.057201314964837e-06, "loss": 0.5203, "step": 9464 }, { "epoch": 0.27886215989275664, "grad_norm": 1.7603946629373, "learning_rate": 9.056900753688942e-06, "loss": 0.6262, "step": 9465 }, { "epoch": 0.27889162235022025, "grad_norm": 1.4209080539855887, "learning_rate": 9.05660014950039e-06, "loss": 0.4184, "step": 9466 }, { "epoch": 0.2789210848076838, "grad_norm": 1.6185353787089916, "learning_rate": 9.056299502402353e-06, "loss": 0.5132, "step": 9467 }, { "epoch": 0.2789505472651474, "grad_norm": 1.3266075278611507, "learning_rate": 9.05599881239802e-06, "loss": 0.3499, "step": 9468 }, { "epoch": 0.27898000972261094, "grad_norm": 1.5512817726564019, "learning_rate": 9.055698079490566e-06, "loss": 0.4649, "step": 9469 }, { "epoch": 0.27900947218007455, "grad_norm": 1.5897811043861085, "learning_rate": 9.055397303683173e-06, "loss": 0.5966, "step": 9470 }, { "epoch": 0.2790389346375381, "grad_norm": 1.813019314403809, "learning_rate": 9.055096484979024e-06, "loss": 0.459, "step": 9471 }, { "epoch": 0.2790683970950017, "grad_norm": 1.5968509098330606, "learning_rate": 9.0547956233813e-06, "loss": 0.4774, "step": 9472 }, { "epoch": 0.27909785955246524, "grad_norm": 1.4829983981970025, "learning_rate": 9.054494718893186e-06, "loss": 0.4404, "step": 9473 }, { "epoch": 0.27912732200992885, "grad_norm": 1.662818075194562, "learning_rate": 9.054193771517859e-06, "loss": 0.5338, "step": 9474 }, { "epoch": 0.27915678446739245, "grad_norm": 1.7383840524057053, "learning_rate": 9.053892781258507e-06, "loss": 0.5866, "step": 9475 }, { "epoch": 0.279186246924856, "grad_norm": 1.7363554094587894, "learning_rate": 9.053591748118313e-06, "loss": 0.3704, "step": 9476 }, { "epoch": 0.2792157093823196, "grad_norm": 1.4822439173740025, "learning_rate": 9.053290672100462e-06, "loss": 0.3833, "step": 9477 }, { "epoch": 0.27924517183978315, "grad_norm": 1.744222635332479, "learning_rate": 9.052989553208136e-06, "loss": 0.5389, "step": 9478 }, { "epoch": 0.27927463429724675, "grad_norm": 1.725648636946337, "learning_rate": 9.052688391444523e-06, "loss": 0.5763, "step": 9479 }, { "epoch": 0.2793040967547103, "grad_norm": 1.749941874082134, "learning_rate": 9.052387186812806e-06, "loss": 0.6447, "step": 9480 }, { "epoch": 0.2793335592121739, "grad_norm": 1.6113059838508528, "learning_rate": 9.052085939316172e-06, "loss": 0.5781, "step": 9481 }, { "epoch": 0.27936302166963745, "grad_norm": 1.3317441231704061, "learning_rate": 9.05178464895781e-06, "loss": 0.3357, "step": 9482 }, { "epoch": 0.27939248412710105, "grad_norm": 1.6080694215060807, "learning_rate": 9.051483315740902e-06, "loss": 0.6214, "step": 9483 }, { "epoch": 0.2794219465845646, "grad_norm": 1.6869795426649759, "learning_rate": 9.05118193966864e-06, "loss": 0.5866, "step": 9484 }, { "epoch": 0.2794514090420282, "grad_norm": 1.52080443770065, "learning_rate": 9.05088052074421e-06, "loss": 0.5202, "step": 9485 }, { "epoch": 0.27948087149949175, "grad_norm": 1.4370777729623276, "learning_rate": 9.0505790589708e-06, "loss": 0.4174, "step": 9486 }, { "epoch": 0.27951033395695535, "grad_norm": 1.416988736053232, "learning_rate": 9.0502775543516e-06, "loss": 0.4221, "step": 9487 }, { "epoch": 0.27953979641441895, "grad_norm": 1.4759326098485308, "learning_rate": 9.049976006889796e-06, "loss": 0.4286, "step": 9488 }, { "epoch": 0.2795692588718825, "grad_norm": 1.562412489428405, "learning_rate": 9.049674416588582e-06, "loss": 0.375, "step": 9489 }, { "epoch": 0.2795987213293461, "grad_norm": 1.6550986007235473, "learning_rate": 9.049372783451148e-06, "loss": 0.608, "step": 9490 }, { "epoch": 0.27962818378680965, "grad_norm": 1.8095498540203945, "learning_rate": 9.049071107480678e-06, "loss": 0.6318, "step": 9491 }, { "epoch": 0.27965764624427325, "grad_norm": 1.662960266285347, "learning_rate": 9.048769388680372e-06, "loss": 0.4493, "step": 9492 }, { "epoch": 0.2796871087017368, "grad_norm": 1.524395294051185, "learning_rate": 9.048467627053415e-06, "loss": 0.4516, "step": 9493 }, { "epoch": 0.2797165711592004, "grad_norm": 1.610269796057944, "learning_rate": 9.048165822603003e-06, "loss": 0.4985, "step": 9494 }, { "epoch": 0.27974603361666395, "grad_norm": 1.7072124705185179, "learning_rate": 9.047863975332325e-06, "loss": 0.5237, "step": 9495 }, { "epoch": 0.27977549607412755, "grad_norm": 1.5039587228697116, "learning_rate": 9.047562085244577e-06, "loss": 0.46, "step": 9496 }, { "epoch": 0.2798049585315911, "grad_norm": 1.4785692347064743, "learning_rate": 9.04726015234295e-06, "loss": 0.4008, "step": 9497 }, { "epoch": 0.2798344209890547, "grad_norm": 1.6094326342416718, "learning_rate": 9.046958176630639e-06, "loss": 0.6729, "step": 9498 }, { "epoch": 0.27986388344651825, "grad_norm": 1.6057667210594841, "learning_rate": 9.046656158110837e-06, "loss": 0.6247, "step": 9499 }, { "epoch": 0.27989334590398185, "grad_norm": 1.4201053646223392, "learning_rate": 9.04635409678674e-06, "loss": 0.4234, "step": 9500 }, { "epoch": 0.27992280836144545, "grad_norm": 1.47170810712047, "learning_rate": 9.04605199266154e-06, "loss": 0.4124, "step": 9501 }, { "epoch": 0.279952270818909, "grad_norm": 1.7336625628987934, "learning_rate": 9.045749845738438e-06, "loss": 0.5437, "step": 9502 }, { "epoch": 0.2799817332763726, "grad_norm": 1.584563546836577, "learning_rate": 9.045447656020628e-06, "loss": 0.4943, "step": 9503 }, { "epoch": 0.28001119573383615, "grad_norm": 1.4817534609567733, "learning_rate": 9.045145423511304e-06, "loss": 0.3785, "step": 9504 }, { "epoch": 0.28004065819129975, "grad_norm": 1.4672824284297135, "learning_rate": 9.044843148213664e-06, "loss": 0.4637, "step": 9505 }, { "epoch": 0.2800701206487633, "grad_norm": 1.6786457238251706, "learning_rate": 9.044540830130905e-06, "loss": 0.5217, "step": 9506 }, { "epoch": 0.2800995831062269, "grad_norm": 1.535884871230134, "learning_rate": 9.044238469266229e-06, "loss": 0.5162, "step": 9507 }, { "epoch": 0.28012904556369045, "grad_norm": 1.3950081089440418, "learning_rate": 9.043936065622827e-06, "loss": 0.5137, "step": 9508 }, { "epoch": 0.28015850802115405, "grad_norm": 1.6746036864581255, "learning_rate": 9.043633619203905e-06, "loss": 0.4576, "step": 9509 }, { "epoch": 0.2801879704786176, "grad_norm": 1.5328033630368665, "learning_rate": 9.043331130012658e-06, "loss": 0.4167, "step": 9510 }, { "epoch": 0.2802174329360812, "grad_norm": 1.2774170400049039, "learning_rate": 9.043028598052286e-06, "loss": 0.376, "step": 9511 }, { "epoch": 0.28024689539354475, "grad_norm": 1.4023576345435012, "learning_rate": 9.04272602332599e-06, "loss": 0.4585, "step": 9512 }, { "epoch": 0.28027635785100835, "grad_norm": 1.8296739948565368, "learning_rate": 9.042423405836971e-06, "loss": 0.5628, "step": 9513 }, { "epoch": 0.28030582030847195, "grad_norm": 1.7347670674534013, "learning_rate": 9.042120745588428e-06, "loss": 0.6081, "step": 9514 }, { "epoch": 0.2803352827659355, "grad_norm": 1.322577967674035, "learning_rate": 9.041818042583563e-06, "loss": 0.4222, "step": 9515 }, { "epoch": 0.2803647452233991, "grad_norm": 1.4594722227037675, "learning_rate": 9.04151529682558e-06, "loss": 0.4902, "step": 9516 }, { "epoch": 0.28039420768086265, "grad_norm": 1.7882385065160726, "learning_rate": 9.041212508317679e-06, "loss": 0.6186, "step": 9517 }, { "epoch": 0.28042367013832625, "grad_norm": 1.4792099735519626, "learning_rate": 9.040909677063065e-06, "loss": 0.5011, "step": 9518 }, { "epoch": 0.2804531325957898, "grad_norm": 1.6119827613936524, "learning_rate": 9.04060680306494e-06, "loss": 0.4516, "step": 9519 }, { "epoch": 0.2804825950532534, "grad_norm": 1.8047865001573427, "learning_rate": 9.040303886326506e-06, "loss": 0.5833, "step": 9520 }, { "epoch": 0.28051205751071695, "grad_norm": 1.5263915139121937, "learning_rate": 9.040000926850969e-06, "loss": 0.3566, "step": 9521 }, { "epoch": 0.28054151996818055, "grad_norm": 1.5961284849232453, "learning_rate": 9.039697924641534e-06, "loss": 0.5116, "step": 9522 }, { "epoch": 0.2805709824256441, "grad_norm": 1.501800374564372, "learning_rate": 9.039394879701406e-06, "loss": 0.5448, "step": 9523 }, { "epoch": 0.2806004448831077, "grad_norm": 1.4596363533152967, "learning_rate": 9.03909179203379e-06, "loss": 0.5155, "step": 9524 }, { "epoch": 0.28062990734057125, "grad_norm": 1.6872444915664129, "learning_rate": 9.03878866164189e-06, "loss": 0.4418, "step": 9525 }, { "epoch": 0.28065936979803485, "grad_norm": 1.3975775729470923, "learning_rate": 9.038485488528916e-06, "loss": 0.3977, "step": 9526 }, { "epoch": 0.28068883225549845, "grad_norm": 1.6208672061645852, "learning_rate": 9.038182272698073e-06, "loss": 0.4897, "step": 9527 }, { "epoch": 0.280718294712962, "grad_norm": 1.415530190645869, "learning_rate": 9.037879014152568e-06, "loss": 0.5384, "step": 9528 }, { "epoch": 0.2807477571704256, "grad_norm": 1.7123504169436565, "learning_rate": 9.03757571289561e-06, "loss": 0.5286, "step": 9529 }, { "epoch": 0.28077721962788915, "grad_norm": 1.844884762332914, "learning_rate": 9.037272368930407e-06, "loss": 0.4454, "step": 9530 }, { "epoch": 0.28080668208535275, "grad_norm": 1.784714848737298, "learning_rate": 9.036968982260167e-06, "loss": 0.5012, "step": 9531 }, { "epoch": 0.2808361445428163, "grad_norm": 1.7016577815221614, "learning_rate": 9.0366655528881e-06, "loss": 0.5439, "step": 9532 }, { "epoch": 0.2808656070002799, "grad_norm": 1.5701639856077374, "learning_rate": 9.036362080817414e-06, "loss": 0.3786, "step": 9533 }, { "epoch": 0.28089506945774345, "grad_norm": 1.7596709767090328, "learning_rate": 9.03605856605132e-06, "loss": 0.3838, "step": 9534 }, { "epoch": 0.28092453191520705, "grad_norm": 1.7032048065223333, "learning_rate": 9.035755008593028e-06, "loss": 0.5375, "step": 9535 }, { "epoch": 0.2809539943726706, "grad_norm": 1.7146238323087095, "learning_rate": 9.035451408445753e-06, "loss": 0.5966, "step": 9536 }, { "epoch": 0.2809834568301342, "grad_norm": 1.6406166472304207, "learning_rate": 9.0351477656127e-06, "loss": 0.5831, "step": 9537 }, { "epoch": 0.28101291928759775, "grad_norm": 1.6203032178746837, "learning_rate": 9.034844080097083e-06, "loss": 0.4659, "step": 9538 }, { "epoch": 0.28104238174506135, "grad_norm": 1.4638964969622539, "learning_rate": 9.034540351902114e-06, "loss": 0.4231, "step": 9539 }, { "epoch": 0.28107184420252496, "grad_norm": 1.577676488588029, "learning_rate": 9.034236581031011e-06, "loss": 0.5363, "step": 9540 }, { "epoch": 0.2811013066599885, "grad_norm": 1.3345718066770151, "learning_rate": 9.03393276748698e-06, "loss": 0.3987, "step": 9541 }, { "epoch": 0.2811307691174521, "grad_norm": 1.6215402086932686, "learning_rate": 9.033628911273238e-06, "loss": 0.5961, "step": 9542 }, { "epoch": 0.28116023157491565, "grad_norm": 1.7706815356414651, "learning_rate": 9.033325012392997e-06, "loss": 0.5163, "step": 9543 }, { "epoch": 0.28118969403237926, "grad_norm": 1.6056192744793472, "learning_rate": 9.033021070849475e-06, "loss": 0.6014, "step": 9544 }, { "epoch": 0.2812191564898428, "grad_norm": 1.4892212542696257, "learning_rate": 9.032717086645886e-06, "loss": 0.4498, "step": 9545 }, { "epoch": 0.2812486189473064, "grad_norm": 1.4024978324853612, "learning_rate": 9.032413059785443e-06, "loss": 0.4733, "step": 9546 }, { "epoch": 0.28127808140476995, "grad_norm": 1.4742798289957957, "learning_rate": 9.032108990271365e-06, "loss": 0.5619, "step": 9547 }, { "epoch": 0.28130754386223356, "grad_norm": 1.7562575479161322, "learning_rate": 9.031804878106867e-06, "loss": 0.656, "step": 9548 }, { "epoch": 0.2813370063196971, "grad_norm": 1.3765842838599893, "learning_rate": 9.031500723295164e-06, "loss": 0.4575, "step": 9549 }, { "epoch": 0.2813664687771607, "grad_norm": 1.6998269056851032, "learning_rate": 9.031196525839477e-06, "loss": 0.5405, "step": 9550 }, { "epoch": 0.28139593123462425, "grad_norm": 1.4192599757896809, "learning_rate": 9.03089228574302e-06, "loss": 0.526, "step": 9551 }, { "epoch": 0.28142539369208786, "grad_norm": 1.4132764049610698, "learning_rate": 9.030588003009013e-06, "loss": 0.5548, "step": 9552 }, { "epoch": 0.28145485614955146, "grad_norm": 1.574505449879979, "learning_rate": 9.030283677640677e-06, "loss": 0.5001, "step": 9553 }, { "epoch": 0.281484318607015, "grad_norm": 1.6709124016558226, "learning_rate": 9.029979309641226e-06, "loss": 0.3323, "step": 9554 }, { "epoch": 0.2815137810644786, "grad_norm": 1.7783730425257025, "learning_rate": 9.029674899013884e-06, "loss": 0.5576, "step": 9555 }, { "epoch": 0.28154324352194215, "grad_norm": 1.5973997259467327, "learning_rate": 9.029370445761866e-06, "loss": 0.5232, "step": 9556 }, { "epoch": 0.28157270597940576, "grad_norm": 1.6959283691726887, "learning_rate": 9.029065949888398e-06, "loss": 0.479, "step": 9557 }, { "epoch": 0.2816021684368693, "grad_norm": 1.593688564060292, "learning_rate": 9.028761411396697e-06, "loss": 0.4393, "step": 9558 }, { "epoch": 0.2816316308943329, "grad_norm": 1.6074027522741565, "learning_rate": 9.028456830289984e-06, "loss": 0.5336, "step": 9559 }, { "epoch": 0.28166109335179645, "grad_norm": 1.6494313982053843, "learning_rate": 9.028152206571486e-06, "loss": 0.4876, "step": 9560 }, { "epoch": 0.28169055580926006, "grad_norm": 1.5014924418150757, "learning_rate": 9.02784754024442e-06, "loss": 0.5066, "step": 9561 }, { "epoch": 0.2817200182667236, "grad_norm": 1.5983263509622523, "learning_rate": 9.027542831312008e-06, "loss": 0.626, "step": 9562 }, { "epoch": 0.2817494807241872, "grad_norm": 1.524350073586218, "learning_rate": 9.02723807977748e-06, "loss": 0.4286, "step": 9563 }, { "epoch": 0.28177894318165075, "grad_norm": 1.6096209294667183, "learning_rate": 9.02693328564405e-06, "loss": 0.4933, "step": 9564 }, { "epoch": 0.28180840563911436, "grad_norm": 1.8118094911660778, "learning_rate": 9.02662844891495e-06, "loss": 0.6431, "step": 9565 }, { "epoch": 0.28183786809657796, "grad_norm": 1.6942829047945256, "learning_rate": 9.0263235695934e-06, "loss": 0.4212, "step": 9566 }, { "epoch": 0.2818673305540415, "grad_norm": 1.8228651684666317, "learning_rate": 9.026018647682629e-06, "loss": 0.6705, "step": 9567 }, { "epoch": 0.2818967930115051, "grad_norm": 1.5863061213670608, "learning_rate": 9.025713683185856e-06, "loss": 0.4723, "step": 9568 }, { "epoch": 0.28192625546896866, "grad_norm": 1.685059784292781, "learning_rate": 9.025408676106313e-06, "loss": 0.587, "step": 9569 }, { "epoch": 0.28195571792643226, "grad_norm": 1.7190829662604719, "learning_rate": 9.025103626447224e-06, "loss": 0.7256, "step": 9570 }, { "epoch": 0.2819851803838958, "grad_norm": 1.574281945676572, "learning_rate": 9.024798534211815e-06, "loss": 0.4604, "step": 9571 }, { "epoch": 0.2820146428413594, "grad_norm": 1.545990089677842, "learning_rate": 9.024493399403313e-06, "loss": 0.4421, "step": 9572 }, { "epoch": 0.28204410529882296, "grad_norm": 1.5648902326220588, "learning_rate": 9.024188222024945e-06, "loss": 0.5512, "step": 9573 }, { "epoch": 0.28207356775628656, "grad_norm": 1.5457399561690366, "learning_rate": 9.023883002079942e-06, "loss": 0.5278, "step": 9574 }, { "epoch": 0.2821030302137501, "grad_norm": 1.5958057540404345, "learning_rate": 9.023577739571532e-06, "loss": 0.5048, "step": 9575 }, { "epoch": 0.2821324926712137, "grad_norm": 1.6441171308675802, "learning_rate": 9.023272434502942e-06, "loss": 0.3678, "step": 9576 }, { "epoch": 0.28216195512867726, "grad_norm": 1.7454281165244467, "learning_rate": 9.022967086877403e-06, "loss": 0.617, "step": 9577 }, { "epoch": 0.28219141758614086, "grad_norm": 1.5401456110888199, "learning_rate": 9.022661696698142e-06, "loss": 0.4673, "step": 9578 }, { "epoch": 0.28222088004360446, "grad_norm": 1.6483116749169107, "learning_rate": 9.022356263968394e-06, "loss": 0.4584, "step": 9579 }, { "epoch": 0.282250342501068, "grad_norm": 1.8138141734938258, "learning_rate": 9.022050788691386e-06, "loss": 0.4758, "step": 9580 }, { "epoch": 0.2822798049585316, "grad_norm": 1.6586016939386976, "learning_rate": 9.02174527087035e-06, "loss": 0.5384, "step": 9581 }, { "epoch": 0.28230926741599516, "grad_norm": 1.4635537092960063, "learning_rate": 9.021439710508519e-06, "loss": 0.4243, "step": 9582 }, { "epoch": 0.28233872987345876, "grad_norm": 1.3733102224876297, "learning_rate": 9.021134107609126e-06, "loss": 0.3955, "step": 9583 }, { "epoch": 0.2823681923309223, "grad_norm": 1.5094582420322216, "learning_rate": 9.0208284621754e-06, "loss": 0.4457, "step": 9584 }, { "epoch": 0.2823976547883859, "grad_norm": 1.526847869052278, "learning_rate": 9.020522774210577e-06, "loss": 0.4118, "step": 9585 }, { "epoch": 0.28242711724584946, "grad_norm": 1.4821317393954807, "learning_rate": 9.020217043717886e-06, "loss": 0.3397, "step": 9586 }, { "epoch": 0.28245657970331306, "grad_norm": 1.5826044440283054, "learning_rate": 9.019911270700568e-06, "loss": 0.4134, "step": 9587 }, { "epoch": 0.2824860421607766, "grad_norm": 1.4955638897346664, "learning_rate": 9.019605455161853e-06, "loss": 0.421, "step": 9588 }, { "epoch": 0.2825155046182402, "grad_norm": 1.6586459200765324, "learning_rate": 9.019299597104976e-06, "loss": 0.4912, "step": 9589 }, { "epoch": 0.28254496707570376, "grad_norm": 1.502205307029928, "learning_rate": 9.018993696533172e-06, "loss": 0.4545, "step": 9590 }, { "epoch": 0.28257442953316736, "grad_norm": 1.474745255825641, "learning_rate": 9.018687753449678e-06, "loss": 0.4276, "step": 9591 }, { "epoch": 0.28260389199063096, "grad_norm": 1.3765361853444393, "learning_rate": 9.01838176785773e-06, "loss": 0.3821, "step": 9592 }, { "epoch": 0.2826333544480945, "grad_norm": 1.4798519440321116, "learning_rate": 9.018075739760564e-06, "loss": 0.4492, "step": 9593 }, { "epoch": 0.2826628169055581, "grad_norm": 1.580115531243876, "learning_rate": 9.017769669161418e-06, "loss": 0.5793, "step": 9594 }, { "epoch": 0.28269227936302166, "grad_norm": 1.538715326744963, "learning_rate": 9.017463556063528e-06, "loss": 0.5186, "step": 9595 }, { "epoch": 0.28272174182048526, "grad_norm": 1.6283689070800935, "learning_rate": 9.017157400470133e-06, "loss": 0.5759, "step": 9596 }, { "epoch": 0.2827512042779488, "grad_norm": 1.86792436246902, "learning_rate": 9.016851202384473e-06, "loss": 0.5713, "step": 9597 }, { "epoch": 0.2827806667354124, "grad_norm": 1.776478945922326, "learning_rate": 9.016544961809784e-06, "loss": 0.445, "step": 9598 }, { "epoch": 0.28281012919287596, "grad_norm": 1.488766322138599, "learning_rate": 9.016238678749306e-06, "loss": 0.5281, "step": 9599 }, { "epoch": 0.28283959165033956, "grad_norm": 1.6587429000119265, "learning_rate": 9.01593235320628e-06, "loss": 0.6472, "step": 9600 }, { "epoch": 0.2828690541078031, "grad_norm": 1.4876223253096046, "learning_rate": 9.015625985183945e-06, "loss": 0.4572, "step": 9601 }, { "epoch": 0.2828985165652667, "grad_norm": 1.4984534985318319, "learning_rate": 9.015319574685541e-06, "loss": 0.4843, "step": 9602 }, { "epoch": 0.28292797902273026, "grad_norm": 1.709893897082496, "learning_rate": 9.015013121714311e-06, "loss": 0.4848, "step": 9603 }, { "epoch": 0.28295744148019386, "grad_norm": 1.6137472596639653, "learning_rate": 9.014706626273498e-06, "loss": 0.4888, "step": 9604 }, { "epoch": 0.28298690393765746, "grad_norm": 1.5490653526134746, "learning_rate": 9.014400088366339e-06, "loss": 0.4278, "step": 9605 }, { "epoch": 0.283016366395121, "grad_norm": 1.518379090078171, "learning_rate": 9.01409350799608e-06, "loss": 0.4909, "step": 9606 }, { "epoch": 0.2830458288525846, "grad_norm": 1.5335331986887066, "learning_rate": 9.013786885165962e-06, "loss": 0.5482, "step": 9607 }, { "epoch": 0.28307529131004816, "grad_norm": 1.4779800116562427, "learning_rate": 9.013480219879233e-06, "loss": 0.4659, "step": 9608 }, { "epoch": 0.28310475376751176, "grad_norm": 1.570443586219172, "learning_rate": 9.01317351213913e-06, "loss": 0.4327, "step": 9609 }, { "epoch": 0.2831342162249753, "grad_norm": 1.4521554024490515, "learning_rate": 9.012866761948904e-06, "loss": 0.3784, "step": 9610 }, { "epoch": 0.2831636786824389, "grad_norm": 1.4792271600976903, "learning_rate": 9.012559969311794e-06, "loss": 0.4757, "step": 9611 }, { "epoch": 0.28319314113990246, "grad_norm": 1.566341314785027, "learning_rate": 9.012253134231047e-06, "loss": 0.6086, "step": 9612 }, { "epoch": 0.28322260359736606, "grad_norm": 1.7167468333645668, "learning_rate": 9.011946256709911e-06, "loss": 0.5504, "step": 9613 }, { "epoch": 0.2832520660548296, "grad_norm": 1.4209550025931665, "learning_rate": 9.011639336751628e-06, "loss": 0.457, "step": 9614 }, { "epoch": 0.2832815285122932, "grad_norm": 1.7227060853600527, "learning_rate": 9.011332374359449e-06, "loss": 0.6617, "step": 9615 }, { "epoch": 0.28331099096975676, "grad_norm": 1.5436935501497857, "learning_rate": 9.011025369536618e-06, "loss": 0.5025, "step": 9616 }, { "epoch": 0.28334045342722036, "grad_norm": 1.6101059545129057, "learning_rate": 9.010718322286383e-06, "loss": 0.3793, "step": 9617 }, { "epoch": 0.28336991588468396, "grad_norm": 1.4790098647753946, "learning_rate": 9.010411232611991e-06, "loss": 0.5494, "step": 9618 }, { "epoch": 0.2833993783421475, "grad_norm": 1.864203949556065, "learning_rate": 9.010104100516693e-06, "loss": 0.7257, "step": 9619 }, { "epoch": 0.2834288407996111, "grad_norm": 1.8091084765852474, "learning_rate": 9.009796926003733e-06, "loss": 0.6223, "step": 9620 }, { "epoch": 0.28345830325707466, "grad_norm": 1.4530610174490524, "learning_rate": 9.009489709076365e-06, "loss": 0.3845, "step": 9621 }, { "epoch": 0.28348776571453826, "grad_norm": 1.5900361323999936, "learning_rate": 9.009182449737836e-06, "loss": 0.5625, "step": 9622 }, { "epoch": 0.2835172281720018, "grad_norm": 1.6327330115635317, "learning_rate": 9.008875147991399e-06, "loss": 0.5326, "step": 9623 }, { "epoch": 0.2835466906294654, "grad_norm": 1.8115014022000977, "learning_rate": 9.0085678038403e-06, "loss": 0.5016, "step": 9624 }, { "epoch": 0.28357615308692896, "grad_norm": 1.4666242629808686, "learning_rate": 9.008260417287794e-06, "loss": 0.4478, "step": 9625 }, { "epoch": 0.28360561554439256, "grad_norm": 1.7431234165957001, "learning_rate": 9.00795298833713e-06, "loss": 0.5706, "step": 9626 }, { "epoch": 0.2836350780018561, "grad_norm": 1.494596304680779, "learning_rate": 9.00764551699156e-06, "loss": 0.5124, "step": 9627 }, { "epoch": 0.2836645404593197, "grad_norm": 1.3843489086395557, "learning_rate": 9.007338003254338e-06, "loss": 0.3928, "step": 9628 }, { "epoch": 0.2836940029167833, "grad_norm": 1.5493794700595982, "learning_rate": 9.007030447128715e-06, "loss": 0.5709, "step": 9629 }, { "epoch": 0.28372346537424686, "grad_norm": 1.5913695801994536, "learning_rate": 9.006722848617947e-06, "loss": 0.5894, "step": 9630 }, { "epoch": 0.28375292783171047, "grad_norm": 1.6015779522950258, "learning_rate": 9.006415207725286e-06, "loss": 0.556, "step": 9631 }, { "epoch": 0.283782390289174, "grad_norm": 1.4929754846223122, "learning_rate": 9.006107524453984e-06, "loss": 0.3931, "step": 9632 }, { "epoch": 0.2838118527466376, "grad_norm": 1.6616051901523952, "learning_rate": 9.005799798807298e-06, "loss": 0.5325, "step": 9633 }, { "epoch": 0.28384131520410116, "grad_norm": 1.5438490114909384, "learning_rate": 9.005492030788482e-06, "loss": 0.4457, "step": 9634 }, { "epoch": 0.28387077766156477, "grad_norm": 1.4754051960216616, "learning_rate": 9.005184220400793e-06, "loss": 0.3693, "step": 9635 }, { "epoch": 0.2839002401190283, "grad_norm": 1.5896702908053164, "learning_rate": 9.004876367647487e-06, "loss": 0.5351, "step": 9636 }, { "epoch": 0.2839297025764919, "grad_norm": 1.574765779578844, "learning_rate": 9.004568472531817e-06, "loss": 0.5709, "step": 9637 }, { "epoch": 0.28395916503395546, "grad_norm": 1.546701830846229, "learning_rate": 9.004260535057043e-06, "loss": 0.5581, "step": 9638 }, { "epoch": 0.28398862749141907, "grad_norm": 1.6796106168774092, "learning_rate": 9.003952555226423e-06, "loss": 0.4934, "step": 9639 }, { "epoch": 0.2840180899488826, "grad_norm": 1.7212951676315753, "learning_rate": 9.00364453304321e-06, "loss": 0.5966, "step": 9640 }, { "epoch": 0.2840475524063462, "grad_norm": 1.733462182008271, "learning_rate": 9.003336468510668e-06, "loss": 0.6373, "step": 9641 }, { "epoch": 0.2840770148638098, "grad_norm": 1.8534194059422553, "learning_rate": 9.003028361632052e-06, "loss": 0.5505, "step": 9642 }, { "epoch": 0.28410647732127337, "grad_norm": 1.5293576878845139, "learning_rate": 9.002720212410622e-06, "loss": 0.3434, "step": 9643 }, { "epoch": 0.28413593977873697, "grad_norm": 1.8003039325270027, "learning_rate": 9.002412020849636e-06, "loss": 0.6278, "step": 9644 }, { "epoch": 0.2841654022362005, "grad_norm": 1.7798123389185794, "learning_rate": 9.002103786952358e-06, "loss": 0.7038, "step": 9645 }, { "epoch": 0.2841948646936641, "grad_norm": 1.728460359278687, "learning_rate": 9.001795510722045e-06, "loss": 0.5562, "step": 9646 }, { "epoch": 0.28422432715112766, "grad_norm": 1.7908419433670102, "learning_rate": 9.001487192161958e-06, "loss": 0.5954, "step": 9647 }, { "epoch": 0.28425378960859127, "grad_norm": 1.7152253712953178, "learning_rate": 9.001178831275359e-06, "loss": 0.5152, "step": 9648 }, { "epoch": 0.2842832520660548, "grad_norm": 1.4937518495892093, "learning_rate": 9.000870428065508e-06, "loss": 0.5196, "step": 9649 }, { "epoch": 0.2843127145235184, "grad_norm": 1.6330999524421603, "learning_rate": 9.000561982535672e-06, "loss": 0.621, "step": 9650 }, { "epoch": 0.28434217698098196, "grad_norm": 1.448514838437159, "learning_rate": 9.000253494689109e-06, "loss": 0.4429, "step": 9651 }, { "epoch": 0.28437163943844557, "grad_norm": 1.63433766324645, "learning_rate": 8.999944964529084e-06, "loss": 0.4383, "step": 9652 }, { "epoch": 0.2844011018959091, "grad_norm": 1.8102480678366115, "learning_rate": 8.999636392058862e-06, "loss": 0.5981, "step": 9653 }, { "epoch": 0.2844305643533727, "grad_norm": 1.8745212008434977, "learning_rate": 8.999327777281703e-06, "loss": 0.6463, "step": 9654 }, { "epoch": 0.2844600268108363, "grad_norm": 1.6443949590579297, "learning_rate": 8.999019120200873e-06, "loss": 0.6242, "step": 9655 }, { "epoch": 0.28448948926829987, "grad_norm": 1.445428664772038, "learning_rate": 8.998710420819639e-06, "loss": 0.4394, "step": 9656 }, { "epoch": 0.28451895172576347, "grad_norm": 1.5869217659249177, "learning_rate": 8.998401679141264e-06, "loss": 0.5541, "step": 9657 }, { "epoch": 0.284548414183227, "grad_norm": 1.6507102757369327, "learning_rate": 8.998092895169016e-06, "loss": 0.4982, "step": 9658 }, { "epoch": 0.2845778766406906, "grad_norm": 1.5547215390168019, "learning_rate": 8.997784068906157e-06, "loss": 0.408, "step": 9659 }, { "epoch": 0.28460733909815417, "grad_norm": 1.6233150222805166, "learning_rate": 8.997475200355958e-06, "loss": 0.4427, "step": 9660 }, { "epoch": 0.28463680155561777, "grad_norm": 1.592843113262894, "learning_rate": 8.997166289521685e-06, "loss": 0.606, "step": 9661 }, { "epoch": 0.2846662640130813, "grad_norm": 1.531751273724533, "learning_rate": 8.996857336406605e-06, "loss": 0.3872, "step": 9662 }, { "epoch": 0.2846957264705449, "grad_norm": 1.5670636519707333, "learning_rate": 8.996548341013986e-06, "loss": 0.5675, "step": 9663 }, { "epoch": 0.28472518892800847, "grad_norm": 1.8054495037113285, "learning_rate": 8.996239303347097e-06, "loss": 0.7043, "step": 9664 }, { "epoch": 0.28475465138547207, "grad_norm": 1.595414983861719, "learning_rate": 8.995930223409206e-06, "loss": 0.5562, "step": 9665 }, { "epoch": 0.2847841138429356, "grad_norm": 1.4084807608125864, "learning_rate": 8.995621101203584e-06, "loss": 0.4686, "step": 9666 }, { "epoch": 0.2848135763003992, "grad_norm": 1.4244870978275923, "learning_rate": 8.995311936733498e-06, "loss": 0.3814, "step": 9667 }, { "epoch": 0.2848430387578628, "grad_norm": 1.6627217357793775, "learning_rate": 8.995002730002222e-06, "loss": 0.5924, "step": 9668 }, { "epoch": 0.28487250121532637, "grad_norm": 1.4035959922491317, "learning_rate": 8.994693481013022e-06, "loss": 0.3886, "step": 9669 }, { "epoch": 0.28490196367278997, "grad_norm": 1.540605755859836, "learning_rate": 8.994384189769174e-06, "loss": 0.3641, "step": 9670 }, { "epoch": 0.2849314261302535, "grad_norm": 1.6421950565191372, "learning_rate": 8.994074856273946e-06, "loss": 0.4737, "step": 9671 }, { "epoch": 0.2849608885877171, "grad_norm": 1.6730008790602022, "learning_rate": 8.993765480530613e-06, "loss": 0.4759, "step": 9672 }, { "epoch": 0.28499035104518067, "grad_norm": 1.4014921327751408, "learning_rate": 8.993456062542445e-06, "loss": 0.5148, "step": 9673 }, { "epoch": 0.28501981350264427, "grad_norm": 1.6076825897759464, "learning_rate": 8.993146602312714e-06, "loss": 0.4756, "step": 9674 }, { "epoch": 0.2850492759601078, "grad_norm": 1.6020081933780692, "learning_rate": 8.992837099844697e-06, "loss": 0.5557, "step": 9675 }, { "epoch": 0.2850787384175714, "grad_norm": 1.7331181076693567, "learning_rate": 8.992527555141666e-06, "loss": 0.5025, "step": 9676 }, { "epoch": 0.28510820087503497, "grad_norm": 1.4904922329086034, "learning_rate": 8.992217968206895e-06, "loss": 0.5681, "step": 9677 }, { "epoch": 0.28513766333249857, "grad_norm": 1.6993206254298112, "learning_rate": 8.99190833904366e-06, "loss": 0.526, "step": 9678 }, { "epoch": 0.2851671257899621, "grad_norm": 1.568623108341542, "learning_rate": 8.991598667655235e-06, "loss": 0.4517, "step": 9679 }, { "epoch": 0.2851965882474257, "grad_norm": 1.5190998198188117, "learning_rate": 8.991288954044895e-06, "loss": 0.4608, "step": 9680 }, { "epoch": 0.2852260507048893, "grad_norm": 1.4849131812197138, "learning_rate": 8.990979198215916e-06, "loss": 0.5055, "step": 9681 }, { "epoch": 0.28525551316235287, "grad_norm": 1.6852008808526486, "learning_rate": 8.990669400171577e-06, "loss": 0.6109, "step": 9682 }, { "epoch": 0.28528497561981647, "grad_norm": 1.5993456431807853, "learning_rate": 8.990359559915152e-06, "loss": 0.596, "step": 9683 }, { "epoch": 0.28531443807728, "grad_norm": 1.401567226881152, "learning_rate": 8.990049677449922e-06, "loss": 0.3497, "step": 9684 }, { "epoch": 0.2853439005347436, "grad_norm": 1.6716865554520803, "learning_rate": 8.98973975277916e-06, "loss": 0.5241, "step": 9685 }, { "epoch": 0.28537336299220717, "grad_norm": 1.5674081692540023, "learning_rate": 8.989429785906147e-06, "loss": 0.5184, "step": 9686 }, { "epoch": 0.28540282544967077, "grad_norm": 1.4481051797383597, "learning_rate": 8.989119776834164e-06, "loss": 0.4686, "step": 9687 }, { "epoch": 0.2854322879071343, "grad_norm": 1.4940695419465462, "learning_rate": 8.988809725566484e-06, "loss": 0.3921, "step": 9688 }, { "epoch": 0.2854617503645979, "grad_norm": 1.8552292978060874, "learning_rate": 8.988499632106393e-06, "loss": 0.5979, "step": 9689 }, { "epoch": 0.28549121282206147, "grad_norm": 1.5769667519988342, "learning_rate": 8.988189496457167e-06, "loss": 0.6011, "step": 9690 }, { "epoch": 0.28552067527952507, "grad_norm": 1.8598003399604648, "learning_rate": 8.987879318622089e-06, "loss": 0.5629, "step": 9691 }, { "epoch": 0.2855501377369886, "grad_norm": 1.3630634174968044, "learning_rate": 8.987569098604439e-06, "loss": 0.3953, "step": 9692 }, { "epoch": 0.2855796001944522, "grad_norm": 1.6707771705967098, "learning_rate": 8.987258836407498e-06, "loss": 0.4849, "step": 9693 }, { "epoch": 0.2856090626519158, "grad_norm": 1.4980152215035232, "learning_rate": 8.986948532034547e-06, "loss": 0.4338, "step": 9694 }, { "epoch": 0.28563852510937937, "grad_norm": 1.7934633922150098, "learning_rate": 8.98663818548887e-06, "loss": 0.4671, "step": 9695 }, { "epoch": 0.285667987566843, "grad_norm": 1.4916778629616387, "learning_rate": 8.98632779677375e-06, "loss": 0.4601, "step": 9696 }, { "epoch": 0.2856974500243065, "grad_norm": 1.71577948995582, "learning_rate": 8.98601736589247e-06, "loss": 0.69, "step": 9697 }, { "epoch": 0.2857269124817701, "grad_norm": 1.6531340462155348, "learning_rate": 8.985706892848311e-06, "loss": 0.5013, "step": 9698 }, { "epoch": 0.28575637493923367, "grad_norm": 1.7054108353486774, "learning_rate": 8.985396377644562e-06, "loss": 0.5397, "step": 9699 }, { "epoch": 0.2857858373966973, "grad_norm": 1.640423471897125, "learning_rate": 8.985085820284503e-06, "loss": 0.3451, "step": 9700 }, { "epoch": 0.2858152998541608, "grad_norm": 1.623994565789993, "learning_rate": 8.98477522077142e-06, "loss": 0.4626, "step": 9701 }, { "epoch": 0.2858447623116244, "grad_norm": 1.7535668585042268, "learning_rate": 8.984464579108601e-06, "loss": 0.3996, "step": 9702 }, { "epoch": 0.28587422476908797, "grad_norm": 1.5866111332673019, "learning_rate": 8.984153895299328e-06, "loss": 0.565, "step": 9703 }, { "epoch": 0.2859036872265516, "grad_norm": 1.386685842573191, "learning_rate": 8.98384316934689e-06, "loss": 0.4325, "step": 9704 }, { "epoch": 0.2859331496840151, "grad_norm": 1.3958034410395557, "learning_rate": 8.983532401254573e-06, "loss": 0.4387, "step": 9705 }, { "epoch": 0.2859626121414787, "grad_norm": 1.7391347319739072, "learning_rate": 8.983221591025665e-06, "loss": 0.4963, "step": 9706 }, { "epoch": 0.2859920745989423, "grad_norm": 1.637322203265375, "learning_rate": 8.982910738663453e-06, "loss": 0.5557, "step": 9707 }, { "epoch": 0.2860215370564059, "grad_norm": 1.6218788377746292, "learning_rate": 8.982599844171224e-06, "loss": 0.4646, "step": 9708 }, { "epoch": 0.2860509995138695, "grad_norm": 1.536515074322491, "learning_rate": 8.982288907552268e-06, "loss": 0.4093, "step": 9709 }, { "epoch": 0.286080461971333, "grad_norm": 1.8858621508654851, "learning_rate": 8.981977928809874e-06, "loss": 0.547, "step": 9710 }, { "epoch": 0.2861099244287966, "grad_norm": 1.566231438448788, "learning_rate": 8.981666907947331e-06, "loss": 0.5471, "step": 9711 }, { "epoch": 0.28613938688626017, "grad_norm": 1.4630837637949745, "learning_rate": 8.981355844967929e-06, "loss": 0.5632, "step": 9712 }, { "epoch": 0.2861688493437238, "grad_norm": 1.496292665477947, "learning_rate": 8.981044739874958e-06, "loss": 0.4434, "step": 9713 }, { "epoch": 0.2861983118011873, "grad_norm": 1.6031812457290229, "learning_rate": 8.980733592671708e-06, "loss": 0.5033, "step": 9714 }, { "epoch": 0.2862277742586509, "grad_norm": 1.6725582088343915, "learning_rate": 8.980422403361474e-06, "loss": 0.5928, "step": 9715 }, { "epoch": 0.28625723671611447, "grad_norm": 1.4237066045761146, "learning_rate": 8.980111171947542e-06, "loss": 0.5434, "step": 9716 }, { "epoch": 0.2862866991735781, "grad_norm": 1.5098427191609969, "learning_rate": 8.979799898433208e-06, "loss": 0.4826, "step": 9717 }, { "epoch": 0.2863161616310416, "grad_norm": 1.4693734860953405, "learning_rate": 8.979488582821763e-06, "loss": 0.4028, "step": 9718 }, { "epoch": 0.2863456240885052, "grad_norm": 1.456997512423423, "learning_rate": 8.9791772251165e-06, "loss": 0.3483, "step": 9719 }, { "epoch": 0.2863750865459688, "grad_norm": 1.454552282076233, "learning_rate": 8.978865825320715e-06, "loss": 0.4253, "step": 9720 }, { "epoch": 0.2864045490034324, "grad_norm": 1.5135863808782661, "learning_rate": 8.978554383437698e-06, "loss": 0.5004, "step": 9721 }, { "epoch": 0.286434011460896, "grad_norm": 1.5120750576707966, "learning_rate": 8.978242899470748e-06, "loss": 0.6082, "step": 9722 }, { "epoch": 0.2864634739183595, "grad_norm": 1.7141981459701734, "learning_rate": 8.977931373423154e-06, "loss": 0.4931, "step": 9723 }, { "epoch": 0.2864929363758231, "grad_norm": 1.3252346159179131, "learning_rate": 8.977619805298215e-06, "loss": 0.3797, "step": 9724 }, { "epoch": 0.2865223988332867, "grad_norm": 1.409781531816485, "learning_rate": 8.977308195099228e-06, "loss": 0.359, "step": 9725 }, { "epoch": 0.2865518612907503, "grad_norm": 1.7242621764664419, "learning_rate": 8.976996542829485e-06, "loss": 0.4222, "step": 9726 }, { "epoch": 0.2865813237482138, "grad_norm": 1.5763931023798987, "learning_rate": 8.976684848492285e-06, "loss": 0.5677, "step": 9727 }, { "epoch": 0.2866107862056774, "grad_norm": 1.509150540705215, "learning_rate": 8.976373112090925e-06, "loss": 0.5768, "step": 9728 }, { "epoch": 0.286640248663141, "grad_norm": 1.4746445684867528, "learning_rate": 8.9760613336287e-06, "loss": 0.3596, "step": 9729 }, { "epoch": 0.2866697111206046, "grad_norm": 1.5127055596395587, "learning_rate": 8.975749513108914e-06, "loss": 0.6001, "step": 9730 }, { "epoch": 0.2866991735780681, "grad_norm": 1.4654988243229912, "learning_rate": 8.975437650534858e-06, "loss": 0.424, "step": 9731 }, { "epoch": 0.2867286360355317, "grad_norm": 1.600932415489403, "learning_rate": 8.975125745909836e-06, "loss": 0.4842, "step": 9732 }, { "epoch": 0.28675809849299533, "grad_norm": 1.4898777849132425, "learning_rate": 8.974813799237143e-06, "loss": 0.5485, "step": 9733 }, { "epoch": 0.2867875609504589, "grad_norm": 1.7682251198891303, "learning_rate": 8.974501810520082e-06, "loss": 0.4737, "step": 9734 }, { "epoch": 0.2868170234079225, "grad_norm": 1.5577866315575921, "learning_rate": 8.974189779761953e-06, "loss": 0.5226, "step": 9735 }, { "epoch": 0.286846485865386, "grad_norm": 1.4223309666701835, "learning_rate": 8.973877706966055e-06, "loss": 0.4049, "step": 9736 }, { "epoch": 0.2868759483228496, "grad_norm": 1.507044365283294, "learning_rate": 8.973565592135691e-06, "loss": 0.5346, "step": 9737 }, { "epoch": 0.2869054107803132, "grad_norm": 1.6291055741692613, "learning_rate": 8.973253435274159e-06, "loss": 0.6001, "step": 9738 }, { "epoch": 0.2869348732377768, "grad_norm": 1.6320685995419586, "learning_rate": 8.972941236384765e-06, "loss": 0.374, "step": 9739 }, { "epoch": 0.2869643356952403, "grad_norm": 1.574796839893736, "learning_rate": 8.972628995470809e-06, "loss": 0.4392, "step": 9740 }, { "epoch": 0.2869937981527039, "grad_norm": 1.5685572605581408, "learning_rate": 8.972316712535592e-06, "loss": 0.5429, "step": 9741 }, { "epoch": 0.2870232606101675, "grad_norm": 1.7369849965315374, "learning_rate": 8.972004387582421e-06, "loss": 0.5866, "step": 9742 }, { "epoch": 0.2870527230676311, "grad_norm": 1.842751987239473, "learning_rate": 8.971692020614599e-06, "loss": 0.4683, "step": 9743 }, { "epoch": 0.2870821855250946, "grad_norm": 1.6455921164351404, "learning_rate": 8.971379611635428e-06, "loss": 0.4493, "step": 9744 }, { "epoch": 0.2871116479825582, "grad_norm": 1.7176763979182912, "learning_rate": 8.971067160648216e-06, "loss": 0.527, "step": 9745 }, { "epoch": 0.28714111044002183, "grad_norm": 1.5103140875406866, "learning_rate": 8.970754667656263e-06, "loss": 0.5191, "step": 9746 }, { "epoch": 0.2871705728974854, "grad_norm": 1.4965656122555508, "learning_rate": 8.970442132662877e-06, "loss": 0.3555, "step": 9747 }, { "epoch": 0.287200035354949, "grad_norm": 1.8200718853279694, "learning_rate": 8.970129555671366e-06, "loss": 0.6027, "step": 9748 }, { "epoch": 0.2872294978124125, "grad_norm": 1.6224115682441298, "learning_rate": 8.969816936685034e-06, "loss": 0.4873, "step": 9749 }, { "epoch": 0.28725896026987613, "grad_norm": 1.5241095430360432, "learning_rate": 8.969504275707188e-06, "loss": 0.4734, "step": 9750 }, { "epoch": 0.2872884227273397, "grad_norm": 1.6417593226815408, "learning_rate": 8.969191572741137e-06, "loss": 0.5179, "step": 9751 }, { "epoch": 0.2873178851848033, "grad_norm": 1.5756789460606582, "learning_rate": 8.968878827790185e-06, "loss": 0.492, "step": 9752 }, { "epoch": 0.2873473476422668, "grad_norm": 1.6118172015711223, "learning_rate": 8.968566040857642e-06, "loss": 0.3889, "step": 9753 }, { "epoch": 0.28737681009973043, "grad_norm": 1.6598167495083935, "learning_rate": 8.968253211946818e-06, "loss": 0.5325, "step": 9754 }, { "epoch": 0.287406272557194, "grad_norm": 1.5735684693654823, "learning_rate": 8.967940341061022e-06, "loss": 0.4791, "step": 9755 }, { "epoch": 0.2874357350146576, "grad_norm": 1.551482334908228, "learning_rate": 8.96762742820356e-06, "loss": 0.5363, "step": 9756 }, { "epoch": 0.2874651974721211, "grad_norm": 1.552426846465632, "learning_rate": 8.967314473377747e-06, "loss": 0.3783, "step": 9757 }, { "epoch": 0.28749465992958473, "grad_norm": 1.498549700330238, "learning_rate": 8.967001476586888e-06, "loss": 0.5747, "step": 9758 }, { "epoch": 0.28752412238704833, "grad_norm": 1.6225335862392842, "learning_rate": 8.966688437834297e-06, "loss": 0.505, "step": 9759 }, { "epoch": 0.2875535848445119, "grad_norm": 1.5106727751086404, "learning_rate": 8.966375357123286e-06, "loss": 0.459, "step": 9760 }, { "epoch": 0.2875830473019755, "grad_norm": 1.8017910507786232, "learning_rate": 8.966062234457164e-06, "loss": 0.5919, "step": 9761 }, { "epoch": 0.28761250975943903, "grad_norm": 1.4572372255432728, "learning_rate": 8.965749069839244e-06, "loss": 0.432, "step": 9762 }, { "epoch": 0.28764197221690263, "grad_norm": 1.6608179712776374, "learning_rate": 8.96543586327284e-06, "loss": 0.5138, "step": 9763 }, { "epoch": 0.2876714346743662, "grad_norm": 1.348314019631399, "learning_rate": 8.965122614761263e-06, "loss": 0.371, "step": 9764 }, { "epoch": 0.2877008971318298, "grad_norm": 1.587625196123893, "learning_rate": 8.964809324307828e-06, "loss": 0.5219, "step": 9765 }, { "epoch": 0.2877303595892933, "grad_norm": 1.616707945746912, "learning_rate": 8.964495991915849e-06, "loss": 0.535, "step": 9766 }, { "epoch": 0.28775982204675693, "grad_norm": 1.4277715504014488, "learning_rate": 8.96418261758864e-06, "loss": 0.3179, "step": 9767 }, { "epoch": 0.2877892845042205, "grad_norm": 1.4602703008324749, "learning_rate": 8.963869201329514e-06, "loss": 0.4827, "step": 9768 }, { "epoch": 0.2878187469616841, "grad_norm": 1.5954559798160006, "learning_rate": 8.963555743141787e-06, "loss": 0.524, "step": 9769 }, { "epoch": 0.2878482094191476, "grad_norm": 1.5369351145089187, "learning_rate": 8.963242243028776e-06, "loss": 0.4654, "step": 9770 }, { "epoch": 0.28787767187661123, "grad_norm": 1.6871341393620536, "learning_rate": 8.962928700993796e-06, "loss": 0.4769, "step": 9771 }, { "epoch": 0.28790713433407483, "grad_norm": 1.357399186689644, "learning_rate": 8.962615117040166e-06, "loss": 0.4295, "step": 9772 }, { "epoch": 0.2879365967915384, "grad_norm": 1.4268680097852744, "learning_rate": 8.962301491171199e-06, "loss": 0.4095, "step": 9773 }, { "epoch": 0.287966059249002, "grad_norm": 1.610308917985917, "learning_rate": 8.961987823390214e-06, "loss": 0.5299, "step": 9774 }, { "epoch": 0.28799552170646553, "grad_norm": 1.4975439634800598, "learning_rate": 8.96167411370053e-06, "loss": 0.6368, "step": 9775 }, { "epoch": 0.28802498416392913, "grad_norm": 1.6106527085992506, "learning_rate": 8.961360362105464e-06, "loss": 0.5481, "step": 9776 }, { "epoch": 0.2880544466213927, "grad_norm": 1.70188840138954, "learning_rate": 8.961046568608339e-06, "loss": 0.4927, "step": 9777 }, { "epoch": 0.2880839090788563, "grad_norm": 1.4295506121859236, "learning_rate": 8.960732733212466e-06, "loss": 0.4981, "step": 9778 }, { "epoch": 0.28811337153631983, "grad_norm": 1.3870697172475344, "learning_rate": 8.96041885592117e-06, "loss": 0.3218, "step": 9779 }, { "epoch": 0.28814283399378343, "grad_norm": 1.558783678334355, "learning_rate": 8.960104936737772e-06, "loss": 0.3603, "step": 9780 }, { "epoch": 0.288172296451247, "grad_norm": 1.88073094948365, "learning_rate": 8.95979097566559e-06, "loss": 0.5124, "step": 9781 }, { "epoch": 0.2882017589087106, "grad_norm": 1.6528014233607518, "learning_rate": 8.959476972707946e-06, "loss": 0.4767, "step": 9782 }, { "epoch": 0.28823122136617413, "grad_norm": 1.552749474864854, "learning_rate": 8.959162927868161e-06, "loss": 0.5823, "step": 9783 }, { "epoch": 0.28826068382363773, "grad_norm": 1.361560305793835, "learning_rate": 8.958848841149557e-06, "loss": 0.4243, "step": 9784 }, { "epoch": 0.28829014628110133, "grad_norm": 1.3870129754977618, "learning_rate": 8.958534712555458e-06, "loss": 0.4471, "step": 9785 }, { "epoch": 0.2883196087385649, "grad_norm": 1.547442994932587, "learning_rate": 8.958220542089182e-06, "loss": 0.4421, "step": 9786 }, { "epoch": 0.2883490711960285, "grad_norm": 1.5903722245669827, "learning_rate": 8.957906329754057e-06, "loss": 0.4501, "step": 9787 }, { "epoch": 0.28837853365349203, "grad_norm": 1.6584730219918875, "learning_rate": 8.957592075553406e-06, "loss": 0.6381, "step": 9788 }, { "epoch": 0.28840799611095563, "grad_norm": 1.5510140838460227, "learning_rate": 8.957277779490552e-06, "loss": 0.4798, "step": 9789 }, { "epoch": 0.2884374585684192, "grad_norm": 1.7269928819724132, "learning_rate": 8.956963441568819e-06, "loss": 0.6116, "step": 9790 }, { "epoch": 0.2884669210258828, "grad_norm": 1.6485297409130242, "learning_rate": 8.956649061791532e-06, "loss": 0.5427, "step": 9791 }, { "epoch": 0.28849638348334633, "grad_norm": 1.5006990221143461, "learning_rate": 8.956334640162018e-06, "loss": 0.4653, "step": 9792 }, { "epoch": 0.28852584594080993, "grad_norm": 1.6655673738264374, "learning_rate": 8.956020176683602e-06, "loss": 0.4876, "step": 9793 }, { "epoch": 0.2885553083982735, "grad_norm": 1.4864799996233256, "learning_rate": 8.95570567135961e-06, "loss": 0.5997, "step": 9794 }, { "epoch": 0.2885847708557371, "grad_norm": 1.5968357874874752, "learning_rate": 8.95539112419337e-06, "loss": 0.5089, "step": 9795 }, { "epoch": 0.28861423331320063, "grad_norm": 1.5253974867018496, "learning_rate": 8.955076535188206e-06, "loss": 0.4514, "step": 9796 }, { "epoch": 0.28864369577066423, "grad_norm": 1.77471895879835, "learning_rate": 8.954761904347448e-06, "loss": 0.549, "step": 9797 }, { "epoch": 0.28867315822812784, "grad_norm": 1.6239582331372049, "learning_rate": 8.954447231674426e-06, "loss": 0.4464, "step": 9798 }, { "epoch": 0.2887026206855914, "grad_norm": 1.5629604466942058, "learning_rate": 8.954132517172465e-06, "loss": 0.5251, "step": 9799 }, { "epoch": 0.288732083143055, "grad_norm": 1.6427789306344804, "learning_rate": 8.953817760844896e-06, "loss": 0.5749, "step": 9800 }, { "epoch": 0.28876154560051853, "grad_norm": 1.8198631442665478, "learning_rate": 8.953502962695046e-06, "loss": 0.5471, "step": 9801 }, { "epoch": 0.28879100805798213, "grad_norm": 1.67174397917882, "learning_rate": 8.953188122726248e-06, "loss": 0.5458, "step": 9802 }, { "epoch": 0.2888204705154457, "grad_norm": 1.3533632392171324, "learning_rate": 8.95287324094183e-06, "loss": 0.3591, "step": 9803 }, { "epoch": 0.2888499329729093, "grad_norm": 1.6779134202305046, "learning_rate": 8.952558317345124e-06, "loss": 0.4097, "step": 9804 }, { "epoch": 0.28887939543037283, "grad_norm": 1.5779714232293789, "learning_rate": 8.95224335193946e-06, "loss": 0.4081, "step": 9805 }, { "epoch": 0.28890885788783643, "grad_norm": 1.4799626899174996, "learning_rate": 8.95192834472817e-06, "loss": 0.391, "step": 9806 }, { "epoch": 0.2889383203453, "grad_norm": 1.5042488526552664, "learning_rate": 8.951613295714589e-06, "loss": 0.498, "step": 9807 }, { "epoch": 0.2889677828027636, "grad_norm": 1.7071743809932836, "learning_rate": 8.951298204902045e-06, "loss": 0.4119, "step": 9808 }, { "epoch": 0.28899724526022713, "grad_norm": 1.6423886943713737, "learning_rate": 8.950983072293872e-06, "loss": 0.5812, "step": 9809 }, { "epoch": 0.28902670771769073, "grad_norm": 1.393489311889225, "learning_rate": 8.950667897893405e-06, "loss": 0.497, "step": 9810 }, { "epoch": 0.28905617017515434, "grad_norm": 1.5040815430945038, "learning_rate": 8.950352681703978e-06, "loss": 0.459, "step": 9811 }, { "epoch": 0.2890856326326179, "grad_norm": 1.3631858901017344, "learning_rate": 8.950037423728923e-06, "loss": 0.3553, "step": 9812 }, { "epoch": 0.2891150950900815, "grad_norm": 1.5796136286974718, "learning_rate": 8.949722123971576e-06, "loss": 0.5071, "step": 9813 }, { "epoch": 0.28914455754754503, "grad_norm": 1.5427635950758996, "learning_rate": 8.949406782435273e-06, "loss": 0.48, "step": 9814 }, { "epoch": 0.28917402000500864, "grad_norm": 1.4748972932851059, "learning_rate": 8.949091399123347e-06, "loss": 0.3852, "step": 9815 }, { "epoch": 0.2892034824624722, "grad_norm": 1.4592239546381789, "learning_rate": 8.948775974039137e-06, "loss": 0.3919, "step": 9816 }, { "epoch": 0.2892329449199358, "grad_norm": 1.7316719954810724, "learning_rate": 8.948460507185978e-06, "loss": 0.6307, "step": 9817 }, { "epoch": 0.28926240737739933, "grad_norm": 1.7659719027341885, "learning_rate": 8.948144998567206e-06, "loss": 0.5279, "step": 9818 }, { "epoch": 0.28929186983486294, "grad_norm": 1.3711950118044243, "learning_rate": 8.947829448186161e-06, "loss": 0.4084, "step": 9819 }, { "epoch": 0.2893213322923265, "grad_norm": 1.362124846324801, "learning_rate": 8.947513856046178e-06, "loss": 0.3814, "step": 9820 }, { "epoch": 0.2893507947497901, "grad_norm": 1.5512427449499147, "learning_rate": 8.947198222150597e-06, "loss": 0.5462, "step": 9821 }, { "epoch": 0.28938025720725363, "grad_norm": 1.490328834100322, "learning_rate": 8.946882546502757e-06, "loss": 0.4702, "step": 9822 }, { "epoch": 0.28940971966471724, "grad_norm": 1.4905855258368, "learning_rate": 8.946566829105995e-06, "loss": 0.3583, "step": 9823 }, { "epoch": 0.28943918212218084, "grad_norm": 1.7710679924711268, "learning_rate": 8.946251069963653e-06, "loss": 0.6391, "step": 9824 }, { "epoch": 0.2894686445796444, "grad_norm": 1.5064299266344463, "learning_rate": 8.94593526907907e-06, "loss": 0.5535, "step": 9825 }, { "epoch": 0.289498107037108, "grad_norm": 1.643624033999112, "learning_rate": 8.945619426455585e-06, "loss": 0.431, "step": 9826 }, { "epoch": 0.28952756949457154, "grad_norm": 1.4625070628709476, "learning_rate": 8.94530354209654e-06, "loss": 0.5082, "step": 9827 }, { "epoch": 0.28955703195203514, "grad_norm": 1.6858064533907138, "learning_rate": 8.944987616005276e-06, "loss": 0.5049, "step": 9828 }, { "epoch": 0.2895864944094987, "grad_norm": 1.6651170122068804, "learning_rate": 8.944671648185137e-06, "loss": 0.7052, "step": 9829 }, { "epoch": 0.2896159568669623, "grad_norm": 1.4998019393885809, "learning_rate": 8.944355638639463e-06, "loss": 0.4422, "step": 9830 }, { "epoch": 0.28964541932442583, "grad_norm": 1.6012718152574847, "learning_rate": 8.944039587371598e-06, "loss": 0.5545, "step": 9831 }, { "epoch": 0.28967488178188944, "grad_norm": 1.5391894354552809, "learning_rate": 8.943723494384883e-06, "loss": 0.5091, "step": 9832 }, { "epoch": 0.289704344239353, "grad_norm": 1.6002629923239955, "learning_rate": 8.943407359682664e-06, "loss": 0.5821, "step": 9833 }, { "epoch": 0.2897338066968166, "grad_norm": 1.7048785558552713, "learning_rate": 8.943091183268281e-06, "loss": 0.7092, "step": 9834 }, { "epoch": 0.28976326915428013, "grad_norm": 1.4928506351695277, "learning_rate": 8.942774965145082e-06, "loss": 0.4738, "step": 9835 }, { "epoch": 0.28979273161174374, "grad_norm": 1.4547961496542008, "learning_rate": 8.942458705316412e-06, "loss": 0.4408, "step": 9836 }, { "epoch": 0.28982219406920734, "grad_norm": 1.669884846677804, "learning_rate": 8.942142403785617e-06, "loss": 0.4701, "step": 9837 }, { "epoch": 0.2898516565266709, "grad_norm": 1.8534194998759277, "learning_rate": 8.941826060556038e-06, "loss": 0.6296, "step": 9838 }, { "epoch": 0.2898811189841345, "grad_norm": 1.4373090556994015, "learning_rate": 8.941509675631026e-06, "loss": 0.3475, "step": 9839 }, { "epoch": 0.28991058144159804, "grad_norm": 1.5774343185501116, "learning_rate": 8.941193249013927e-06, "loss": 0.5305, "step": 9840 }, { "epoch": 0.28994004389906164, "grad_norm": 1.5388431166470409, "learning_rate": 8.940876780708083e-06, "loss": 0.5691, "step": 9841 }, { "epoch": 0.2899695063565252, "grad_norm": 1.5269950867180662, "learning_rate": 8.940560270716849e-06, "loss": 0.4997, "step": 9842 }, { "epoch": 0.2899989688139888, "grad_norm": 1.5826170917505367, "learning_rate": 8.940243719043569e-06, "loss": 0.5707, "step": 9843 }, { "epoch": 0.29002843127145234, "grad_norm": 1.5888549091721151, "learning_rate": 8.93992712569159e-06, "loss": 0.4179, "step": 9844 }, { "epoch": 0.29005789372891594, "grad_norm": 1.6243612715908575, "learning_rate": 8.939610490664264e-06, "loss": 0.535, "step": 9845 }, { "epoch": 0.2900873561863795, "grad_norm": 1.5781237195249853, "learning_rate": 8.939293813964937e-06, "loss": 0.481, "step": 9846 }, { "epoch": 0.2901168186438431, "grad_norm": 1.4107026602274446, "learning_rate": 8.938977095596964e-06, "loss": 0.4563, "step": 9847 }, { "epoch": 0.29014628110130664, "grad_norm": 1.5373507404867703, "learning_rate": 8.93866033556369e-06, "loss": 0.477, "step": 9848 }, { "epoch": 0.29017574355877024, "grad_norm": 1.569826243029149, "learning_rate": 8.938343533868466e-06, "loss": 0.4624, "step": 9849 }, { "epoch": 0.29020520601623384, "grad_norm": 1.3939314829667067, "learning_rate": 8.938026690514644e-06, "loss": 0.4257, "step": 9850 }, { "epoch": 0.2902346684736974, "grad_norm": 1.6612507548965216, "learning_rate": 8.937709805505579e-06, "loss": 0.6595, "step": 9851 }, { "epoch": 0.290264130931161, "grad_norm": 1.4667401804292228, "learning_rate": 8.937392878844618e-06, "loss": 0.4478, "step": 9852 }, { "epoch": 0.29029359338862454, "grad_norm": 1.712041898659365, "learning_rate": 8.937075910535114e-06, "loss": 0.627, "step": 9853 }, { "epoch": 0.29032305584608814, "grad_norm": 1.5942096083822836, "learning_rate": 8.936758900580423e-06, "loss": 0.474, "step": 9854 }, { "epoch": 0.2903525183035517, "grad_norm": 1.6043607906025308, "learning_rate": 8.936441848983895e-06, "loss": 0.4538, "step": 9855 }, { "epoch": 0.2903819807610153, "grad_norm": 1.6375454309114, "learning_rate": 8.936124755748882e-06, "loss": 0.4797, "step": 9856 }, { "epoch": 0.29041144321847884, "grad_norm": 1.5943677713192077, "learning_rate": 8.935807620878746e-06, "loss": 0.499, "step": 9857 }, { "epoch": 0.29044090567594244, "grad_norm": 1.4823341471508011, "learning_rate": 8.935490444376834e-06, "loss": 0.3177, "step": 9858 }, { "epoch": 0.290470368133406, "grad_norm": 1.4284119775614137, "learning_rate": 8.935173226246503e-06, "loss": 0.4105, "step": 9859 }, { "epoch": 0.2904998305908696, "grad_norm": 1.8317895846260261, "learning_rate": 8.93485596649111e-06, "loss": 0.3994, "step": 9860 }, { "epoch": 0.29052929304833314, "grad_norm": 1.6370754015316942, "learning_rate": 8.93453866511401e-06, "loss": 0.5097, "step": 9861 }, { "epoch": 0.29055875550579674, "grad_norm": 1.7474834149549376, "learning_rate": 8.934221322118557e-06, "loss": 0.4617, "step": 9862 }, { "epoch": 0.29058821796326034, "grad_norm": 1.5652828126358542, "learning_rate": 8.933903937508112e-06, "loss": 0.5378, "step": 9863 }, { "epoch": 0.2906176804207239, "grad_norm": 1.658543904704813, "learning_rate": 8.933586511286028e-06, "loss": 0.5793, "step": 9864 }, { "epoch": 0.2906471428781875, "grad_norm": 1.6166024752131898, "learning_rate": 8.933269043455667e-06, "loss": 0.4173, "step": 9865 }, { "epoch": 0.29067660533565104, "grad_norm": 1.3253287068565074, "learning_rate": 8.932951534020385e-06, "loss": 0.3279, "step": 9866 }, { "epoch": 0.29070606779311464, "grad_norm": 1.5889634535945787, "learning_rate": 8.932633982983538e-06, "loss": 0.6345, "step": 9867 }, { "epoch": 0.2907355302505782, "grad_norm": 1.6898334970470277, "learning_rate": 8.932316390348488e-06, "loss": 0.5072, "step": 9868 }, { "epoch": 0.2907649927080418, "grad_norm": 1.5315266334559494, "learning_rate": 8.931998756118593e-06, "loss": 0.5212, "step": 9869 }, { "epoch": 0.29079445516550534, "grad_norm": 1.6169141002239136, "learning_rate": 8.931681080297215e-06, "loss": 0.443, "step": 9870 }, { "epoch": 0.29082391762296894, "grad_norm": 1.551557128693027, "learning_rate": 8.931363362887713e-06, "loss": 0.4633, "step": 9871 }, { "epoch": 0.2908533800804325, "grad_norm": 1.6928341843046395, "learning_rate": 8.931045603893445e-06, "loss": 0.6071, "step": 9872 }, { "epoch": 0.2908828425378961, "grad_norm": 1.387541990837425, "learning_rate": 8.930727803317776e-06, "loss": 0.4231, "step": 9873 }, { "epoch": 0.29091230499535964, "grad_norm": 1.6358788447091872, "learning_rate": 8.930409961164069e-06, "loss": 0.5088, "step": 9874 }, { "epoch": 0.29094176745282324, "grad_norm": 1.7114079965727484, "learning_rate": 8.930092077435678e-06, "loss": 0.57, "step": 9875 }, { "epoch": 0.29097122991028684, "grad_norm": 1.5609866289313936, "learning_rate": 8.929774152135975e-06, "loss": 0.5165, "step": 9876 }, { "epoch": 0.2910006923677504, "grad_norm": 1.5240774170556832, "learning_rate": 8.929456185268318e-06, "loss": 0.4392, "step": 9877 }, { "epoch": 0.291030154825214, "grad_norm": 1.6225119599908915, "learning_rate": 8.929138176836069e-06, "loss": 0.492, "step": 9878 }, { "epoch": 0.29105961728267754, "grad_norm": 1.619427096681148, "learning_rate": 8.928820126842594e-06, "loss": 0.4377, "step": 9879 }, { "epoch": 0.29108907974014114, "grad_norm": 1.5379531626091412, "learning_rate": 8.928502035291258e-06, "loss": 0.4644, "step": 9880 }, { "epoch": 0.2911185421976047, "grad_norm": 1.5466599205321592, "learning_rate": 8.928183902185426e-06, "loss": 0.5044, "step": 9881 }, { "epoch": 0.2911480046550683, "grad_norm": 1.5279909810611938, "learning_rate": 8.92786572752846e-06, "loss": 0.4846, "step": 9882 }, { "epoch": 0.29117746711253184, "grad_norm": 1.5067783476214103, "learning_rate": 8.927547511323728e-06, "loss": 0.5452, "step": 9883 }, { "epoch": 0.29120692956999544, "grad_norm": 1.5163953765967935, "learning_rate": 8.927229253574594e-06, "loss": 0.4872, "step": 9884 }, { "epoch": 0.291236392027459, "grad_norm": 1.4673535678276994, "learning_rate": 8.926910954284428e-06, "loss": 0.4152, "step": 9885 }, { "epoch": 0.2912658544849226, "grad_norm": 1.6290572155640257, "learning_rate": 8.926592613456594e-06, "loss": 0.5764, "step": 9886 }, { "epoch": 0.29129531694238614, "grad_norm": 1.4272588990867134, "learning_rate": 8.92627423109446e-06, "loss": 0.3696, "step": 9887 }, { "epoch": 0.29132477939984974, "grad_norm": 1.5894842059772911, "learning_rate": 8.925955807201391e-06, "loss": 0.5563, "step": 9888 }, { "epoch": 0.29135424185731335, "grad_norm": 1.5497931663437763, "learning_rate": 8.925637341780761e-06, "loss": 0.4834, "step": 9889 }, { "epoch": 0.2913837043147769, "grad_norm": 1.6305651027075478, "learning_rate": 8.925318834835937e-06, "loss": 0.6471, "step": 9890 }, { "epoch": 0.2914131667722405, "grad_norm": 1.572898012380237, "learning_rate": 8.925000286370284e-06, "loss": 0.492, "step": 9891 }, { "epoch": 0.29144262922970404, "grad_norm": 1.4787568055884648, "learning_rate": 8.924681696387174e-06, "loss": 0.4566, "step": 9892 }, { "epoch": 0.29147209168716764, "grad_norm": 1.4328727559427792, "learning_rate": 8.924363064889979e-06, "loss": 0.4641, "step": 9893 }, { "epoch": 0.2915015541446312, "grad_norm": 1.5707933261696945, "learning_rate": 8.924044391882066e-06, "loss": 0.5521, "step": 9894 }, { "epoch": 0.2915310166020948, "grad_norm": 1.5039115217315557, "learning_rate": 8.923725677366807e-06, "loss": 0.4471, "step": 9895 }, { "epoch": 0.29156047905955834, "grad_norm": 1.5820640807052002, "learning_rate": 8.923406921347576e-06, "loss": 0.5781, "step": 9896 }, { "epoch": 0.29158994151702194, "grad_norm": 1.660116629149839, "learning_rate": 8.92308812382774e-06, "loss": 0.4135, "step": 9897 }, { "epoch": 0.2916194039744855, "grad_norm": 1.5616934061649947, "learning_rate": 8.922769284810674e-06, "loss": 0.4768, "step": 9898 }, { "epoch": 0.2916488664319491, "grad_norm": 1.5858304093824864, "learning_rate": 8.92245040429975e-06, "loss": 0.6085, "step": 9899 }, { "epoch": 0.29167832888941264, "grad_norm": 1.4818917810362064, "learning_rate": 8.922131482298341e-06, "loss": 0.3934, "step": 9900 }, { "epoch": 0.29170779134687624, "grad_norm": 1.8682577920185393, "learning_rate": 8.92181251880982e-06, "loss": 0.5049, "step": 9901 }, { "epoch": 0.29173725380433985, "grad_norm": 1.8501148927914544, "learning_rate": 8.921493513837562e-06, "loss": 0.5297, "step": 9902 }, { "epoch": 0.2917667162618034, "grad_norm": 1.7980392037555024, "learning_rate": 8.921174467384941e-06, "loss": 0.5473, "step": 9903 }, { "epoch": 0.291796178719267, "grad_norm": 1.6254768766456396, "learning_rate": 8.92085537945533e-06, "loss": 0.5067, "step": 9904 }, { "epoch": 0.29182564117673054, "grad_norm": 1.497424391124314, "learning_rate": 8.920536250052109e-06, "loss": 0.4632, "step": 9905 }, { "epoch": 0.29185510363419415, "grad_norm": 1.4687753047018994, "learning_rate": 8.920217079178647e-06, "loss": 0.5271, "step": 9906 }, { "epoch": 0.2918845660916577, "grad_norm": 1.5379007842251085, "learning_rate": 8.919897866838325e-06, "loss": 0.2803, "step": 9907 }, { "epoch": 0.2919140285491213, "grad_norm": 1.3977106264081933, "learning_rate": 8.919578613034519e-06, "loss": 0.3734, "step": 9908 }, { "epoch": 0.29194349100658484, "grad_norm": 1.5765757049603089, "learning_rate": 8.919259317770603e-06, "loss": 0.6074, "step": 9909 }, { "epoch": 0.29197295346404845, "grad_norm": 1.3795800424723859, "learning_rate": 8.918939981049957e-06, "loss": 0.3533, "step": 9910 }, { "epoch": 0.292002415921512, "grad_norm": 1.8333855804280559, "learning_rate": 8.918620602875959e-06, "loss": 0.4653, "step": 9911 }, { "epoch": 0.2920318783789756, "grad_norm": 1.747927423640247, "learning_rate": 8.918301183251985e-06, "loss": 0.637, "step": 9912 }, { "epoch": 0.29206134083643914, "grad_norm": 1.5691548302369593, "learning_rate": 8.917981722181415e-06, "loss": 0.5972, "step": 9913 }, { "epoch": 0.29209080329390275, "grad_norm": 1.5155128535161189, "learning_rate": 8.917662219667631e-06, "loss": 0.5397, "step": 9914 }, { "epoch": 0.29212026575136635, "grad_norm": 1.6304398528168016, "learning_rate": 8.917342675714006e-06, "loss": 0.4661, "step": 9915 }, { "epoch": 0.2921497282088299, "grad_norm": 1.6071792632914845, "learning_rate": 8.917023090323928e-06, "loss": 0.4981, "step": 9916 }, { "epoch": 0.2921791906662935, "grad_norm": 1.4926883139770537, "learning_rate": 8.916703463500773e-06, "loss": 0.5926, "step": 9917 }, { "epoch": 0.29220865312375705, "grad_norm": 1.668122367427131, "learning_rate": 8.916383795247921e-06, "loss": 0.5021, "step": 9918 }, { "epoch": 0.29223811558122065, "grad_norm": 1.5649789107304857, "learning_rate": 8.916064085568755e-06, "loss": 0.3466, "step": 9919 }, { "epoch": 0.2922675780386842, "grad_norm": 1.7547924246082292, "learning_rate": 8.915744334466657e-06, "loss": 0.5482, "step": 9920 }, { "epoch": 0.2922970404961478, "grad_norm": 1.6263406158824205, "learning_rate": 8.915424541945007e-06, "loss": 0.4965, "step": 9921 }, { "epoch": 0.29232650295361134, "grad_norm": 1.3872457931567654, "learning_rate": 8.915104708007191e-06, "loss": 0.3936, "step": 9922 }, { "epoch": 0.29235596541107495, "grad_norm": 1.906023421421572, "learning_rate": 8.914784832656592e-06, "loss": 0.6914, "step": 9923 }, { "epoch": 0.2923854278685385, "grad_norm": 1.70564180328422, "learning_rate": 8.91446491589659e-06, "loss": 0.5999, "step": 9924 }, { "epoch": 0.2924148903260021, "grad_norm": 1.4600352707703292, "learning_rate": 8.91414495773057e-06, "loss": 0.4513, "step": 9925 }, { "epoch": 0.29244435278346564, "grad_norm": 1.5875398403256316, "learning_rate": 8.91382495816192e-06, "loss": 0.4823, "step": 9926 }, { "epoch": 0.29247381524092925, "grad_norm": 1.5325608405496516, "learning_rate": 8.91350491719402e-06, "loss": 0.6562, "step": 9927 }, { "epoch": 0.29250327769839285, "grad_norm": 1.6804980533336111, "learning_rate": 8.913184834830261e-06, "loss": 0.4235, "step": 9928 }, { "epoch": 0.2925327401558564, "grad_norm": 1.6741192451133713, "learning_rate": 8.912864711074022e-06, "loss": 0.6212, "step": 9929 }, { "epoch": 0.29256220261332, "grad_norm": 1.5793340811631018, "learning_rate": 8.912544545928694e-06, "loss": 0.4634, "step": 9930 }, { "epoch": 0.29259166507078355, "grad_norm": 1.720855228628465, "learning_rate": 8.912224339397664e-06, "loss": 0.6313, "step": 9931 }, { "epoch": 0.29262112752824715, "grad_norm": 1.6887261667443147, "learning_rate": 8.911904091484316e-06, "loss": 0.6398, "step": 9932 }, { "epoch": 0.2926505899857107, "grad_norm": 1.6147688215268117, "learning_rate": 8.911583802192037e-06, "loss": 0.5374, "step": 9933 }, { "epoch": 0.2926800524431743, "grad_norm": 1.6691683887176159, "learning_rate": 8.911263471524218e-06, "loss": 0.4298, "step": 9934 }, { "epoch": 0.29270951490063785, "grad_norm": 1.563811120975635, "learning_rate": 8.910943099484246e-06, "loss": 0.4793, "step": 9935 }, { "epoch": 0.29273897735810145, "grad_norm": 1.4562928082962339, "learning_rate": 8.910622686075509e-06, "loss": 0.4267, "step": 9936 }, { "epoch": 0.292768439815565, "grad_norm": 1.7050282853626249, "learning_rate": 8.9103022313014e-06, "loss": 0.3923, "step": 9937 }, { "epoch": 0.2927979022730286, "grad_norm": 1.6113962940066024, "learning_rate": 8.909981735165303e-06, "loss": 0.5534, "step": 9938 }, { "epoch": 0.29282736473049215, "grad_norm": 1.4781298500673639, "learning_rate": 8.909661197670612e-06, "loss": 0.4341, "step": 9939 }, { "epoch": 0.29285682718795575, "grad_norm": 1.6794929207263525, "learning_rate": 8.909340618820715e-06, "loss": 0.6208, "step": 9940 }, { "epoch": 0.29288628964541935, "grad_norm": 1.466133469996827, "learning_rate": 8.909019998619005e-06, "loss": 0.4964, "step": 9941 }, { "epoch": 0.2929157521028829, "grad_norm": 1.8717983464190646, "learning_rate": 8.908699337068871e-06, "loss": 0.467, "step": 9942 }, { "epoch": 0.2929452145603465, "grad_norm": 1.5078206138697754, "learning_rate": 8.908378634173709e-06, "loss": 0.3839, "step": 9943 }, { "epoch": 0.29297467701781005, "grad_norm": 1.408026680226648, "learning_rate": 8.908057889936909e-06, "loss": 0.4937, "step": 9944 }, { "epoch": 0.29300413947527365, "grad_norm": 1.7468510528783776, "learning_rate": 8.907737104361863e-06, "loss": 0.5263, "step": 9945 }, { "epoch": 0.2930336019327372, "grad_norm": 1.7024685048582684, "learning_rate": 8.907416277451963e-06, "loss": 0.5336, "step": 9946 }, { "epoch": 0.2930630643902008, "grad_norm": 1.5218748606003019, "learning_rate": 8.907095409210607e-06, "loss": 0.4356, "step": 9947 }, { "epoch": 0.29309252684766435, "grad_norm": 1.7453923482279292, "learning_rate": 8.906774499641186e-06, "loss": 0.5367, "step": 9948 }, { "epoch": 0.29312198930512795, "grad_norm": 1.484257191378875, "learning_rate": 8.906453548747095e-06, "loss": 0.4346, "step": 9949 }, { "epoch": 0.2931514517625915, "grad_norm": 1.7036663181542027, "learning_rate": 8.906132556531728e-06, "loss": 0.5386, "step": 9950 }, { "epoch": 0.2931809142200551, "grad_norm": 1.6308768999385994, "learning_rate": 8.905811522998483e-06, "loss": 0.4776, "step": 9951 }, { "epoch": 0.29321037667751865, "grad_norm": 1.5796700878209673, "learning_rate": 8.905490448150752e-06, "loss": 0.4908, "step": 9952 }, { "epoch": 0.29323983913498225, "grad_norm": 1.7000128255280733, "learning_rate": 8.905169331991934e-06, "loss": 0.457, "step": 9953 }, { "epoch": 0.29326930159244585, "grad_norm": 1.5272957838139054, "learning_rate": 8.904848174525424e-06, "loss": 0.3949, "step": 9954 }, { "epoch": 0.2932987640499094, "grad_norm": 1.450728383465602, "learning_rate": 8.904526975754622e-06, "loss": 0.4276, "step": 9955 }, { "epoch": 0.293328226507373, "grad_norm": 1.6974227419066021, "learning_rate": 8.904205735682923e-06, "loss": 0.5613, "step": 9956 }, { "epoch": 0.29335768896483655, "grad_norm": 1.4893220670960328, "learning_rate": 8.903884454313725e-06, "loss": 0.4952, "step": 9957 }, { "epoch": 0.29338715142230015, "grad_norm": 1.436142294442458, "learning_rate": 8.903563131650427e-06, "loss": 0.4875, "step": 9958 }, { "epoch": 0.2934166138797637, "grad_norm": 1.7344965784044692, "learning_rate": 8.903241767696428e-06, "loss": 0.4044, "step": 9959 }, { "epoch": 0.2934460763372273, "grad_norm": 1.487692627740208, "learning_rate": 8.902920362455128e-06, "loss": 0.4474, "step": 9960 }, { "epoch": 0.29347553879469085, "grad_norm": 1.576000515989415, "learning_rate": 8.902598915929926e-06, "loss": 0.5512, "step": 9961 }, { "epoch": 0.29350500125215445, "grad_norm": 1.4121326272607244, "learning_rate": 8.90227742812422e-06, "loss": 0.41, "step": 9962 }, { "epoch": 0.293534463709618, "grad_norm": 1.6130758338207667, "learning_rate": 8.901955899041415e-06, "loss": 0.4763, "step": 9963 }, { "epoch": 0.2935639261670816, "grad_norm": 1.2946891957425533, "learning_rate": 8.901634328684907e-06, "loss": 0.3489, "step": 9964 }, { "epoch": 0.29359338862454515, "grad_norm": 1.5936634779006458, "learning_rate": 8.901312717058101e-06, "loss": 0.5442, "step": 9965 }, { "epoch": 0.29362285108200875, "grad_norm": 1.487317370635539, "learning_rate": 8.9009910641644e-06, "loss": 0.5676, "step": 9966 }, { "epoch": 0.29365231353947235, "grad_norm": 1.5055197412878152, "learning_rate": 8.900669370007203e-06, "loss": 0.4686, "step": 9967 }, { "epoch": 0.2936817759969359, "grad_norm": 1.6560646025720134, "learning_rate": 8.900347634589913e-06, "loss": 0.4893, "step": 9968 }, { "epoch": 0.2937112384543995, "grad_norm": 1.6389325880721024, "learning_rate": 8.900025857915936e-06, "loss": 0.6238, "step": 9969 }, { "epoch": 0.29374070091186305, "grad_norm": 1.565722655848482, "learning_rate": 8.899704039988674e-06, "loss": 0.5458, "step": 9970 }, { "epoch": 0.29377016336932665, "grad_norm": 1.504270264874123, "learning_rate": 8.89938218081153e-06, "loss": 0.4295, "step": 9971 }, { "epoch": 0.2937996258267902, "grad_norm": 1.5668363903432303, "learning_rate": 8.89906028038791e-06, "loss": 0.4971, "step": 9972 }, { "epoch": 0.2938290882842538, "grad_norm": 1.5845238408206943, "learning_rate": 8.89873833872122e-06, "loss": 0.5235, "step": 9973 }, { "epoch": 0.29385855074171735, "grad_norm": 1.5204158356438966, "learning_rate": 8.898416355814863e-06, "loss": 0.4967, "step": 9974 }, { "epoch": 0.29388801319918095, "grad_norm": 1.4652794623298278, "learning_rate": 8.898094331672246e-06, "loss": 0.5155, "step": 9975 }, { "epoch": 0.2939174756566445, "grad_norm": 1.5459722656369626, "learning_rate": 8.897772266296776e-06, "loss": 0.4478, "step": 9976 }, { "epoch": 0.2939469381141081, "grad_norm": 1.455212408655193, "learning_rate": 8.897450159691856e-06, "loss": 0.3329, "step": 9977 }, { "epoch": 0.29397640057157165, "grad_norm": 1.4875794357915588, "learning_rate": 8.897128011860899e-06, "loss": 0.5487, "step": 9978 }, { "epoch": 0.29400586302903525, "grad_norm": 1.6429059823257453, "learning_rate": 8.896805822807308e-06, "loss": 0.4763, "step": 9979 }, { "epoch": 0.29403532548649886, "grad_norm": 1.535056476624434, "learning_rate": 8.896483592534492e-06, "loss": 0.4319, "step": 9980 }, { "epoch": 0.2940647879439624, "grad_norm": 1.4108486427652172, "learning_rate": 8.896161321045861e-06, "loss": 0.4611, "step": 9981 }, { "epoch": 0.294094250401426, "grad_norm": 1.6348049425431388, "learning_rate": 8.895839008344825e-06, "loss": 0.4932, "step": 9982 }, { "epoch": 0.29412371285888955, "grad_norm": 1.7530547301747046, "learning_rate": 8.895516654434787e-06, "loss": 0.503, "step": 9983 }, { "epoch": 0.29415317531635315, "grad_norm": 1.6475624171550494, "learning_rate": 8.895194259319165e-06, "loss": 0.4474, "step": 9984 }, { "epoch": 0.2941826377738167, "grad_norm": 1.7351441077240448, "learning_rate": 8.894871823001363e-06, "loss": 0.5147, "step": 9985 }, { "epoch": 0.2942121002312803, "grad_norm": 1.597902873819421, "learning_rate": 8.894549345484795e-06, "loss": 0.4726, "step": 9986 }, { "epoch": 0.29424156268874385, "grad_norm": 1.6539548206380668, "learning_rate": 8.894226826772871e-06, "loss": 0.4814, "step": 9987 }, { "epoch": 0.29427102514620745, "grad_norm": 1.6349138471999063, "learning_rate": 8.893904266869001e-06, "loss": 0.4727, "step": 9988 }, { "epoch": 0.294300487603671, "grad_norm": 1.5151146830525326, "learning_rate": 8.8935816657766e-06, "loss": 0.5604, "step": 9989 }, { "epoch": 0.2943299500611346, "grad_norm": 1.4606122527462624, "learning_rate": 8.893259023499077e-06, "loss": 0.4828, "step": 9990 }, { "epoch": 0.29435941251859815, "grad_norm": 1.6613484018686728, "learning_rate": 8.892936340039848e-06, "loss": 0.4491, "step": 9991 }, { "epoch": 0.29438887497606175, "grad_norm": 1.750956028834009, "learning_rate": 8.892613615402324e-06, "loss": 0.5736, "step": 9992 }, { "epoch": 0.29441833743352536, "grad_norm": 1.5828367694622352, "learning_rate": 8.89229084958992e-06, "loss": 0.4042, "step": 9993 }, { "epoch": 0.2944477998909889, "grad_norm": 1.5808349189941493, "learning_rate": 8.89196804260605e-06, "loss": 0.5124, "step": 9994 }, { "epoch": 0.2944772623484525, "grad_norm": 1.4700662138163965, "learning_rate": 8.891645194454127e-06, "loss": 0.4662, "step": 9995 }, { "epoch": 0.29450672480591605, "grad_norm": 1.4849674314322505, "learning_rate": 8.891322305137569e-06, "loss": 0.477, "step": 9996 }, { "epoch": 0.29453618726337966, "grad_norm": 1.4390244072367921, "learning_rate": 8.890999374659787e-06, "loss": 0.4427, "step": 9997 }, { "epoch": 0.2945656497208432, "grad_norm": 1.6068402316738866, "learning_rate": 8.890676403024201e-06, "loss": 0.4588, "step": 9998 }, { "epoch": 0.2945951121783068, "grad_norm": 1.4863104956449198, "learning_rate": 8.890353390234225e-06, "loss": 0.5325, "step": 9999 }, { "epoch": 0.29462457463577035, "grad_norm": 1.6902473709317165, "learning_rate": 8.890030336293276e-06, "loss": 0.5796, "step": 10000 }, { "epoch": 0.29465403709323396, "grad_norm": 1.4790195624431917, "learning_rate": 8.889707241204773e-06, "loss": 0.3996, "step": 10001 }, { "epoch": 0.2946834995506975, "grad_norm": 1.5976942075278884, "learning_rate": 8.889384104972132e-06, "loss": 0.4592, "step": 10002 }, { "epoch": 0.2947129620081611, "grad_norm": 1.6219440072055586, "learning_rate": 8.889060927598772e-06, "loss": 0.544, "step": 10003 }, { "epoch": 0.29474242446562465, "grad_norm": 1.566061477714546, "learning_rate": 8.888737709088109e-06, "loss": 0.5294, "step": 10004 }, { "epoch": 0.29477188692308826, "grad_norm": 1.7034388045967424, "learning_rate": 8.888414449443564e-06, "loss": 0.5191, "step": 10005 }, { "epoch": 0.29480134938055186, "grad_norm": 1.4873759157625732, "learning_rate": 8.888091148668556e-06, "loss": 0.4116, "step": 10006 }, { "epoch": 0.2948308118380154, "grad_norm": 1.5263661603616898, "learning_rate": 8.887767806766505e-06, "loss": 0.5211, "step": 10007 }, { "epoch": 0.294860274295479, "grad_norm": 1.5701383400490327, "learning_rate": 8.887444423740832e-06, "loss": 0.2661, "step": 10008 }, { "epoch": 0.29488973675294256, "grad_norm": 1.4776318103613078, "learning_rate": 8.887120999594956e-06, "loss": 0.4976, "step": 10009 }, { "epoch": 0.29491919921040616, "grad_norm": 1.5480288702246596, "learning_rate": 8.8867975343323e-06, "loss": 0.4328, "step": 10010 }, { "epoch": 0.2949486616678697, "grad_norm": 1.8008791944545097, "learning_rate": 8.886474027956281e-06, "loss": 0.3628, "step": 10011 }, { "epoch": 0.2949781241253333, "grad_norm": 1.6386460974378763, "learning_rate": 8.886150480470327e-06, "loss": 0.5281, "step": 10012 }, { "epoch": 0.29500758658279685, "grad_norm": 1.738190904812394, "learning_rate": 8.885826891877856e-06, "loss": 0.4986, "step": 10013 }, { "epoch": 0.29503704904026046, "grad_norm": 1.5965302790544795, "learning_rate": 8.885503262182293e-06, "loss": 0.5436, "step": 10014 }, { "epoch": 0.295066511497724, "grad_norm": 1.4938474963782775, "learning_rate": 8.885179591387059e-06, "loss": 0.5494, "step": 10015 }, { "epoch": 0.2950959739551876, "grad_norm": 1.5748328757880043, "learning_rate": 8.884855879495582e-06, "loss": 0.5223, "step": 10016 }, { "epoch": 0.29512543641265115, "grad_norm": 1.519954421313161, "learning_rate": 8.884532126511281e-06, "loss": 0.521, "step": 10017 }, { "epoch": 0.29515489887011476, "grad_norm": 1.9085989116308248, "learning_rate": 8.884208332437585e-06, "loss": 0.685, "step": 10018 }, { "epoch": 0.29518436132757836, "grad_norm": 1.5742921705269965, "learning_rate": 8.883884497277916e-06, "loss": 0.3835, "step": 10019 }, { "epoch": 0.2952138237850419, "grad_norm": 1.5996046526800582, "learning_rate": 8.8835606210357e-06, "loss": 0.4086, "step": 10020 }, { "epoch": 0.2952432862425055, "grad_norm": 1.6000760792282143, "learning_rate": 8.883236703714364e-06, "loss": 0.4171, "step": 10021 }, { "epoch": 0.29527274869996906, "grad_norm": 1.6089937262209517, "learning_rate": 8.882912745317333e-06, "loss": 0.5124, "step": 10022 }, { "epoch": 0.29530221115743266, "grad_norm": 1.4433110196973877, "learning_rate": 8.882588745848035e-06, "loss": 0.4181, "step": 10023 }, { "epoch": 0.2953316736148962, "grad_norm": 1.4942034962899577, "learning_rate": 8.882264705309898e-06, "loss": 0.3312, "step": 10024 }, { "epoch": 0.2953611360723598, "grad_norm": 1.5256090714116362, "learning_rate": 8.881940623706346e-06, "loss": 0.4173, "step": 10025 }, { "epoch": 0.29539059852982336, "grad_norm": 1.5679355301277174, "learning_rate": 8.88161650104081e-06, "loss": 0.4813, "step": 10026 }, { "epoch": 0.29542006098728696, "grad_norm": 1.505866529461966, "learning_rate": 8.881292337316716e-06, "loss": 0.5083, "step": 10027 }, { "epoch": 0.2954495234447505, "grad_norm": 1.3769964605031604, "learning_rate": 8.880968132537498e-06, "loss": 0.4723, "step": 10028 }, { "epoch": 0.2954789859022141, "grad_norm": 1.7855531350715352, "learning_rate": 8.88064388670658e-06, "loss": 0.4594, "step": 10029 }, { "epoch": 0.29550844835967766, "grad_norm": 1.6371289458980889, "learning_rate": 8.880319599827392e-06, "loss": 0.4459, "step": 10030 }, { "epoch": 0.29553791081714126, "grad_norm": 1.6506367377628306, "learning_rate": 8.879995271903367e-06, "loss": 0.4383, "step": 10031 }, { "epoch": 0.29556737327460486, "grad_norm": 1.772735987150055, "learning_rate": 8.879670902937935e-06, "loss": 0.6291, "step": 10032 }, { "epoch": 0.2955968357320684, "grad_norm": 1.8662928365530802, "learning_rate": 8.879346492934526e-06, "loss": 0.5701, "step": 10033 }, { "epoch": 0.295626298189532, "grad_norm": 1.6350124443507041, "learning_rate": 8.879022041896572e-06, "loss": 0.5293, "step": 10034 }, { "epoch": 0.29565576064699556, "grad_norm": 1.4943525988278115, "learning_rate": 8.878697549827506e-06, "loss": 0.5674, "step": 10035 }, { "epoch": 0.29568522310445916, "grad_norm": 1.4291579756192432, "learning_rate": 8.878373016730758e-06, "loss": 0.4728, "step": 10036 }, { "epoch": 0.2957146855619227, "grad_norm": 1.568976420908577, "learning_rate": 8.878048442609762e-06, "loss": 0.4741, "step": 10037 }, { "epoch": 0.2957441480193863, "grad_norm": 1.570680015545452, "learning_rate": 8.877723827467953e-06, "loss": 0.4417, "step": 10038 }, { "epoch": 0.29577361047684986, "grad_norm": 1.5995626590161667, "learning_rate": 8.877399171308763e-06, "loss": 0.4992, "step": 10039 }, { "epoch": 0.29580307293431346, "grad_norm": 1.5389269628935518, "learning_rate": 8.877074474135626e-06, "loss": 0.4508, "step": 10040 }, { "epoch": 0.295832535391777, "grad_norm": 1.5901553655750018, "learning_rate": 8.876749735951974e-06, "loss": 0.4204, "step": 10041 }, { "epoch": 0.2958619978492406, "grad_norm": 1.5604351101393144, "learning_rate": 8.876424956761249e-06, "loss": 0.5227, "step": 10042 }, { "epoch": 0.29589146030670416, "grad_norm": 1.5083117470853624, "learning_rate": 8.87610013656688e-06, "loss": 0.4896, "step": 10043 }, { "epoch": 0.29592092276416776, "grad_norm": 2.137349668081692, "learning_rate": 8.875775275372304e-06, "loss": 0.4591, "step": 10044 }, { "epoch": 0.29595038522163136, "grad_norm": 1.380140868503727, "learning_rate": 8.875450373180961e-06, "loss": 0.4309, "step": 10045 }, { "epoch": 0.2959798476790949, "grad_norm": 1.3509449407579595, "learning_rate": 8.875125429996283e-06, "loss": 0.3722, "step": 10046 }, { "epoch": 0.2960093101365585, "grad_norm": 1.5025611636582215, "learning_rate": 8.874800445821711e-06, "loss": 0.4979, "step": 10047 }, { "epoch": 0.29603877259402206, "grad_norm": 1.5893266381431472, "learning_rate": 8.87447542066068e-06, "loss": 0.4496, "step": 10048 }, { "epoch": 0.29606823505148566, "grad_norm": 1.574722686150001, "learning_rate": 8.874150354516628e-06, "loss": 0.5508, "step": 10049 }, { "epoch": 0.2960976975089492, "grad_norm": 1.3894100981420687, "learning_rate": 8.873825247392996e-06, "loss": 0.3618, "step": 10050 }, { "epoch": 0.2961271599664128, "grad_norm": 1.6313727599339995, "learning_rate": 8.87350009929322e-06, "loss": 0.4481, "step": 10051 }, { "epoch": 0.29615662242387636, "grad_norm": 1.6540800595381702, "learning_rate": 8.873174910220738e-06, "loss": 0.5362, "step": 10052 }, { "epoch": 0.29618608488133996, "grad_norm": 1.4803321404396605, "learning_rate": 8.872849680178995e-06, "loss": 0.5314, "step": 10053 }, { "epoch": 0.2962155473388035, "grad_norm": 1.6607798438374057, "learning_rate": 8.872524409171428e-06, "loss": 0.4918, "step": 10054 }, { "epoch": 0.2962450097962671, "grad_norm": 1.6855838118248, "learning_rate": 8.872199097201478e-06, "loss": 0.5368, "step": 10055 }, { "epoch": 0.29627447225373066, "grad_norm": 1.6952655941003616, "learning_rate": 8.871873744272586e-06, "loss": 0.4878, "step": 10056 }, { "epoch": 0.29630393471119426, "grad_norm": 1.5132350358199738, "learning_rate": 8.871548350388193e-06, "loss": 0.2338, "step": 10057 }, { "epoch": 0.29633339716865786, "grad_norm": 1.5239042825112408, "learning_rate": 8.871222915551742e-06, "loss": 0.3706, "step": 10058 }, { "epoch": 0.2963628596261214, "grad_norm": 1.4057969922848852, "learning_rate": 8.870897439766676e-06, "loss": 0.5197, "step": 10059 }, { "epoch": 0.296392322083585, "grad_norm": 1.5760136574704213, "learning_rate": 8.870571923036434e-06, "loss": 0.3887, "step": 10060 }, { "epoch": 0.29642178454104856, "grad_norm": 1.5719194682795739, "learning_rate": 8.870246365364463e-06, "loss": 0.5353, "step": 10061 }, { "epoch": 0.29645124699851216, "grad_norm": 1.584909753143375, "learning_rate": 8.869920766754205e-06, "loss": 0.4395, "step": 10062 }, { "epoch": 0.2964807094559757, "grad_norm": 1.4863977894798492, "learning_rate": 8.869595127209106e-06, "loss": 0.3799, "step": 10063 }, { "epoch": 0.2965101719134393, "grad_norm": 1.3971507618107264, "learning_rate": 8.869269446732608e-06, "loss": 0.4374, "step": 10064 }, { "epoch": 0.29653963437090286, "grad_norm": 1.50257493578789, "learning_rate": 8.868943725328157e-06, "loss": 0.4077, "step": 10065 }, { "epoch": 0.29656909682836646, "grad_norm": 1.840156927467824, "learning_rate": 8.868617962999198e-06, "loss": 0.4241, "step": 10066 }, { "epoch": 0.29659855928583, "grad_norm": 1.4796116901514453, "learning_rate": 8.868292159749178e-06, "loss": 0.4185, "step": 10067 }, { "epoch": 0.2966280217432936, "grad_norm": 1.7000909963787874, "learning_rate": 8.867966315581543e-06, "loss": 0.5187, "step": 10068 }, { "epoch": 0.29665748420075716, "grad_norm": 1.555111822021257, "learning_rate": 8.867640430499738e-06, "loss": 0.4396, "step": 10069 }, { "epoch": 0.29668694665822076, "grad_norm": 1.5905167049077147, "learning_rate": 8.86731450450721e-06, "loss": 0.475, "step": 10070 }, { "epoch": 0.29671640911568437, "grad_norm": 1.4882287760257251, "learning_rate": 8.866988537607411e-06, "loss": 0.5261, "step": 10071 }, { "epoch": 0.2967458715731479, "grad_norm": 1.5271880094065582, "learning_rate": 8.866662529803783e-06, "loss": 0.4815, "step": 10072 }, { "epoch": 0.2967753340306115, "grad_norm": 1.5604455377821647, "learning_rate": 8.866336481099778e-06, "loss": 0.536, "step": 10073 }, { "epoch": 0.29680479648807506, "grad_norm": 1.7541911927444396, "learning_rate": 8.866010391498844e-06, "loss": 0.5569, "step": 10074 }, { "epoch": 0.29683425894553866, "grad_norm": 1.8238258307831308, "learning_rate": 8.865684261004431e-06, "loss": 0.4384, "step": 10075 }, { "epoch": 0.2968637214030022, "grad_norm": 1.481512185159951, "learning_rate": 8.865358089619987e-06, "loss": 0.4962, "step": 10076 }, { "epoch": 0.2968931838604658, "grad_norm": 1.7679912097940955, "learning_rate": 8.865031877348963e-06, "loss": 0.6617, "step": 10077 }, { "epoch": 0.29692264631792936, "grad_norm": 1.5763884140142845, "learning_rate": 8.86470562419481e-06, "loss": 0.5219, "step": 10078 }, { "epoch": 0.29695210877539296, "grad_norm": 1.516057542931304, "learning_rate": 8.86437933016098e-06, "loss": 0.3835, "step": 10079 }, { "epoch": 0.2969815712328565, "grad_norm": 1.6995016074637739, "learning_rate": 8.86405299525092e-06, "loss": 0.4755, "step": 10080 }, { "epoch": 0.2970110336903201, "grad_norm": 1.7528180047349073, "learning_rate": 8.863726619468089e-06, "loss": 0.5662, "step": 10081 }, { "epoch": 0.29704049614778366, "grad_norm": 1.4524850247426275, "learning_rate": 8.863400202815933e-06, "loss": 0.4419, "step": 10082 }, { "epoch": 0.29706995860524726, "grad_norm": 1.4864836471679836, "learning_rate": 8.863073745297906e-06, "loss": 0.4729, "step": 10083 }, { "epoch": 0.29709942106271087, "grad_norm": 1.510109752972471, "learning_rate": 8.862747246917464e-06, "loss": 0.4761, "step": 10084 }, { "epoch": 0.2971288835201744, "grad_norm": 1.59741350229475, "learning_rate": 8.862420707678058e-06, "loss": 0.5071, "step": 10085 }, { "epoch": 0.297158345977638, "grad_norm": 1.4319718937191983, "learning_rate": 8.862094127583145e-06, "loss": 0.453, "step": 10086 }, { "epoch": 0.29718780843510156, "grad_norm": 1.6123684411577328, "learning_rate": 8.861767506636176e-06, "loss": 0.4176, "step": 10087 }, { "epoch": 0.29721727089256517, "grad_norm": 1.5734870944767412, "learning_rate": 8.861440844840606e-06, "loss": 0.5219, "step": 10088 }, { "epoch": 0.2972467333500287, "grad_norm": 1.509603827633973, "learning_rate": 8.861114142199892e-06, "loss": 0.6204, "step": 10089 }, { "epoch": 0.2972761958074923, "grad_norm": 1.491057152839086, "learning_rate": 8.860787398717491e-06, "loss": 0.4096, "step": 10090 }, { "epoch": 0.29730565826495586, "grad_norm": 1.4777038691927038, "learning_rate": 8.860460614396856e-06, "loss": 0.4418, "step": 10091 }, { "epoch": 0.29733512072241947, "grad_norm": 1.5023096683168145, "learning_rate": 8.860133789241445e-06, "loss": 0.4479, "step": 10092 }, { "epoch": 0.297364583179883, "grad_norm": 1.5921338508779894, "learning_rate": 8.859806923254717e-06, "loss": 0.6003, "step": 10093 }, { "epoch": 0.2973940456373466, "grad_norm": 1.4732221241421746, "learning_rate": 8.859480016440126e-06, "loss": 0.4645, "step": 10094 }, { "epoch": 0.29742350809481016, "grad_norm": 1.914013200881249, "learning_rate": 8.859153068801132e-06, "loss": 0.6447, "step": 10095 }, { "epoch": 0.29745297055227377, "grad_norm": 1.6881060581218075, "learning_rate": 8.858826080341192e-06, "loss": 0.5665, "step": 10096 }, { "epoch": 0.29748243300973737, "grad_norm": 1.731718641021446, "learning_rate": 8.858499051063766e-06, "loss": 0.4269, "step": 10097 }, { "epoch": 0.2975118954672009, "grad_norm": 1.6298228595054842, "learning_rate": 8.858171980972314e-06, "loss": 0.3525, "step": 10098 }, { "epoch": 0.2975413579246645, "grad_norm": 1.5027879946067766, "learning_rate": 8.857844870070294e-06, "loss": 0.5331, "step": 10099 }, { "epoch": 0.29757082038212807, "grad_norm": 1.753891612363891, "learning_rate": 8.857517718361168e-06, "loss": 0.5088, "step": 10100 }, { "epoch": 0.29760028283959167, "grad_norm": 1.5133842156494155, "learning_rate": 8.857190525848394e-06, "loss": 0.4822, "step": 10101 }, { "epoch": 0.2976297452970552, "grad_norm": 1.6039245962084259, "learning_rate": 8.856863292535434e-06, "loss": 0.5223, "step": 10102 }, { "epoch": 0.2976592077545188, "grad_norm": 1.7083129559795427, "learning_rate": 8.856536018425749e-06, "loss": 0.5581, "step": 10103 }, { "epoch": 0.29768867021198236, "grad_norm": 1.670262640207254, "learning_rate": 8.856208703522801e-06, "loss": 0.5918, "step": 10104 }, { "epoch": 0.29771813266944597, "grad_norm": 1.5673307830008107, "learning_rate": 8.855881347830055e-06, "loss": 0.4896, "step": 10105 }, { "epoch": 0.2977475951269095, "grad_norm": 1.64862614228939, "learning_rate": 8.85555395135097e-06, "loss": 0.4874, "step": 10106 }, { "epoch": 0.2977770575843731, "grad_norm": 1.4121697203926602, "learning_rate": 8.855226514089011e-06, "loss": 0.3941, "step": 10107 }, { "epoch": 0.29780652004183666, "grad_norm": 1.5415979624088454, "learning_rate": 8.85489903604764e-06, "loss": 0.5772, "step": 10108 }, { "epoch": 0.29783598249930027, "grad_norm": 1.4547527702070042, "learning_rate": 8.85457151723032e-06, "loss": 0.5336, "step": 10109 }, { "epoch": 0.29786544495676387, "grad_norm": 1.4632840341812945, "learning_rate": 8.85424395764052e-06, "loss": 0.3766, "step": 10110 }, { "epoch": 0.2978949074142274, "grad_norm": 1.4438387001169493, "learning_rate": 8.853916357281701e-06, "loss": 0.4825, "step": 10111 }, { "epoch": 0.297924369871691, "grad_norm": 1.5445716706687522, "learning_rate": 8.853588716157329e-06, "loss": 0.4424, "step": 10112 }, { "epoch": 0.29795383232915457, "grad_norm": 1.7029959753814001, "learning_rate": 8.85326103427087e-06, "loss": 0.7276, "step": 10113 }, { "epoch": 0.29798329478661817, "grad_norm": 1.4999830213387408, "learning_rate": 8.852933311625791e-06, "loss": 0.4475, "step": 10114 }, { "epoch": 0.2980127572440817, "grad_norm": 1.555125406452861, "learning_rate": 8.852605548225557e-06, "loss": 0.4587, "step": 10115 }, { "epoch": 0.2980422197015453, "grad_norm": 1.5257593140817214, "learning_rate": 8.852277744073636e-06, "loss": 0.4386, "step": 10116 }, { "epoch": 0.29807168215900887, "grad_norm": 1.6315901298163153, "learning_rate": 8.851949899173495e-06, "loss": 0.4839, "step": 10117 }, { "epoch": 0.29810114461647247, "grad_norm": 1.630546073371524, "learning_rate": 8.8516220135286e-06, "loss": 0.4564, "step": 10118 }, { "epoch": 0.298130607073936, "grad_norm": 1.6750565889074434, "learning_rate": 8.851294087142424e-06, "loss": 0.586, "step": 10119 }, { "epoch": 0.2981600695313996, "grad_norm": 1.4689030054926187, "learning_rate": 8.850966120018432e-06, "loss": 0.5205, "step": 10120 }, { "epoch": 0.29818953198886317, "grad_norm": 1.6891546221012024, "learning_rate": 8.850638112160093e-06, "loss": 0.6203, "step": 10121 }, { "epoch": 0.29821899444632677, "grad_norm": 1.5779155020747897, "learning_rate": 8.850310063570878e-06, "loss": 0.3975, "step": 10122 }, { "epoch": 0.29824845690379037, "grad_norm": 1.4366491868688982, "learning_rate": 8.849981974254256e-06, "loss": 0.4137, "step": 10123 }, { "epoch": 0.2982779193612539, "grad_norm": 1.5696110146844693, "learning_rate": 8.849653844213699e-06, "loss": 0.6387, "step": 10124 }, { "epoch": 0.2983073818187175, "grad_norm": 1.5028933857472138, "learning_rate": 8.849325673452678e-06, "loss": 0.5328, "step": 10125 }, { "epoch": 0.29833684427618107, "grad_norm": 1.6988987865805385, "learning_rate": 8.84899746197466e-06, "loss": 0.6121, "step": 10126 }, { "epoch": 0.29836630673364467, "grad_norm": 1.9056377901415393, "learning_rate": 8.848669209783123e-06, "loss": 0.6506, "step": 10127 }, { "epoch": 0.2983957691911082, "grad_norm": 1.759525076785935, "learning_rate": 8.848340916881535e-06, "loss": 0.5379, "step": 10128 }, { "epoch": 0.2984252316485718, "grad_norm": 1.7251136440180126, "learning_rate": 8.848012583273369e-06, "loss": 0.4278, "step": 10129 }, { "epoch": 0.29845469410603537, "grad_norm": 1.4434976903385803, "learning_rate": 8.847684208962098e-06, "loss": 0.5197, "step": 10130 }, { "epoch": 0.29848415656349897, "grad_norm": 1.4163633986525488, "learning_rate": 8.847355793951198e-06, "loss": 0.4778, "step": 10131 }, { "epoch": 0.2985136190209625, "grad_norm": 1.5194983769752213, "learning_rate": 8.847027338244138e-06, "loss": 0.4645, "step": 10132 }, { "epoch": 0.2985430814784261, "grad_norm": 1.428234301552436, "learning_rate": 8.846698841844399e-06, "loss": 0.4757, "step": 10133 }, { "epoch": 0.29857254393588967, "grad_norm": 1.6606409821886887, "learning_rate": 8.84637030475545e-06, "loss": 0.5818, "step": 10134 }, { "epoch": 0.29860200639335327, "grad_norm": 1.660100493053717, "learning_rate": 8.846041726980767e-06, "loss": 0.6978, "step": 10135 }, { "epoch": 0.2986314688508169, "grad_norm": 1.328167294990281, "learning_rate": 8.845713108523828e-06, "loss": 0.3234, "step": 10136 }, { "epoch": 0.2986609313082804, "grad_norm": 1.616622907473211, "learning_rate": 8.845384449388109e-06, "loss": 0.4464, "step": 10137 }, { "epoch": 0.298690393765744, "grad_norm": 1.4280149816641141, "learning_rate": 8.845055749577083e-06, "loss": 0.5157, "step": 10138 }, { "epoch": 0.29871985622320757, "grad_norm": 1.4444407193250521, "learning_rate": 8.84472700909423e-06, "loss": 0.4742, "step": 10139 }, { "epoch": 0.29874931868067117, "grad_norm": 1.5918454898859513, "learning_rate": 8.844398227943026e-06, "loss": 0.415, "step": 10140 }, { "epoch": 0.2987787811381347, "grad_norm": 1.4644437476457803, "learning_rate": 8.844069406126949e-06, "loss": 0.4854, "step": 10141 }, { "epoch": 0.2988082435955983, "grad_norm": 1.4092928258841728, "learning_rate": 8.84374054364948e-06, "loss": 0.4037, "step": 10142 }, { "epoch": 0.29883770605306187, "grad_norm": 1.5272913319824009, "learning_rate": 8.843411640514093e-06, "loss": 0.5233, "step": 10143 }, { "epoch": 0.29886716851052547, "grad_norm": 1.4310762341860848, "learning_rate": 8.843082696724268e-06, "loss": 0.4757, "step": 10144 }, { "epoch": 0.298896630967989, "grad_norm": 1.69337455254377, "learning_rate": 8.842753712283484e-06, "loss": 0.4706, "step": 10145 }, { "epoch": 0.2989260934254526, "grad_norm": 1.4858503994553711, "learning_rate": 8.842424687195227e-06, "loss": 0.4746, "step": 10146 }, { "epoch": 0.29895555588291617, "grad_norm": 2.0433448308186755, "learning_rate": 8.84209562146297e-06, "loss": 0.6259, "step": 10147 }, { "epoch": 0.29898501834037977, "grad_norm": 1.418705676201621, "learning_rate": 8.841766515090196e-06, "loss": 0.4224, "step": 10148 }, { "epoch": 0.2990144807978434, "grad_norm": 1.5537122224428312, "learning_rate": 8.841437368080387e-06, "loss": 0.4555, "step": 10149 }, { "epoch": 0.2990439432553069, "grad_norm": 1.6917285481204678, "learning_rate": 8.841108180437026e-06, "loss": 0.4384, "step": 10150 }, { "epoch": 0.2990734057127705, "grad_norm": 1.418351891367803, "learning_rate": 8.840778952163592e-06, "loss": 0.3227, "step": 10151 }, { "epoch": 0.29910286817023407, "grad_norm": 1.6020262187794125, "learning_rate": 8.840449683263567e-06, "loss": 0.599, "step": 10152 }, { "epoch": 0.2991323306276977, "grad_norm": 1.7614093071493462, "learning_rate": 8.840120373740438e-06, "loss": 0.6591, "step": 10153 }, { "epoch": 0.2991617930851612, "grad_norm": 1.5850386852647105, "learning_rate": 8.839791023597686e-06, "loss": 0.4763, "step": 10154 }, { "epoch": 0.2991912555426248, "grad_norm": 1.5761425529634134, "learning_rate": 8.839461632838793e-06, "loss": 0.4449, "step": 10155 }, { "epoch": 0.29922071800008837, "grad_norm": 1.4726477850894995, "learning_rate": 8.839132201467246e-06, "loss": 0.4251, "step": 10156 }, { "epoch": 0.299250180457552, "grad_norm": 1.5156456247175838, "learning_rate": 8.838802729486529e-06, "loss": 0.5841, "step": 10157 }, { "epoch": 0.2992796429150155, "grad_norm": 1.3412490394976195, "learning_rate": 8.838473216900128e-06, "loss": 0.338, "step": 10158 }, { "epoch": 0.2993091053724791, "grad_norm": 1.6840202676846556, "learning_rate": 8.838143663711524e-06, "loss": 0.5486, "step": 10159 }, { "epoch": 0.29933856782994267, "grad_norm": 1.7722109841928382, "learning_rate": 8.837814069924208e-06, "loss": 0.6791, "step": 10160 }, { "epoch": 0.2993680302874063, "grad_norm": 1.637260489169529, "learning_rate": 8.837484435541664e-06, "loss": 0.4126, "step": 10161 }, { "epoch": 0.2993974927448699, "grad_norm": 1.3981201187667982, "learning_rate": 8.837154760567378e-06, "loss": 0.3271, "step": 10162 }, { "epoch": 0.2994269552023334, "grad_norm": 1.6321165332235372, "learning_rate": 8.836825045004839e-06, "loss": 0.3822, "step": 10163 }, { "epoch": 0.299456417659797, "grad_norm": 1.6285752881411713, "learning_rate": 8.836495288857534e-06, "loss": 0.4717, "step": 10164 }, { "epoch": 0.29948588011726057, "grad_norm": 1.6534670877319875, "learning_rate": 8.836165492128952e-06, "loss": 0.5431, "step": 10165 }, { "epoch": 0.2995153425747242, "grad_norm": 1.7103034006785274, "learning_rate": 8.83583565482258e-06, "loss": 0.4499, "step": 10166 }, { "epoch": 0.2995448050321877, "grad_norm": 1.526501737087104, "learning_rate": 8.835505776941908e-06, "loss": 0.4889, "step": 10167 }, { "epoch": 0.2995742674896513, "grad_norm": 1.605696715770048, "learning_rate": 8.835175858490423e-06, "loss": 0.5463, "step": 10168 }, { "epoch": 0.29960372994711487, "grad_norm": 1.6132461765199189, "learning_rate": 8.834845899471617e-06, "loss": 0.393, "step": 10169 }, { "epoch": 0.2996331924045785, "grad_norm": 1.5391878977927897, "learning_rate": 8.834515899888983e-06, "loss": 0.4117, "step": 10170 }, { "epoch": 0.299662654862042, "grad_norm": 1.6977758532447627, "learning_rate": 8.834185859746005e-06, "loss": 0.6786, "step": 10171 }, { "epoch": 0.2996921173195056, "grad_norm": 1.7258544841735803, "learning_rate": 8.833855779046179e-06, "loss": 0.4632, "step": 10172 }, { "epoch": 0.29972157977696917, "grad_norm": 1.6076042725873991, "learning_rate": 8.833525657792994e-06, "loss": 0.5548, "step": 10173 }, { "epoch": 0.2997510422344328, "grad_norm": 1.6197855527743947, "learning_rate": 8.833195495989944e-06, "loss": 0.473, "step": 10174 }, { "epoch": 0.2997805046918964, "grad_norm": 1.5908022913869175, "learning_rate": 8.832865293640518e-06, "loss": 0.4687, "step": 10175 }, { "epoch": 0.2998099671493599, "grad_norm": 1.7201469660298683, "learning_rate": 8.832535050748213e-06, "loss": 0.4496, "step": 10176 }, { "epoch": 0.2998394296068235, "grad_norm": 1.5699159794867872, "learning_rate": 8.83220476731652e-06, "loss": 0.5046, "step": 10177 }, { "epoch": 0.2998688920642871, "grad_norm": 1.4350337324029887, "learning_rate": 8.831874443348932e-06, "loss": 0.3668, "step": 10178 }, { "epoch": 0.2998983545217507, "grad_norm": 1.5786233789364292, "learning_rate": 8.831544078848946e-06, "loss": 0.4916, "step": 10179 }, { "epoch": 0.2999278169792142, "grad_norm": 1.6925566664154204, "learning_rate": 8.831213673820051e-06, "loss": 0.3938, "step": 10180 }, { "epoch": 0.2999572794366778, "grad_norm": 1.5609721720075473, "learning_rate": 8.830883228265749e-06, "loss": 0.4686, "step": 10181 }, { "epoch": 0.2999867418941414, "grad_norm": 1.5319629881267067, "learning_rate": 8.830552742189528e-06, "loss": 0.4682, "step": 10182 }, { "epoch": 0.300016204351605, "grad_norm": 1.3702359299625533, "learning_rate": 8.83022221559489e-06, "loss": 0.4307, "step": 10183 }, { "epoch": 0.3000456668090685, "grad_norm": 1.4284805521203772, "learning_rate": 8.82989164848533e-06, "loss": 0.4952, "step": 10184 }, { "epoch": 0.3000751292665321, "grad_norm": 1.6288145874417448, "learning_rate": 8.82956104086434e-06, "loss": 0.4647, "step": 10185 }, { "epoch": 0.3001045917239957, "grad_norm": 1.4691023289971696, "learning_rate": 8.829230392735422e-06, "loss": 0.6071, "step": 10186 }, { "epoch": 0.3001340541814593, "grad_norm": 1.6451519379326836, "learning_rate": 8.828899704102073e-06, "loss": 0.5711, "step": 10187 }, { "epoch": 0.3001635166389229, "grad_norm": 1.4662797793286426, "learning_rate": 8.828568974967788e-06, "loss": 0.4257, "step": 10188 }, { "epoch": 0.3001929790963864, "grad_norm": 1.6174206179210093, "learning_rate": 8.828238205336069e-06, "loss": 0.5312, "step": 10189 }, { "epoch": 0.30022244155385003, "grad_norm": 1.4120837278854683, "learning_rate": 8.827907395210411e-06, "loss": 0.4286, "step": 10190 }, { "epoch": 0.3002519040113136, "grad_norm": 1.39233969830673, "learning_rate": 8.827576544594317e-06, "loss": 0.3996, "step": 10191 }, { "epoch": 0.3002813664687772, "grad_norm": 1.4316551142520249, "learning_rate": 8.827245653491283e-06, "loss": 0.4808, "step": 10192 }, { "epoch": 0.3003108289262407, "grad_norm": 1.4790617000806747, "learning_rate": 8.826914721904812e-06, "loss": 0.4102, "step": 10193 }, { "epoch": 0.3003402913837043, "grad_norm": 1.4854891482868458, "learning_rate": 8.826583749838403e-06, "loss": 0.5201, "step": 10194 }, { "epoch": 0.3003697538411679, "grad_norm": 1.883321802332551, "learning_rate": 8.826252737295557e-06, "loss": 0.42, "step": 10195 }, { "epoch": 0.3003992162986315, "grad_norm": 1.862576136519578, "learning_rate": 8.825921684279777e-06, "loss": 0.5469, "step": 10196 }, { "epoch": 0.300428678756095, "grad_norm": 1.5294701580896444, "learning_rate": 8.825590590794562e-06, "loss": 0.496, "step": 10197 }, { "epoch": 0.3004581412135586, "grad_norm": 3.0030945608445374, "learning_rate": 8.825259456843416e-06, "loss": 0.346, "step": 10198 }, { "epoch": 0.3004876036710222, "grad_norm": 1.55980237779248, "learning_rate": 8.82492828242984e-06, "loss": 0.5732, "step": 10199 }, { "epoch": 0.3005170661284858, "grad_norm": 1.8023752771640495, "learning_rate": 8.82459706755734e-06, "loss": 0.4658, "step": 10200 }, { "epoch": 0.3005465285859494, "grad_norm": 1.4836608345630318, "learning_rate": 8.824265812229418e-06, "loss": 0.3994, "step": 10201 }, { "epoch": 0.3005759910434129, "grad_norm": 1.38246602465308, "learning_rate": 8.823934516449578e-06, "loss": 0.4581, "step": 10202 }, { "epoch": 0.30060545350087653, "grad_norm": 1.643126781387586, "learning_rate": 8.823603180221323e-06, "loss": 0.4036, "step": 10203 }, { "epoch": 0.3006349159583401, "grad_norm": 1.5646707722069764, "learning_rate": 8.82327180354816e-06, "loss": 0.46, "step": 10204 }, { "epoch": 0.3006643784158037, "grad_norm": 1.5370546416859738, "learning_rate": 8.82294038643359e-06, "loss": 0.5664, "step": 10205 }, { "epoch": 0.3006938408732672, "grad_norm": 1.6007036341443233, "learning_rate": 8.822608928881124e-06, "loss": 0.357, "step": 10206 }, { "epoch": 0.30072330333073083, "grad_norm": 1.6171021678189008, "learning_rate": 8.822277430894267e-06, "loss": 0.4984, "step": 10207 }, { "epoch": 0.3007527657881944, "grad_norm": 1.631916027988567, "learning_rate": 8.821945892476521e-06, "loss": 0.6077, "step": 10208 }, { "epoch": 0.300782228245658, "grad_norm": 1.6023034163290593, "learning_rate": 8.8216143136314e-06, "loss": 0.4516, "step": 10209 }, { "epoch": 0.3008116907031215, "grad_norm": 1.4216796245804995, "learning_rate": 8.821282694362403e-06, "loss": 0.4789, "step": 10210 }, { "epoch": 0.30084115316058513, "grad_norm": 1.439129484627814, "learning_rate": 8.820951034673045e-06, "loss": 0.5091, "step": 10211 }, { "epoch": 0.3008706156180487, "grad_norm": 1.3422627653920078, "learning_rate": 8.820619334566832e-06, "loss": 0.5182, "step": 10212 }, { "epoch": 0.3009000780755123, "grad_norm": 1.5078503584160292, "learning_rate": 8.82028759404727e-06, "loss": 0.4703, "step": 10213 }, { "epoch": 0.3009295405329759, "grad_norm": 1.4669523201856798, "learning_rate": 8.81995581311787e-06, "loss": 0.5142, "step": 10214 }, { "epoch": 0.30095900299043943, "grad_norm": 1.4441849274161642, "learning_rate": 8.819623991782144e-06, "loss": 0.4062, "step": 10215 }, { "epoch": 0.30098846544790303, "grad_norm": 1.6963352304552874, "learning_rate": 8.819292130043598e-06, "loss": 0.5648, "step": 10216 }, { "epoch": 0.3010179279053666, "grad_norm": 1.4371470675853015, "learning_rate": 8.818960227905745e-06, "loss": 0.4366, "step": 10217 }, { "epoch": 0.3010473903628302, "grad_norm": 1.5849627021852555, "learning_rate": 8.818628285372092e-06, "loss": 0.5328, "step": 10218 }, { "epoch": 0.3010768528202937, "grad_norm": 1.675751060215743, "learning_rate": 8.818296302446154e-06, "loss": 0.5034, "step": 10219 }, { "epoch": 0.30110631527775733, "grad_norm": 1.6611402523546084, "learning_rate": 8.817964279131442e-06, "loss": 0.4877, "step": 10220 }, { "epoch": 0.3011357777352209, "grad_norm": 1.5462224173919916, "learning_rate": 8.817632215431467e-06, "loss": 0.387, "step": 10221 }, { "epoch": 0.3011652401926845, "grad_norm": 1.4924260198737354, "learning_rate": 8.817300111349743e-06, "loss": 0.3997, "step": 10222 }, { "epoch": 0.301194702650148, "grad_norm": 1.7251704811532595, "learning_rate": 8.81696796688978e-06, "loss": 0.5108, "step": 10223 }, { "epoch": 0.30122416510761163, "grad_norm": 1.5523949931732648, "learning_rate": 8.816635782055094e-06, "loss": 0.5262, "step": 10224 }, { "epoch": 0.3012536275650752, "grad_norm": 1.387411018395018, "learning_rate": 8.816303556849198e-06, "loss": 0.339, "step": 10225 }, { "epoch": 0.3012830900225388, "grad_norm": 1.597312459090087, "learning_rate": 8.815971291275605e-06, "loss": 0.6928, "step": 10226 }, { "epoch": 0.3013125524800024, "grad_norm": 1.4372330838626635, "learning_rate": 8.815638985337832e-06, "loss": 0.4538, "step": 10227 }, { "epoch": 0.30134201493746593, "grad_norm": 1.5718487066279867, "learning_rate": 8.815306639039392e-06, "loss": 0.5579, "step": 10228 }, { "epoch": 0.30137147739492953, "grad_norm": 1.6569051482755295, "learning_rate": 8.8149742523838e-06, "loss": 0.5887, "step": 10229 }, { "epoch": 0.3014009398523931, "grad_norm": 1.3796235370830139, "learning_rate": 8.814641825374575e-06, "loss": 0.3837, "step": 10230 }, { "epoch": 0.3014304023098567, "grad_norm": 1.4915512846874777, "learning_rate": 8.814309358015232e-06, "loss": 0.4909, "step": 10231 }, { "epoch": 0.30145986476732023, "grad_norm": 1.6887369875085587, "learning_rate": 8.813976850309284e-06, "loss": 0.5192, "step": 10232 }, { "epoch": 0.30148932722478383, "grad_norm": 1.4122899505379984, "learning_rate": 8.813644302260253e-06, "loss": 0.4531, "step": 10233 }, { "epoch": 0.3015187896822474, "grad_norm": 1.6310083667644242, "learning_rate": 8.813311713871656e-06, "loss": 0.4953, "step": 10234 }, { "epoch": 0.301548252139711, "grad_norm": 1.4070987365878898, "learning_rate": 8.812979085147007e-06, "loss": 0.4609, "step": 10235 }, { "epoch": 0.30157771459717453, "grad_norm": 1.6780413798839824, "learning_rate": 8.81264641608983e-06, "loss": 0.5278, "step": 10236 }, { "epoch": 0.30160717705463813, "grad_norm": 1.5139275260377294, "learning_rate": 8.81231370670364e-06, "loss": 0.4185, "step": 10237 }, { "epoch": 0.3016366395121017, "grad_norm": 1.3802754466006733, "learning_rate": 8.811980956991957e-06, "loss": 0.4591, "step": 10238 }, { "epoch": 0.3016661019695653, "grad_norm": 1.474706863181915, "learning_rate": 8.8116481669583e-06, "loss": 0.4353, "step": 10239 }, { "epoch": 0.3016955644270289, "grad_norm": 1.5507102111423625, "learning_rate": 8.811315336606191e-06, "loss": 0.5412, "step": 10240 }, { "epoch": 0.30172502688449243, "grad_norm": 1.7119931460852387, "learning_rate": 8.810982465939153e-06, "loss": 0.4243, "step": 10241 }, { "epoch": 0.30175448934195603, "grad_norm": 1.5322177165979074, "learning_rate": 8.8106495549607e-06, "loss": 0.5006, "step": 10242 }, { "epoch": 0.3017839517994196, "grad_norm": 1.4850866675890844, "learning_rate": 8.81031660367436e-06, "loss": 0.3463, "step": 10243 }, { "epoch": 0.3018134142568832, "grad_norm": 1.5947670240554768, "learning_rate": 8.809983612083652e-06, "loss": 0.5793, "step": 10244 }, { "epoch": 0.30184287671434673, "grad_norm": 1.6953436749048194, "learning_rate": 8.809650580192096e-06, "loss": 0.5491, "step": 10245 }, { "epoch": 0.30187233917181033, "grad_norm": 1.661458362753246, "learning_rate": 8.80931750800322e-06, "loss": 0.5887, "step": 10246 }, { "epoch": 0.3019018016292739, "grad_norm": 1.4883571570285694, "learning_rate": 8.808984395520543e-06, "loss": 0.4499, "step": 10247 }, { "epoch": 0.3019312640867375, "grad_norm": 1.6516246706313673, "learning_rate": 8.80865124274759e-06, "loss": 0.4643, "step": 10248 }, { "epoch": 0.30196072654420103, "grad_norm": 1.4452979520219797, "learning_rate": 8.808318049687885e-06, "loss": 0.447, "step": 10249 }, { "epoch": 0.30199018900166463, "grad_norm": 1.6949816760720249, "learning_rate": 8.807984816344953e-06, "loss": 0.5888, "step": 10250 }, { "epoch": 0.3020196514591282, "grad_norm": 1.5126649991737466, "learning_rate": 8.807651542722317e-06, "loss": 0.4468, "step": 10251 }, { "epoch": 0.3020491139165918, "grad_norm": 1.5433978767780114, "learning_rate": 8.807318228823503e-06, "loss": 0.5188, "step": 10252 }, { "epoch": 0.3020785763740554, "grad_norm": 1.5089612008689415, "learning_rate": 8.806984874652039e-06, "loss": 0.4715, "step": 10253 }, { "epoch": 0.30210803883151893, "grad_norm": 1.5873827289856695, "learning_rate": 8.806651480211447e-06, "loss": 0.596, "step": 10254 }, { "epoch": 0.30213750128898254, "grad_norm": 1.5985503805019667, "learning_rate": 8.806318045505258e-06, "loss": 0.425, "step": 10255 }, { "epoch": 0.3021669637464461, "grad_norm": 1.716897662969626, "learning_rate": 8.805984570536995e-06, "loss": 0.4396, "step": 10256 }, { "epoch": 0.3021964262039097, "grad_norm": 1.5216146571485025, "learning_rate": 8.805651055310186e-06, "loss": 0.5675, "step": 10257 }, { "epoch": 0.30222588866137323, "grad_norm": 1.5745468200222528, "learning_rate": 8.805317499828364e-06, "loss": 0.5777, "step": 10258 }, { "epoch": 0.30225535111883683, "grad_norm": 1.5078508553893297, "learning_rate": 8.80498390409505e-06, "loss": 0.5059, "step": 10259 }, { "epoch": 0.3022848135763004, "grad_norm": 1.6126356889307911, "learning_rate": 8.804650268113777e-06, "loss": 0.5613, "step": 10260 }, { "epoch": 0.302314276033764, "grad_norm": 1.561202410492067, "learning_rate": 8.804316591888072e-06, "loss": 0.503, "step": 10261 }, { "epoch": 0.30234373849122753, "grad_norm": 1.5887798040023178, "learning_rate": 8.803982875421466e-06, "loss": 0.6695, "step": 10262 }, { "epoch": 0.30237320094869113, "grad_norm": 1.5205846189893026, "learning_rate": 8.803649118717489e-06, "loss": 0.3713, "step": 10263 }, { "epoch": 0.3024026634061547, "grad_norm": 1.3472462298636514, "learning_rate": 8.80331532177967e-06, "loss": 0.404, "step": 10264 }, { "epoch": 0.3024321258636183, "grad_norm": 1.6508438774738352, "learning_rate": 8.802981484611542e-06, "loss": 0.5906, "step": 10265 }, { "epoch": 0.3024615883210819, "grad_norm": 1.549587700543363, "learning_rate": 8.802647607216634e-06, "loss": 0.5623, "step": 10266 }, { "epoch": 0.30249105077854543, "grad_norm": 1.6323593075051526, "learning_rate": 8.802313689598478e-06, "loss": 0.5717, "step": 10267 }, { "epoch": 0.30252051323600904, "grad_norm": 1.4315991746172196, "learning_rate": 8.801979731760607e-06, "loss": 0.5147, "step": 10268 }, { "epoch": 0.3025499756934726, "grad_norm": 1.7590152925807911, "learning_rate": 8.801645733706553e-06, "loss": 0.4307, "step": 10269 }, { "epoch": 0.3025794381509362, "grad_norm": 1.3758867877577485, "learning_rate": 8.801311695439849e-06, "loss": 0.3933, "step": 10270 }, { "epoch": 0.30260890060839973, "grad_norm": 1.4441074857116203, "learning_rate": 8.800977616964028e-06, "loss": 0.5152, "step": 10271 }, { "epoch": 0.30263836306586334, "grad_norm": 1.5120916462950358, "learning_rate": 8.800643498282625e-06, "loss": 0.4819, "step": 10272 }, { "epoch": 0.3026678255233269, "grad_norm": 1.3949914882660392, "learning_rate": 8.800309339399174e-06, "loss": 0.3828, "step": 10273 }, { "epoch": 0.3026972879807905, "grad_norm": 1.753042501158126, "learning_rate": 8.799975140317207e-06, "loss": 0.6424, "step": 10274 }, { "epoch": 0.30272675043825403, "grad_norm": 1.5214440938842102, "learning_rate": 8.799640901040262e-06, "loss": 0.5208, "step": 10275 }, { "epoch": 0.30275621289571764, "grad_norm": 1.4355676870340905, "learning_rate": 8.799306621571874e-06, "loss": 0.3432, "step": 10276 }, { "epoch": 0.3027856753531812, "grad_norm": 1.4347638705065395, "learning_rate": 8.798972301915578e-06, "loss": 0.5395, "step": 10277 }, { "epoch": 0.3028151378106448, "grad_norm": 1.7434211458707654, "learning_rate": 8.79863794207491e-06, "loss": 0.6985, "step": 10278 }, { "epoch": 0.3028446002681084, "grad_norm": 1.4498070257395435, "learning_rate": 8.798303542053408e-06, "loss": 0.4757, "step": 10279 }, { "epoch": 0.30287406272557194, "grad_norm": 1.482189449095672, "learning_rate": 8.79796910185461e-06, "loss": 0.4119, "step": 10280 }, { "epoch": 0.30290352518303554, "grad_norm": 1.6933916483437625, "learning_rate": 8.797634621482051e-06, "loss": 0.5071, "step": 10281 }, { "epoch": 0.3029329876404991, "grad_norm": 1.6313967620789616, "learning_rate": 8.79730010093927e-06, "loss": 0.5275, "step": 10282 }, { "epoch": 0.3029624500979627, "grad_norm": 1.5208189029313641, "learning_rate": 8.796965540229807e-06, "loss": 0.4498, "step": 10283 }, { "epoch": 0.30299191255542623, "grad_norm": 1.6732560872705136, "learning_rate": 8.7966309393572e-06, "loss": 0.537, "step": 10284 }, { "epoch": 0.30302137501288984, "grad_norm": 1.5987843946945943, "learning_rate": 8.796296298324987e-06, "loss": 0.5556, "step": 10285 }, { "epoch": 0.3030508374703534, "grad_norm": 1.5648102597034792, "learning_rate": 8.795961617136708e-06, "loss": 0.4043, "step": 10286 }, { "epoch": 0.303080299927817, "grad_norm": 1.6390536632985413, "learning_rate": 8.795626895795904e-06, "loss": 0.5197, "step": 10287 }, { "epoch": 0.30310976238528053, "grad_norm": 1.7731172235192285, "learning_rate": 8.795292134306117e-06, "loss": 0.6634, "step": 10288 }, { "epoch": 0.30313922484274414, "grad_norm": 1.3931671471367362, "learning_rate": 8.794957332670886e-06, "loss": 0.3305, "step": 10289 }, { "epoch": 0.3031686873002077, "grad_norm": 1.4854440167319665, "learning_rate": 8.794622490893752e-06, "loss": 0.4971, "step": 10290 }, { "epoch": 0.3031981497576713, "grad_norm": 1.5219355721525638, "learning_rate": 8.794287608978258e-06, "loss": 0.4171, "step": 10291 }, { "epoch": 0.3032276122151349, "grad_norm": 1.6979206887849574, "learning_rate": 8.793952686927946e-06, "loss": 0.4909, "step": 10292 }, { "epoch": 0.30325707467259844, "grad_norm": 1.5617147711886348, "learning_rate": 8.79361772474636e-06, "loss": 0.4608, "step": 10293 }, { "epoch": 0.30328653713006204, "grad_norm": 1.6564480153768282, "learning_rate": 8.79328272243704e-06, "loss": 0.6322, "step": 10294 }, { "epoch": 0.3033159995875256, "grad_norm": 1.6250793916960167, "learning_rate": 8.792947680003532e-06, "loss": 0.5019, "step": 10295 }, { "epoch": 0.3033454620449892, "grad_norm": 1.4461258054678232, "learning_rate": 8.792612597449379e-06, "loss": 0.4441, "step": 10296 }, { "epoch": 0.30337492450245274, "grad_norm": 1.448823936497086, "learning_rate": 8.792277474778128e-06, "loss": 0.4255, "step": 10297 }, { "epoch": 0.30340438695991634, "grad_norm": 1.544081035094425, "learning_rate": 8.791942311993318e-06, "loss": 0.399, "step": 10298 }, { "epoch": 0.3034338494173799, "grad_norm": 1.5334402314645934, "learning_rate": 8.7916071090985e-06, "loss": 0.4294, "step": 10299 }, { "epoch": 0.3034633118748435, "grad_norm": 1.8910435401005168, "learning_rate": 8.791271866097217e-06, "loss": 0.5669, "step": 10300 }, { "epoch": 0.30349277433230704, "grad_norm": 1.5488746507287743, "learning_rate": 8.790936582993017e-06, "loss": 0.5432, "step": 10301 }, { "epoch": 0.30352223678977064, "grad_norm": 1.4719988740834153, "learning_rate": 8.790601259789445e-06, "loss": 0.4515, "step": 10302 }, { "epoch": 0.3035516992472342, "grad_norm": 1.6120684014106363, "learning_rate": 8.790265896490046e-06, "loss": 0.4549, "step": 10303 }, { "epoch": 0.3035811617046978, "grad_norm": 1.739511498417752, "learning_rate": 8.789930493098371e-06, "loss": 0.6207, "step": 10304 }, { "epoch": 0.3036106241621614, "grad_norm": 1.62729458732123, "learning_rate": 8.789595049617966e-06, "loss": 0.5021, "step": 10305 }, { "epoch": 0.30364008661962494, "grad_norm": 1.6466915804280877, "learning_rate": 8.78925956605238e-06, "loss": 0.4977, "step": 10306 }, { "epoch": 0.30366954907708854, "grad_norm": 1.7850209395366678, "learning_rate": 8.78892404240516e-06, "loss": 0.4068, "step": 10307 }, { "epoch": 0.3036990115345521, "grad_norm": 1.5849755252482598, "learning_rate": 8.788588478679857e-06, "loss": 0.4887, "step": 10308 }, { "epoch": 0.3037284739920157, "grad_norm": 1.384468973661438, "learning_rate": 8.788252874880018e-06, "loss": 0.4335, "step": 10309 }, { "epoch": 0.30375793644947924, "grad_norm": 1.3626842973212576, "learning_rate": 8.787917231009198e-06, "loss": 0.4971, "step": 10310 }, { "epoch": 0.30378739890694284, "grad_norm": 1.8776299281214406, "learning_rate": 8.787581547070942e-06, "loss": 0.5311, "step": 10311 }, { "epoch": 0.3038168613644064, "grad_norm": 1.6164148916487817, "learning_rate": 8.787245823068802e-06, "loss": 0.4452, "step": 10312 }, { "epoch": 0.30384632382187, "grad_norm": 1.513550218322187, "learning_rate": 8.78691005900633e-06, "loss": 0.5191, "step": 10313 }, { "epoch": 0.30387578627933354, "grad_norm": 1.351221666725529, "learning_rate": 8.78657425488708e-06, "loss": 0.3755, "step": 10314 }, { "epoch": 0.30390524873679714, "grad_norm": 1.3575700026620452, "learning_rate": 8.786238410714599e-06, "loss": 0.3285, "step": 10315 }, { "epoch": 0.3039347111942607, "grad_norm": 1.5130160382423063, "learning_rate": 8.785902526492441e-06, "loss": 0.4822, "step": 10316 }, { "epoch": 0.3039641736517243, "grad_norm": 1.547830626259322, "learning_rate": 8.785566602224162e-06, "loss": 0.4905, "step": 10317 }, { "epoch": 0.3039936361091879, "grad_norm": 1.4713764087037915, "learning_rate": 8.785230637913312e-06, "loss": 0.4887, "step": 10318 }, { "epoch": 0.30402309856665144, "grad_norm": 1.5900381738874318, "learning_rate": 8.784894633563448e-06, "loss": 0.4337, "step": 10319 }, { "epoch": 0.30405256102411504, "grad_norm": 1.5431303907367941, "learning_rate": 8.784558589178121e-06, "loss": 0.5386, "step": 10320 }, { "epoch": 0.3040820234815786, "grad_norm": 1.8204634441138698, "learning_rate": 8.784222504760884e-06, "loss": 0.611, "step": 10321 }, { "epoch": 0.3041114859390422, "grad_norm": 1.7625265847859495, "learning_rate": 8.783886380315297e-06, "loss": 0.5558, "step": 10322 }, { "epoch": 0.30414094839650574, "grad_norm": 1.6655228649116913, "learning_rate": 8.783550215844913e-06, "loss": 0.4678, "step": 10323 }, { "epoch": 0.30417041085396934, "grad_norm": 1.4474076709726458, "learning_rate": 8.783214011353288e-06, "loss": 0.4974, "step": 10324 }, { "epoch": 0.3041998733114329, "grad_norm": 1.643591771001919, "learning_rate": 8.782877766843978e-06, "loss": 0.5258, "step": 10325 }, { "epoch": 0.3042293357688965, "grad_norm": 1.5304059910034327, "learning_rate": 8.78254148232054e-06, "loss": 0.5359, "step": 10326 }, { "epoch": 0.30425879822636004, "grad_norm": 1.4216161602392448, "learning_rate": 8.78220515778653e-06, "loss": 0.3604, "step": 10327 }, { "epoch": 0.30428826068382364, "grad_norm": 1.4825721924611142, "learning_rate": 8.781868793245507e-06, "loss": 0.4558, "step": 10328 }, { "epoch": 0.3043177231412872, "grad_norm": 1.4935610460317814, "learning_rate": 8.781532388701029e-06, "loss": 0.4387, "step": 10329 }, { "epoch": 0.3043471855987508, "grad_norm": 1.5982673907658633, "learning_rate": 8.781195944156653e-06, "loss": 0.5575, "step": 10330 }, { "epoch": 0.3043766480562144, "grad_norm": 1.7371606825561294, "learning_rate": 8.78085945961594e-06, "loss": 0.5566, "step": 10331 }, { "epoch": 0.30440611051367794, "grad_norm": 1.4522870287299041, "learning_rate": 8.780522935082448e-06, "loss": 0.5214, "step": 10332 }, { "epoch": 0.30443557297114154, "grad_norm": 1.6399813307958704, "learning_rate": 8.780186370559734e-06, "loss": 0.5976, "step": 10333 }, { "epoch": 0.3044650354286051, "grad_norm": 1.4404170028451817, "learning_rate": 8.779849766051362e-06, "loss": 0.4205, "step": 10334 }, { "epoch": 0.3044944978860687, "grad_norm": 1.6018478896973245, "learning_rate": 8.779513121560892e-06, "loss": 0.5152, "step": 10335 }, { "epoch": 0.30452396034353224, "grad_norm": 1.6508682554320944, "learning_rate": 8.779176437091882e-06, "loss": 0.3978, "step": 10336 }, { "epoch": 0.30455342280099584, "grad_norm": 1.6051662175516825, "learning_rate": 8.778839712647899e-06, "loss": 0.5267, "step": 10337 }, { "epoch": 0.3045828852584594, "grad_norm": 1.9689041995063088, "learning_rate": 8.7785029482325e-06, "loss": 0.5206, "step": 10338 }, { "epoch": 0.304612347715923, "grad_norm": 1.4894595847192909, "learning_rate": 8.778166143849244e-06, "loss": 0.5937, "step": 10339 }, { "epoch": 0.30464181017338654, "grad_norm": 1.5654955316358135, "learning_rate": 8.777829299501704e-06, "loss": 0.3869, "step": 10340 }, { "epoch": 0.30467127263085014, "grad_norm": 1.53353773557505, "learning_rate": 8.777492415193433e-06, "loss": 0.3965, "step": 10341 }, { "epoch": 0.3047007350883137, "grad_norm": 1.6477664058370536, "learning_rate": 8.777155490928e-06, "loss": 0.5225, "step": 10342 }, { "epoch": 0.3047301975457773, "grad_norm": 1.8382830481199008, "learning_rate": 8.776818526708967e-06, "loss": 0.6678, "step": 10343 }, { "epoch": 0.3047596600032409, "grad_norm": 1.4228873139038398, "learning_rate": 8.7764815225399e-06, "loss": 0.4283, "step": 10344 }, { "epoch": 0.30478912246070444, "grad_norm": 1.688604941186952, "learning_rate": 8.77614447842436e-06, "loss": 0.564, "step": 10345 }, { "epoch": 0.30481858491816805, "grad_norm": 1.6752052322078428, "learning_rate": 8.775807394365917e-06, "loss": 0.4315, "step": 10346 }, { "epoch": 0.3048480473756316, "grad_norm": 1.3897695837701185, "learning_rate": 8.775470270368132e-06, "loss": 0.4206, "step": 10347 }, { "epoch": 0.3048775098330952, "grad_norm": 1.4532365001000158, "learning_rate": 8.775133106434572e-06, "loss": 0.4706, "step": 10348 }, { "epoch": 0.30490697229055874, "grad_norm": 1.3887742504591987, "learning_rate": 8.774795902568807e-06, "loss": 0.4958, "step": 10349 }, { "epoch": 0.30493643474802234, "grad_norm": 1.3767106735919914, "learning_rate": 8.7744586587744e-06, "loss": 0.4079, "step": 10350 }, { "epoch": 0.3049658972054859, "grad_norm": 1.7259890028571636, "learning_rate": 8.77412137505492e-06, "loss": 0.6574, "step": 10351 }, { "epoch": 0.3049953596629495, "grad_norm": 1.6442769339971988, "learning_rate": 8.773784051413936e-06, "loss": 0.4405, "step": 10352 }, { "epoch": 0.30502482212041304, "grad_norm": 1.5090756248452128, "learning_rate": 8.773446687855013e-06, "loss": 0.4783, "step": 10353 }, { "epoch": 0.30505428457787664, "grad_norm": 1.4755662670110963, "learning_rate": 8.77310928438172e-06, "loss": 0.4513, "step": 10354 }, { "epoch": 0.3050837470353402, "grad_norm": 1.5198471733455419, "learning_rate": 8.77277184099763e-06, "loss": 0.4475, "step": 10355 }, { "epoch": 0.3051132094928038, "grad_norm": 2.128914964846226, "learning_rate": 8.772434357706305e-06, "loss": 0.7837, "step": 10356 }, { "epoch": 0.3051426719502674, "grad_norm": 1.5921277306035766, "learning_rate": 8.772096834511321e-06, "loss": 0.4345, "step": 10357 }, { "epoch": 0.30517213440773094, "grad_norm": 1.4424820475784719, "learning_rate": 8.771759271416247e-06, "loss": 0.4696, "step": 10358 }, { "epoch": 0.30520159686519455, "grad_norm": 1.624149667315738, "learning_rate": 8.771421668424652e-06, "loss": 0.5025, "step": 10359 }, { "epoch": 0.3052310593226581, "grad_norm": 1.7117042169897323, "learning_rate": 8.77108402554011e-06, "loss": 0.6146, "step": 10360 }, { "epoch": 0.3052605217801217, "grad_norm": 1.4979314751011068, "learning_rate": 8.770746342766188e-06, "loss": 0.508, "step": 10361 }, { "epoch": 0.30528998423758524, "grad_norm": 1.5032394161468403, "learning_rate": 8.770408620106461e-06, "loss": 0.4075, "step": 10362 }, { "epoch": 0.30531944669504885, "grad_norm": 1.675876193223602, "learning_rate": 8.7700708575645e-06, "loss": 0.5868, "step": 10363 }, { "epoch": 0.3053489091525124, "grad_norm": 1.6783933792132086, "learning_rate": 8.769733055143878e-06, "loss": 0.4873, "step": 10364 }, { "epoch": 0.305378371609976, "grad_norm": 1.6034373102967683, "learning_rate": 8.76939521284817e-06, "loss": 0.4679, "step": 10365 }, { "epoch": 0.30540783406743954, "grad_norm": 1.6485093420074384, "learning_rate": 8.769057330680947e-06, "loss": 0.5618, "step": 10366 }, { "epoch": 0.30543729652490315, "grad_norm": 1.453177508462101, "learning_rate": 8.768719408645787e-06, "loss": 0.4524, "step": 10367 }, { "epoch": 0.3054667589823667, "grad_norm": 1.5485711209076791, "learning_rate": 8.768381446746261e-06, "loss": 0.4762, "step": 10368 }, { "epoch": 0.3054962214398303, "grad_norm": 1.5572353492235664, "learning_rate": 8.768043444985942e-06, "loss": 0.4732, "step": 10369 }, { "epoch": 0.3055256838972939, "grad_norm": 1.5722865418342036, "learning_rate": 8.767705403368409e-06, "loss": 0.3576, "step": 10370 }, { "epoch": 0.30555514635475745, "grad_norm": 1.5991950516094269, "learning_rate": 8.767367321897238e-06, "loss": 0.5197, "step": 10371 }, { "epoch": 0.30558460881222105, "grad_norm": 1.4831526665429724, "learning_rate": 8.767029200576001e-06, "loss": 0.5128, "step": 10372 }, { "epoch": 0.3056140712696846, "grad_norm": 1.7217702862781519, "learning_rate": 8.76669103940828e-06, "loss": 0.4363, "step": 10373 }, { "epoch": 0.3056435337271482, "grad_norm": 1.3906450434738549, "learning_rate": 8.766352838397646e-06, "loss": 0.3928, "step": 10374 }, { "epoch": 0.30567299618461174, "grad_norm": 1.7308511988185442, "learning_rate": 8.76601459754768e-06, "loss": 0.5059, "step": 10375 }, { "epoch": 0.30570245864207535, "grad_norm": 1.6185103165460393, "learning_rate": 8.765676316861962e-06, "loss": 0.5304, "step": 10376 }, { "epoch": 0.3057319210995389, "grad_norm": 1.7653862251752335, "learning_rate": 8.765337996344065e-06, "loss": 0.5955, "step": 10377 }, { "epoch": 0.3057613835570025, "grad_norm": 1.5242497501506675, "learning_rate": 8.76499963599757e-06, "loss": 0.3573, "step": 10378 }, { "epoch": 0.30579084601446604, "grad_norm": 1.4971481695667073, "learning_rate": 8.764661235826059e-06, "loss": 0.5258, "step": 10379 }, { "epoch": 0.30582030847192965, "grad_norm": 1.636986276526935, "learning_rate": 8.764322795833106e-06, "loss": 0.5504, "step": 10380 }, { "epoch": 0.3058497709293932, "grad_norm": 1.5050128714795523, "learning_rate": 8.763984316022296e-06, "loss": 0.4331, "step": 10381 }, { "epoch": 0.3058792333868568, "grad_norm": 1.7641988760159857, "learning_rate": 8.763645796397206e-06, "loss": 0.5705, "step": 10382 }, { "epoch": 0.3059086958443204, "grad_norm": 1.5465799225599874, "learning_rate": 8.763307236961416e-06, "loss": 0.5421, "step": 10383 }, { "epoch": 0.30593815830178395, "grad_norm": 1.4889968332055612, "learning_rate": 8.762968637718512e-06, "loss": 0.534, "step": 10384 }, { "epoch": 0.30596762075924755, "grad_norm": 1.4789021160718292, "learning_rate": 8.762629998672071e-06, "loss": 0.521, "step": 10385 }, { "epoch": 0.3059970832167111, "grad_norm": 1.4822707391329273, "learning_rate": 8.762291319825676e-06, "loss": 0.4311, "step": 10386 }, { "epoch": 0.3060265456741747, "grad_norm": 1.533593992207808, "learning_rate": 8.76195260118291e-06, "loss": 0.564, "step": 10387 }, { "epoch": 0.30605600813163825, "grad_norm": 1.447274764011229, "learning_rate": 8.761613842747357e-06, "loss": 0.4424, "step": 10388 }, { "epoch": 0.30608547058910185, "grad_norm": 1.6473679635570366, "learning_rate": 8.7612750445226e-06, "loss": 0.4989, "step": 10389 }, { "epoch": 0.3061149330465654, "grad_norm": 1.4144454298101803, "learning_rate": 8.76093620651222e-06, "loss": 0.4338, "step": 10390 }, { "epoch": 0.306144395504029, "grad_norm": 1.4991750057104742, "learning_rate": 8.760597328719803e-06, "loss": 0.4101, "step": 10391 }, { "epoch": 0.30617385796149255, "grad_norm": 1.3003891288751401, "learning_rate": 8.760258411148934e-06, "loss": 0.4285, "step": 10392 }, { "epoch": 0.30620332041895615, "grad_norm": 1.4350640996702568, "learning_rate": 8.759919453803198e-06, "loss": 0.4451, "step": 10393 }, { "epoch": 0.3062327828764197, "grad_norm": 1.644001365527105, "learning_rate": 8.759580456686179e-06, "loss": 0.5816, "step": 10394 }, { "epoch": 0.3062622453338833, "grad_norm": 1.5809928102385624, "learning_rate": 8.759241419801465e-06, "loss": 0.5068, "step": 10395 }, { "epoch": 0.3062917077913469, "grad_norm": 1.5396994052901019, "learning_rate": 8.75890234315264e-06, "loss": 0.4875, "step": 10396 }, { "epoch": 0.30632117024881045, "grad_norm": 1.6206752198631507, "learning_rate": 8.758563226743291e-06, "loss": 0.5962, "step": 10397 }, { "epoch": 0.30635063270627405, "grad_norm": 1.5433969876701217, "learning_rate": 8.758224070577005e-06, "loss": 0.4778, "step": 10398 }, { "epoch": 0.3063800951637376, "grad_norm": 1.6979709419380637, "learning_rate": 8.757884874657373e-06, "loss": 0.4847, "step": 10399 }, { "epoch": 0.3064095576212012, "grad_norm": 1.5211252415519423, "learning_rate": 8.75754563898798e-06, "loss": 0.3977, "step": 10400 }, { "epoch": 0.30643902007866475, "grad_norm": 1.4267187816051792, "learning_rate": 8.75720636357241e-06, "loss": 0.4542, "step": 10401 }, { "epoch": 0.30646848253612835, "grad_norm": 1.6688374919735225, "learning_rate": 8.75686704841426e-06, "loss": 0.5483, "step": 10402 }, { "epoch": 0.3064979449935919, "grad_norm": 1.5182672554150343, "learning_rate": 8.756527693517115e-06, "loss": 0.4789, "step": 10403 }, { "epoch": 0.3065274074510555, "grad_norm": 1.465437857887904, "learning_rate": 8.756188298884564e-06, "loss": 0.4501, "step": 10404 }, { "epoch": 0.30655686990851905, "grad_norm": 1.5397654792492266, "learning_rate": 8.7558488645202e-06, "loss": 0.4431, "step": 10405 }, { "epoch": 0.30658633236598265, "grad_norm": 1.4738232043839534, "learning_rate": 8.755509390427608e-06, "loss": 0.5207, "step": 10406 }, { "epoch": 0.3066157948234462, "grad_norm": 1.5298443643150355, "learning_rate": 8.755169876610383e-06, "loss": 0.4288, "step": 10407 }, { "epoch": 0.3066452572809098, "grad_norm": 1.630309426921353, "learning_rate": 8.754830323072117e-06, "loss": 0.5249, "step": 10408 }, { "epoch": 0.3066747197383734, "grad_norm": 1.8301911033808325, "learning_rate": 8.7544907298164e-06, "loss": 0.4443, "step": 10409 }, { "epoch": 0.30670418219583695, "grad_norm": 1.7154370563741883, "learning_rate": 8.754151096846824e-06, "loss": 0.514, "step": 10410 }, { "epoch": 0.30673364465330055, "grad_norm": 1.6109335699652518, "learning_rate": 8.75381142416698e-06, "loss": 0.5849, "step": 10411 }, { "epoch": 0.3067631071107641, "grad_norm": 1.6781302828478566, "learning_rate": 8.753471711780465e-06, "loss": 0.5582, "step": 10412 }, { "epoch": 0.3067925695682277, "grad_norm": 1.4375656418660585, "learning_rate": 8.753131959690868e-06, "loss": 0.5167, "step": 10413 }, { "epoch": 0.30682203202569125, "grad_norm": 1.7761568993182315, "learning_rate": 8.752792167901786e-06, "loss": 0.4813, "step": 10414 }, { "epoch": 0.30685149448315485, "grad_norm": 1.7505156835734639, "learning_rate": 8.752452336416814e-06, "loss": 0.4809, "step": 10415 }, { "epoch": 0.3068809569406184, "grad_norm": 1.6028193867191096, "learning_rate": 8.752112465239541e-06, "loss": 0.5071, "step": 10416 }, { "epoch": 0.306910419398082, "grad_norm": 1.6631956148251474, "learning_rate": 8.75177255437357e-06, "loss": 0.5463, "step": 10417 }, { "epoch": 0.30693988185554555, "grad_norm": 1.7548099410105014, "learning_rate": 8.75143260382249e-06, "loss": 0.517, "step": 10418 }, { "epoch": 0.30696934431300915, "grad_norm": 1.3967790012215384, "learning_rate": 8.7510926135899e-06, "loss": 0.5181, "step": 10419 }, { "epoch": 0.3069988067704727, "grad_norm": 1.5070792176674892, "learning_rate": 8.750752583679394e-06, "loss": 0.5772, "step": 10420 }, { "epoch": 0.3070282692279363, "grad_norm": 1.764168403844127, "learning_rate": 8.750412514094572e-06, "loss": 0.4907, "step": 10421 }, { "epoch": 0.3070577316853999, "grad_norm": 1.7236959980091675, "learning_rate": 8.750072404839029e-06, "loss": 0.4588, "step": 10422 }, { "epoch": 0.30708719414286345, "grad_norm": 1.4528558363241169, "learning_rate": 8.749732255916363e-06, "loss": 0.3965, "step": 10423 }, { "epoch": 0.30711665660032705, "grad_norm": 1.5241107332032366, "learning_rate": 8.74939206733017e-06, "loss": 0.631, "step": 10424 }, { "epoch": 0.3071461190577906, "grad_norm": 1.6007573776611412, "learning_rate": 8.749051839084053e-06, "loss": 0.4775, "step": 10425 }, { "epoch": 0.3071755815152542, "grad_norm": 1.3453374296067075, "learning_rate": 8.748711571181608e-06, "loss": 0.3234, "step": 10426 }, { "epoch": 0.30720504397271775, "grad_norm": 1.5925869169010414, "learning_rate": 8.748371263626433e-06, "loss": 0.6403, "step": 10427 }, { "epoch": 0.30723450643018135, "grad_norm": 1.6417377139914977, "learning_rate": 8.74803091642213e-06, "loss": 0.5472, "step": 10428 }, { "epoch": 0.3072639688876449, "grad_norm": 1.5206025713970353, "learning_rate": 8.747690529572297e-06, "loss": 0.5203, "step": 10429 }, { "epoch": 0.3072934313451085, "grad_norm": 1.4428160984121972, "learning_rate": 8.747350103080537e-06, "loss": 0.4871, "step": 10430 }, { "epoch": 0.30732289380257205, "grad_norm": 1.8308989157300966, "learning_rate": 8.747009636950449e-06, "loss": 0.534, "step": 10431 }, { "epoch": 0.30735235626003565, "grad_norm": 1.6305816262213952, "learning_rate": 8.746669131185636e-06, "loss": 0.4561, "step": 10432 }, { "epoch": 0.3073818187174992, "grad_norm": 1.490801793890586, "learning_rate": 8.746328585789698e-06, "loss": 0.3607, "step": 10433 }, { "epoch": 0.3074112811749628, "grad_norm": 1.4168226872168392, "learning_rate": 8.745988000766239e-06, "loss": 0.3501, "step": 10434 }, { "epoch": 0.3074407436324264, "grad_norm": 1.8762592179099635, "learning_rate": 8.74564737611886e-06, "loss": 0.4688, "step": 10435 }, { "epoch": 0.30747020608988995, "grad_norm": 1.5055546071467643, "learning_rate": 8.745306711851163e-06, "loss": 0.4686, "step": 10436 }, { "epoch": 0.30749966854735356, "grad_norm": 1.5037524819987933, "learning_rate": 8.744966007966754e-06, "loss": 0.4839, "step": 10437 }, { "epoch": 0.3075291310048171, "grad_norm": 1.2314715168499244, "learning_rate": 8.744625264469237e-06, "loss": 0.3067, "step": 10438 }, { "epoch": 0.3075585934622807, "grad_norm": 1.4716073705515473, "learning_rate": 8.744284481362214e-06, "loss": 0.3987, "step": 10439 }, { "epoch": 0.30758805591974425, "grad_norm": 1.724708236216806, "learning_rate": 8.743943658649291e-06, "loss": 0.4498, "step": 10440 }, { "epoch": 0.30761751837720785, "grad_norm": 1.7509344935381737, "learning_rate": 8.743602796334074e-06, "loss": 0.5457, "step": 10441 }, { "epoch": 0.3076469808346714, "grad_norm": 1.4952849927836889, "learning_rate": 8.743261894420165e-06, "loss": 0.4262, "step": 10442 }, { "epoch": 0.307676443292135, "grad_norm": 1.4903157014707278, "learning_rate": 8.742920952911177e-06, "loss": 0.4365, "step": 10443 }, { "epoch": 0.30770590574959855, "grad_norm": 1.4563882217687358, "learning_rate": 8.742579971810708e-06, "loss": 0.3255, "step": 10444 }, { "epoch": 0.30773536820706215, "grad_norm": 1.572653230320587, "learning_rate": 8.742238951122368e-06, "loss": 0.4906, "step": 10445 }, { "epoch": 0.3077648306645257, "grad_norm": 1.676821490857114, "learning_rate": 8.741897890849767e-06, "loss": 0.3658, "step": 10446 }, { "epoch": 0.3077942931219893, "grad_norm": 1.796467843067926, "learning_rate": 8.741556790996511e-06, "loss": 0.5083, "step": 10447 }, { "epoch": 0.3078237555794529, "grad_norm": 1.695829881679977, "learning_rate": 8.741215651566206e-06, "loss": 0.4678, "step": 10448 }, { "epoch": 0.30785321803691645, "grad_norm": 1.7545284195306963, "learning_rate": 8.740874472562461e-06, "loss": 0.607, "step": 10449 }, { "epoch": 0.30788268049438006, "grad_norm": 1.5454812949546137, "learning_rate": 8.74053325398889e-06, "loss": 0.5523, "step": 10450 }, { "epoch": 0.3079121429518436, "grad_norm": 1.5601100883687873, "learning_rate": 8.740191995849094e-06, "loss": 0.5732, "step": 10451 }, { "epoch": 0.3079416054093072, "grad_norm": 1.7017306644474268, "learning_rate": 8.739850698146689e-06, "loss": 0.5012, "step": 10452 }, { "epoch": 0.30797106786677075, "grad_norm": 1.6975183157663045, "learning_rate": 8.739509360885283e-06, "loss": 0.4269, "step": 10453 }, { "epoch": 0.30800053032423436, "grad_norm": 1.5030061484894803, "learning_rate": 8.739167984068486e-06, "loss": 0.4859, "step": 10454 }, { "epoch": 0.3080299927816979, "grad_norm": 1.5983722901119732, "learning_rate": 8.738826567699909e-06, "loss": 0.455, "step": 10455 }, { "epoch": 0.3080594552391615, "grad_norm": 1.7711605223718363, "learning_rate": 8.738485111783165e-06, "loss": 0.5883, "step": 10456 }, { "epoch": 0.30808891769662505, "grad_norm": 1.5877601905045033, "learning_rate": 8.738143616321865e-06, "loss": 0.524, "step": 10457 }, { "epoch": 0.30811838015408866, "grad_norm": 1.5485648670621988, "learning_rate": 8.737802081319622e-06, "loss": 0.4005, "step": 10458 }, { "epoch": 0.3081478426115522, "grad_norm": 1.3974437924002763, "learning_rate": 8.737460506780047e-06, "loss": 0.4258, "step": 10459 }, { "epoch": 0.3081773050690158, "grad_norm": 1.5155106317533424, "learning_rate": 8.737118892706753e-06, "loss": 0.3711, "step": 10460 }, { "epoch": 0.3082067675264794, "grad_norm": 1.651905364905209, "learning_rate": 8.736777239103353e-06, "loss": 0.5888, "step": 10461 }, { "epoch": 0.30823622998394296, "grad_norm": 1.8382949384366765, "learning_rate": 8.736435545973466e-06, "loss": 0.5753, "step": 10462 }, { "epoch": 0.30826569244140656, "grad_norm": 1.8023629850555765, "learning_rate": 8.736093813320699e-06, "loss": 0.5871, "step": 10463 }, { "epoch": 0.3082951548988701, "grad_norm": 1.5913916128005312, "learning_rate": 8.735752041148671e-06, "loss": 0.5078, "step": 10464 }, { "epoch": 0.3083246173563337, "grad_norm": 1.4756102503984194, "learning_rate": 8.735410229460999e-06, "loss": 0.5185, "step": 10465 }, { "epoch": 0.30835407981379725, "grad_norm": 1.6893377467162847, "learning_rate": 8.735068378261292e-06, "loss": 0.5259, "step": 10466 }, { "epoch": 0.30838354227126086, "grad_norm": 1.5745124366944676, "learning_rate": 8.734726487553173e-06, "loss": 0.5371, "step": 10467 }, { "epoch": 0.3084130047287244, "grad_norm": 1.5467482494855302, "learning_rate": 8.734384557340254e-06, "loss": 0.6426, "step": 10468 }, { "epoch": 0.308442467186188, "grad_norm": 1.6909515445941847, "learning_rate": 8.734042587626152e-06, "loss": 0.518, "step": 10469 }, { "epoch": 0.30847192964365155, "grad_norm": 1.5015958615169205, "learning_rate": 8.733700578414487e-06, "loss": 0.3589, "step": 10470 }, { "epoch": 0.30850139210111516, "grad_norm": 1.5114110986994207, "learning_rate": 8.733358529708876e-06, "loss": 0.4514, "step": 10471 }, { "epoch": 0.3085308545585787, "grad_norm": 1.4470521630544946, "learning_rate": 8.733016441512935e-06, "loss": 0.3801, "step": 10472 }, { "epoch": 0.3085603170160423, "grad_norm": 1.613711968674579, "learning_rate": 8.732674313830285e-06, "loss": 0.6553, "step": 10473 }, { "epoch": 0.3085897794735059, "grad_norm": 1.6967764762058974, "learning_rate": 8.732332146664545e-06, "loss": 0.6601, "step": 10474 }, { "epoch": 0.30861924193096946, "grad_norm": 1.5654917178284964, "learning_rate": 8.73198994001933e-06, "loss": 0.5252, "step": 10475 }, { "epoch": 0.30864870438843306, "grad_norm": 1.4254543750519821, "learning_rate": 8.731647693898264e-06, "loss": 0.4974, "step": 10476 }, { "epoch": 0.3086781668458966, "grad_norm": 1.5774307560791985, "learning_rate": 8.731305408304964e-06, "loss": 0.4653, "step": 10477 }, { "epoch": 0.3087076293033602, "grad_norm": 1.7106251950693159, "learning_rate": 8.730963083243054e-06, "loss": 0.6694, "step": 10478 }, { "epoch": 0.30873709176082376, "grad_norm": 1.3102172610388463, "learning_rate": 8.730620718716154e-06, "loss": 0.4138, "step": 10479 }, { "epoch": 0.30876655421828736, "grad_norm": 1.7282817873209786, "learning_rate": 8.730278314727885e-06, "loss": 0.5705, "step": 10480 }, { "epoch": 0.3087960166757509, "grad_norm": 1.8215659648212796, "learning_rate": 8.729935871281869e-06, "loss": 0.6056, "step": 10481 }, { "epoch": 0.3088254791332145, "grad_norm": 1.5437378571687932, "learning_rate": 8.729593388381728e-06, "loss": 0.4681, "step": 10482 }, { "epoch": 0.30885494159067806, "grad_norm": 1.692417032617712, "learning_rate": 8.729250866031084e-06, "loss": 0.4349, "step": 10483 }, { "epoch": 0.30888440404814166, "grad_norm": 1.413239734449131, "learning_rate": 8.728908304233561e-06, "loss": 0.4263, "step": 10484 }, { "epoch": 0.3089138665056052, "grad_norm": 1.5429361332632268, "learning_rate": 8.728565702992783e-06, "loss": 0.4348, "step": 10485 }, { "epoch": 0.3089433289630688, "grad_norm": 1.6841248220009333, "learning_rate": 8.728223062312374e-06, "loss": 0.5784, "step": 10486 }, { "epoch": 0.3089727914205324, "grad_norm": 1.6318736464066494, "learning_rate": 8.727880382195955e-06, "loss": 0.5035, "step": 10487 }, { "epoch": 0.30900225387799596, "grad_norm": 1.7479321278895403, "learning_rate": 8.727537662647156e-06, "loss": 0.5015, "step": 10488 }, { "epoch": 0.30903171633545956, "grad_norm": 1.461218975679201, "learning_rate": 8.727194903669597e-06, "loss": 0.5407, "step": 10489 }, { "epoch": 0.3090611787929231, "grad_norm": 1.656986466337603, "learning_rate": 8.72685210526691e-06, "loss": 0.4706, "step": 10490 }, { "epoch": 0.3090906412503867, "grad_norm": 1.4566383616468443, "learning_rate": 8.726509267442715e-06, "loss": 0.4115, "step": 10491 }, { "epoch": 0.30912010370785026, "grad_norm": 1.4985358568727263, "learning_rate": 8.72616639020064e-06, "loss": 0.4843, "step": 10492 }, { "epoch": 0.30914956616531386, "grad_norm": 1.5182730271954405, "learning_rate": 8.725823473544313e-06, "loss": 0.4853, "step": 10493 }, { "epoch": 0.3091790286227774, "grad_norm": 1.7011057745453624, "learning_rate": 8.72548051747736e-06, "loss": 0.5423, "step": 10494 }, { "epoch": 0.309208491080241, "grad_norm": 1.6763869522604096, "learning_rate": 8.725137522003412e-06, "loss": 0.5578, "step": 10495 }, { "epoch": 0.30923795353770456, "grad_norm": 1.8263207272328592, "learning_rate": 8.724794487126093e-06, "loss": 0.5742, "step": 10496 }, { "epoch": 0.30926741599516816, "grad_norm": 1.6055930136943604, "learning_rate": 8.724451412849032e-06, "loss": 0.5507, "step": 10497 }, { "epoch": 0.3092968784526317, "grad_norm": 1.4657176772159692, "learning_rate": 8.724108299175862e-06, "loss": 0.4509, "step": 10498 }, { "epoch": 0.3093263409100953, "grad_norm": 1.6535152518176868, "learning_rate": 8.723765146110207e-06, "loss": 0.5912, "step": 10499 }, { "epoch": 0.3093558033675589, "grad_norm": 1.5879173112962734, "learning_rate": 8.723421953655699e-06, "loss": 0.5237, "step": 10500 }, { "epoch": 0.30938526582502246, "grad_norm": 1.6458347882887712, "learning_rate": 8.72307872181597e-06, "loss": 0.4857, "step": 10501 }, { "epoch": 0.30941472828248606, "grad_norm": 1.511261439623507, "learning_rate": 8.722735450594648e-06, "loss": 0.5748, "step": 10502 }, { "epoch": 0.3094441907399496, "grad_norm": 1.5266559193494231, "learning_rate": 8.722392139995365e-06, "loss": 0.5573, "step": 10503 }, { "epoch": 0.3094736531974132, "grad_norm": 1.4841332225665689, "learning_rate": 8.722048790021752e-06, "loss": 0.515, "step": 10504 }, { "epoch": 0.30950311565487676, "grad_norm": 1.605280738192344, "learning_rate": 8.721705400677442e-06, "loss": 0.5274, "step": 10505 }, { "epoch": 0.30953257811234036, "grad_norm": 1.5110763352377732, "learning_rate": 8.721361971966064e-06, "loss": 0.4818, "step": 10506 }, { "epoch": 0.3095620405698039, "grad_norm": 1.7452968958996387, "learning_rate": 8.721018503891256e-06, "loss": 0.5322, "step": 10507 }, { "epoch": 0.3095915030272675, "grad_norm": 1.5344148097091233, "learning_rate": 8.720674996456649e-06, "loss": 0.4854, "step": 10508 }, { "epoch": 0.30962096548473106, "grad_norm": 1.6610349183807556, "learning_rate": 8.720331449665873e-06, "loss": 0.5671, "step": 10509 }, { "epoch": 0.30965042794219466, "grad_norm": 1.8442999311993877, "learning_rate": 8.719987863522565e-06, "loss": 0.6249, "step": 10510 }, { "epoch": 0.3096798903996582, "grad_norm": 1.5269439608115352, "learning_rate": 8.719644238030362e-06, "loss": 0.5257, "step": 10511 }, { "epoch": 0.3097093528571218, "grad_norm": 1.6522262531487868, "learning_rate": 8.719300573192894e-06, "loss": 0.5672, "step": 10512 }, { "epoch": 0.3097388153145854, "grad_norm": 1.599732772505008, "learning_rate": 8.718956869013796e-06, "loss": 0.64, "step": 10513 }, { "epoch": 0.30976827777204896, "grad_norm": 1.7946127409643347, "learning_rate": 8.718613125496709e-06, "loss": 0.5445, "step": 10514 }, { "epoch": 0.30979774022951256, "grad_norm": 1.537899057614291, "learning_rate": 8.718269342645265e-06, "loss": 0.4107, "step": 10515 }, { "epoch": 0.3098272026869761, "grad_norm": 1.5046651409766332, "learning_rate": 8.7179255204631e-06, "loss": 0.4862, "step": 10516 }, { "epoch": 0.3098566651444397, "grad_norm": 1.5099149565536174, "learning_rate": 8.717581658953851e-06, "loss": 0.5225, "step": 10517 }, { "epoch": 0.30988612760190326, "grad_norm": 1.522887205739008, "learning_rate": 8.717237758121157e-06, "loss": 0.5656, "step": 10518 }, { "epoch": 0.30991559005936686, "grad_norm": 1.4227523883451547, "learning_rate": 8.716893817968655e-06, "loss": 0.4472, "step": 10519 }, { "epoch": 0.3099450525168304, "grad_norm": 1.4461476028342304, "learning_rate": 8.716549838499984e-06, "loss": 0.4012, "step": 10520 }, { "epoch": 0.309974514974294, "grad_norm": 1.4538559282759702, "learning_rate": 8.71620581971878e-06, "loss": 0.4638, "step": 10521 }, { "epoch": 0.31000397743175756, "grad_norm": 1.6787422457371468, "learning_rate": 8.715861761628686e-06, "loss": 0.4918, "step": 10522 }, { "epoch": 0.31003343988922116, "grad_norm": 1.6470916952482186, "learning_rate": 8.715517664233337e-06, "loss": 0.4833, "step": 10523 }, { "epoch": 0.3100629023466847, "grad_norm": 1.583629042316288, "learning_rate": 8.715173527536374e-06, "loss": 0.4532, "step": 10524 }, { "epoch": 0.3100923648041483, "grad_norm": 1.5448406249721638, "learning_rate": 8.714829351541437e-06, "loss": 0.5775, "step": 10525 }, { "epoch": 0.3101218272616119, "grad_norm": 1.42489025421126, "learning_rate": 8.714485136252168e-06, "loss": 0.4358, "step": 10526 }, { "epoch": 0.31015128971907546, "grad_norm": 1.7487658978167377, "learning_rate": 8.714140881672208e-06, "loss": 0.684, "step": 10527 }, { "epoch": 0.31018075217653907, "grad_norm": 1.4601184467760566, "learning_rate": 8.713796587805197e-06, "loss": 0.4296, "step": 10528 }, { "epoch": 0.3102102146340026, "grad_norm": 1.4992060049312916, "learning_rate": 8.713452254654778e-06, "loss": 0.4309, "step": 10529 }, { "epoch": 0.3102396770914662, "grad_norm": 1.5488557163815135, "learning_rate": 8.713107882224592e-06, "loss": 0.5466, "step": 10530 }, { "epoch": 0.31026913954892976, "grad_norm": 1.5037598157498004, "learning_rate": 8.712763470518282e-06, "loss": 0.5259, "step": 10531 }, { "epoch": 0.31029860200639336, "grad_norm": 1.549586158218628, "learning_rate": 8.712419019539493e-06, "loss": 0.6131, "step": 10532 }, { "epoch": 0.3103280644638569, "grad_norm": 1.3947716663973801, "learning_rate": 8.712074529291867e-06, "loss": 0.4337, "step": 10533 }, { "epoch": 0.3103575269213205, "grad_norm": 1.5044698772204286, "learning_rate": 8.711729999779047e-06, "loss": 0.4577, "step": 10534 }, { "epoch": 0.31038698937878406, "grad_norm": 1.9748595231440604, "learning_rate": 8.711385431004679e-06, "loss": 0.6205, "step": 10535 }, { "epoch": 0.31041645183624766, "grad_norm": 1.4980952234951057, "learning_rate": 8.711040822972405e-06, "loss": 0.5103, "step": 10536 }, { "epoch": 0.3104459142937112, "grad_norm": 1.6546397251508593, "learning_rate": 8.710696175685874e-06, "loss": 0.3853, "step": 10537 }, { "epoch": 0.3104753767511748, "grad_norm": 1.5650934669489993, "learning_rate": 8.71035148914873e-06, "loss": 0.5175, "step": 10538 }, { "epoch": 0.3105048392086384, "grad_norm": 1.6038035672014515, "learning_rate": 8.710006763364618e-06, "loss": 0.6312, "step": 10539 }, { "epoch": 0.31053430166610196, "grad_norm": 1.6745676076382359, "learning_rate": 8.709661998337185e-06, "loss": 0.5988, "step": 10540 }, { "epoch": 0.31056376412356557, "grad_norm": 1.6587552967733088, "learning_rate": 8.709317194070078e-06, "loss": 0.4819, "step": 10541 }, { "epoch": 0.3105932265810291, "grad_norm": 1.4335368579883419, "learning_rate": 8.708972350566944e-06, "loss": 0.4413, "step": 10542 }, { "epoch": 0.3106226890384927, "grad_norm": 1.5904391170814738, "learning_rate": 8.708627467831432e-06, "loss": 0.5795, "step": 10543 }, { "epoch": 0.31065215149595626, "grad_norm": 1.466435135363883, "learning_rate": 8.708282545867188e-06, "loss": 0.5485, "step": 10544 }, { "epoch": 0.31068161395341987, "grad_norm": 1.6449964434542719, "learning_rate": 8.707937584677861e-06, "loss": 0.4151, "step": 10545 }, { "epoch": 0.3107110764108834, "grad_norm": 1.5530224309693266, "learning_rate": 8.7075925842671e-06, "loss": 0.4932, "step": 10546 }, { "epoch": 0.310740538868347, "grad_norm": 1.6729706918260123, "learning_rate": 8.707247544638557e-06, "loss": 0.5421, "step": 10547 }, { "epoch": 0.31077000132581056, "grad_norm": 1.5339166999474618, "learning_rate": 8.706902465795876e-06, "loss": 0.5945, "step": 10548 }, { "epoch": 0.31079946378327417, "grad_norm": 1.349353059097779, "learning_rate": 8.706557347742712e-06, "loss": 0.4278, "step": 10549 }, { "epoch": 0.3108289262407377, "grad_norm": 1.6335479771168095, "learning_rate": 8.706212190482715e-06, "loss": 0.4823, "step": 10550 }, { "epoch": 0.3108583886982013, "grad_norm": 1.467158333044303, "learning_rate": 8.705866994019535e-06, "loss": 0.45, "step": 10551 }, { "epoch": 0.3108878511556649, "grad_norm": 2.019331668267201, "learning_rate": 8.705521758356821e-06, "loss": 0.647, "step": 10552 }, { "epoch": 0.31091731361312847, "grad_norm": 1.5550210743623434, "learning_rate": 8.705176483498228e-06, "loss": 0.429, "step": 10553 }, { "epoch": 0.31094677607059207, "grad_norm": 1.5779041661172832, "learning_rate": 8.704831169447408e-06, "loss": 0.5355, "step": 10554 }, { "epoch": 0.3109762385280556, "grad_norm": 1.355736839025093, "learning_rate": 8.704485816208012e-06, "loss": 0.3621, "step": 10555 }, { "epoch": 0.3110057009855192, "grad_norm": 1.7605802761946707, "learning_rate": 8.704140423783695e-06, "loss": 0.417, "step": 10556 }, { "epoch": 0.31103516344298276, "grad_norm": 1.5596514944469833, "learning_rate": 8.703794992178109e-06, "loss": 0.5132, "step": 10557 }, { "epoch": 0.31106462590044637, "grad_norm": 1.391428219410664, "learning_rate": 8.70344952139491e-06, "loss": 0.4231, "step": 10558 }, { "epoch": 0.3110940883579099, "grad_norm": 1.6396922506629594, "learning_rate": 8.70310401143775e-06, "loss": 0.5415, "step": 10559 }, { "epoch": 0.3111235508153735, "grad_norm": 1.4264648222236402, "learning_rate": 8.702758462310282e-06, "loss": 0.4151, "step": 10560 }, { "epoch": 0.31115301327283706, "grad_norm": 1.5126027534285853, "learning_rate": 8.702412874016164e-06, "loss": 0.4649, "step": 10561 }, { "epoch": 0.31118247573030067, "grad_norm": 1.509594974776786, "learning_rate": 8.702067246559052e-06, "loss": 0.4376, "step": 10562 }, { "epoch": 0.3112119381877642, "grad_norm": 1.605185343692734, "learning_rate": 8.7017215799426e-06, "loss": 0.4999, "step": 10563 }, { "epoch": 0.3112414006452278, "grad_norm": 1.5936869627797356, "learning_rate": 8.701375874170466e-06, "loss": 0.5546, "step": 10564 }, { "epoch": 0.3112708631026914, "grad_norm": 1.5728946522613263, "learning_rate": 8.701030129246307e-06, "loss": 0.607, "step": 10565 }, { "epoch": 0.31130032556015497, "grad_norm": 1.4907698966336473, "learning_rate": 8.700684345173777e-06, "loss": 0.4824, "step": 10566 }, { "epoch": 0.31132978801761857, "grad_norm": 1.30077229516373, "learning_rate": 8.700338521956538e-06, "loss": 0.3926, "step": 10567 }, { "epoch": 0.3113592504750821, "grad_norm": 1.2463025044553477, "learning_rate": 8.699992659598245e-06, "loss": 0.3276, "step": 10568 }, { "epoch": 0.3113887129325457, "grad_norm": 1.6013348395843767, "learning_rate": 8.699646758102558e-06, "loss": 0.5861, "step": 10569 }, { "epoch": 0.31141817539000927, "grad_norm": 1.4183738480206725, "learning_rate": 8.699300817473134e-06, "loss": 0.3362, "step": 10570 }, { "epoch": 0.31144763784747287, "grad_norm": 1.6109586888639222, "learning_rate": 8.698954837713633e-06, "loss": 0.7051, "step": 10571 }, { "epoch": 0.3114771003049364, "grad_norm": 1.6082056106227043, "learning_rate": 8.698608818827716e-06, "loss": 0.4685, "step": 10572 }, { "epoch": 0.3115065627624, "grad_norm": 1.503767797752225, "learning_rate": 8.698262760819043e-06, "loss": 0.4863, "step": 10573 }, { "epoch": 0.31153602521986357, "grad_norm": 1.5633077592274227, "learning_rate": 8.69791666369127e-06, "loss": 0.411, "step": 10574 }, { "epoch": 0.31156548767732717, "grad_norm": 1.6461329847025605, "learning_rate": 8.697570527448064e-06, "loss": 0.4241, "step": 10575 }, { "epoch": 0.3115949501347907, "grad_norm": 1.5534676843371769, "learning_rate": 8.697224352093084e-06, "loss": 0.5082, "step": 10576 }, { "epoch": 0.3116244125922543, "grad_norm": 1.7257721658245955, "learning_rate": 8.696878137629992e-06, "loss": 0.7536, "step": 10577 }, { "epoch": 0.3116538750497179, "grad_norm": 1.5461879986227525, "learning_rate": 8.69653188406245e-06, "loss": 0.4799, "step": 10578 }, { "epoch": 0.31168333750718147, "grad_norm": 1.5850544535083138, "learning_rate": 8.696185591394117e-06, "loss": 0.449, "step": 10579 }, { "epoch": 0.31171279996464507, "grad_norm": 1.61388811292718, "learning_rate": 8.695839259628662e-06, "loss": 0.5313, "step": 10580 }, { "epoch": 0.3117422624221086, "grad_norm": 1.636511624758883, "learning_rate": 8.695492888769745e-06, "loss": 0.417, "step": 10581 }, { "epoch": 0.3117717248795722, "grad_norm": 1.5993627027940536, "learning_rate": 8.695146478821032e-06, "loss": 0.4547, "step": 10582 }, { "epoch": 0.31180118733703577, "grad_norm": 1.4038606509979492, "learning_rate": 8.694800029786183e-06, "loss": 0.4626, "step": 10583 }, { "epoch": 0.31183064979449937, "grad_norm": 1.6021686812008327, "learning_rate": 8.694453541668868e-06, "loss": 0.5546, "step": 10584 }, { "epoch": 0.3118601122519629, "grad_norm": 1.3383069306888835, "learning_rate": 8.694107014472748e-06, "loss": 0.4501, "step": 10585 }, { "epoch": 0.3118895747094265, "grad_norm": 1.5356473040875145, "learning_rate": 8.693760448201489e-06, "loss": 0.3973, "step": 10586 }, { "epoch": 0.31191903716689007, "grad_norm": 1.543012336318253, "learning_rate": 8.693413842858758e-06, "loss": 0.5494, "step": 10587 }, { "epoch": 0.31194849962435367, "grad_norm": 1.5386423343328193, "learning_rate": 8.693067198448222e-06, "loss": 0.5512, "step": 10588 }, { "epoch": 0.3119779620818172, "grad_norm": 1.4669956520913818, "learning_rate": 8.692720514973547e-06, "loss": 0.5, "step": 10589 }, { "epoch": 0.3120074245392808, "grad_norm": 1.4575878937272513, "learning_rate": 8.692373792438397e-06, "loss": 0.5318, "step": 10590 }, { "epoch": 0.3120368869967444, "grad_norm": 1.4811962971315171, "learning_rate": 8.692027030846446e-06, "loss": 0.428, "step": 10591 }, { "epoch": 0.31206634945420797, "grad_norm": 1.4882979105010032, "learning_rate": 8.691680230201356e-06, "loss": 0.4436, "step": 10592 }, { "epoch": 0.3120958119116716, "grad_norm": 1.4909750425911805, "learning_rate": 8.691333390506798e-06, "loss": 0.4132, "step": 10593 }, { "epoch": 0.3121252743691351, "grad_norm": 1.495249023778158, "learning_rate": 8.69098651176644e-06, "loss": 0.4727, "step": 10594 }, { "epoch": 0.3121547368265987, "grad_norm": 1.7002431245421255, "learning_rate": 8.690639593983953e-06, "loss": 0.4202, "step": 10595 }, { "epoch": 0.31218419928406227, "grad_norm": 1.6559890982703638, "learning_rate": 8.690292637163003e-06, "loss": 0.436, "step": 10596 }, { "epoch": 0.31221366174152587, "grad_norm": 1.4131722395035031, "learning_rate": 8.689945641307265e-06, "loss": 0.4648, "step": 10597 }, { "epoch": 0.3122431241989894, "grad_norm": 1.352307459768066, "learning_rate": 8.689598606420404e-06, "loss": 0.4525, "step": 10598 }, { "epoch": 0.312272586656453, "grad_norm": 1.4255175021724107, "learning_rate": 8.689251532506093e-06, "loss": 0.2979, "step": 10599 }, { "epoch": 0.31230204911391657, "grad_norm": 1.608314322390621, "learning_rate": 8.688904419568006e-06, "loss": 0.4908, "step": 10600 }, { "epoch": 0.31233151157138017, "grad_norm": 1.719073631618814, "learning_rate": 8.688557267609811e-06, "loss": 0.5869, "step": 10601 }, { "epoch": 0.3123609740288437, "grad_norm": 1.647486261215697, "learning_rate": 8.688210076635182e-06, "loss": 0.4969, "step": 10602 }, { "epoch": 0.3123904364863073, "grad_norm": 1.531692786180633, "learning_rate": 8.68786284664779e-06, "loss": 0.4092, "step": 10603 }, { "epoch": 0.3124198989437709, "grad_norm": 1.347405066880828, "learning_rate": 8.687515577651309e-06, "loss": 0.3478, "step": 10604 }, { "epoch": 0.31244936140123447, "grad_norm": 1.4109203731598083, "learning_rate": 8.687168269649411e-06, "loss": 0.3893, "step": 10605 }, { "epoch": 0.3124788238586981, "grad_norm": 1.7531943609686944, "learning_rate": 8.686820922645774e-06, "loss": 0.4962, "step": 10606 }, { "epoch": 0.3125082863161616, "grad_norm": 1.417684996095123, "learning_rate": 8.686473536644066e-06, "loss": 0.5053, "step": 10607 }, { "epoch": 0.3125377487736252, "grad_norm": 1.441681883180109, "learning_rate": 8.686126111647964e-06, "loss": 0.5185, "step": 10608 }, { "epoch": 0.31256721123108877, "grad_norm": 1.4912469412867748, "learning_rate": 8.685778647661145e-06, "loss": 0.4908, "step": 10609 }, { "epoch": 0.3125966736885524, "grad_norm": 1.5096682137677577, "learning_rate": 8.685431144687281e-06, "loss": 0.4394, "step": 10610 }, { "epoch": 0.3126261361460159, "grad_norm": 1.4801661392725711, "learning_rate": 8.68508360273005e-06, "loss": 0.4686, "step": 10611 }, { "epoch": 0.3126555986034795, "grad_norm": 1.5607174943500028, "learning_rate": 8.68473602179313e-06, "loss": 0.4989, "step": 10612 }, { "epoch": 0.31268506106094307, "grad_norm": 1.533901118447367, "learning_rate": 8.684388401880194e-06, "loss": 0.5361, "step": 10613 }, { "epoch": 0.3127145235184067, "grad_norm": 1.7332671723742197, "learning_rate": 8.684040742994919e-06, "loss": 0.5646, "step": 10614 }, { "epoch": 0.3127439859758702, "grad_norm": 1.4633045430914087, "learning_rate": 8.683693045140987e-06, "loss": 0.4254, "step": 10615 }, { "epoch": 0.3127734484333338, "grad_norm": 1.485220184934767, "learning_rate": 8.68334530832207e-06, "loss": 0.4083, "step": 10616 }, { "epoch": 0.3128029108907974, "grad_norm": 1.7498552266989669, "learning_rate": 8.682997532541848e-06, "loss": 0.5336, "step": 10617 }, { "epoch": 0.312832373348261, "grad_norm": 1.2993641626670724, "learning_rate": 8.682649717804003e-06, "loss": 0.3363, "step": 10618 }, { "epoch": 0.3128618358057246, "grad_norm": 1.4327454205179604, "learning_rate": 8.68230186411221e-06, "loss": 0.3831, "step": 10619 }, { "epoch": 0.3128912982631881, "grad_norm": 1.50469683635183, "learning_rate": 8.68195397147015e-06, "loss": 0.5135, "step": 10620 }, { "epoch": 0.3129207607206517, "grad_norm": 1.4729941415566654, "learning_rate": 8.681606039881504e-06, "loss": 0.4593, "step": 10621 }, { "epoch": 0.31295022317811527, "grad_norm": 1.491384151034526, "learning_rate": 8.681258069349951e-06, "loss": 0.482, "step": 10622 }, { "epoch": 0.3129796856355789, "grad_norm": 1.7092562427269302, "learning_rate": 8.680910059879172e-06, "loss": 0.5998, "step": 10623 }, { "epoch": 0.3130091480930424, "grad_norm": 1.6270617701871373, "learning_rate": 8.680562011472848e-06, "loss": 0.5435, "step": 10624 }, { "epoch": 0.313038610550506, "grad_norm": 1.5616567271600823, "learning_rate": 8.680213924134661e-06, "loss": 0.4158, "step": 10625 }, { "epoch": 0.31306807300796957, "grad_norm": 1.7241785087356736, "learning_rate": 8.679865797868293e-06, "loss": 0.6046, "step": 10626 }, { "epoch": 0.3130975354654332, "grad_norm": 1.6938784326104352, "learning_rate": 8.679517632677425e-06, "loss": 0.6084, "step": 10627 }, { "epoch": 0.3131269979228967, "grad_norm": 1.6353351447163325, "learning_rate": 8.679169428565742e-06, "loss": 0.6535, "step": 10628 }, { "epoch": 0.3131564603803603, "grad_norm": 1.5850093876272526, "learning_rate": 8.678821185536926e-06, "loss": 0.4015, "step": 10629 }, { "epoch": 0.3131859228378239, "grad_norm": 1.8222772715043367, "learning_rate": 8.678472903594658e-06, "loss": 0.4594, "step": 10630 }, { "epoch": 0.3132153852952875, "grad_norm": 1.401337156380577, "learning_rate": 8.678124582742628e-06, "loss": 0.5092, "step": 10631 }, { "epoch": 0.3132448477527511, "grad_norm": 1.3668978344421199, "learning_rate": 8.677776222984516e-06, "loss": 0.4099, "step": 10632 }, { "epoch": 0.3132743102102146, "grad_norm": 1.5814315737864655, "learning_rate": 8.677427824324007e-06, "loss": 0.5008, "step": 10633 }, { "epoch": 0.3133037726676782, "grad_norm": 1.566770407879723, "learning_rate": 8.677079386764788e-06, "loss": 0.4584, "step": 10634 }, { "epoch": 0.3133332351251418, "grad_norm": 1.5348622850361882, "learning_rate": 8.676730910310545e-06, "loss": 0.6152, "step": 10635 }, { "epoch": 0.3133626975826054, "grad_norm": 1.564880002123008, "learning_rate": 8.676382394964961e-06, "loss": 0.5767, "step": 10636 }, { "epoch": 0.3133921600400689, "grad_norm": 1.6497229094387684, "learning_rate": 8.676033840731725e-06, "loss": 0.5099, "step": 10637 }, { "epoch": 0.3134216224975325, "grad_norm": 1.5134772697619496, "learning_rate": 8.675685247614525e-06, "loss": 0.3735, "step": 10638 }, { "epoch": 0.3134510849549961, "grad_norm": 1.4935390815061418, "learning_rate": 8.675336615617044e-06, "loss": 0.4498, "step": 10639 }, { "epoch": 0.3134805474124597, "grad_norm": 1.4646816514740222, "learning_rate": 8.674987944742975e-06, "loss": 0.4108, "step": 10640 }, { "epoch": 0.3135100098699232, "grad_norm": 1.4203511096394958, "learning_rate": 8.674639234996003e-06, "loss": 0.3874, "step": 10641 }, { "epoch": 0.3135394723273868, "grad_norm": 1.4819406617390645, "learning_rate": 8.674290486379817e-06, "loss": 0.4759, "step": 10642 }, { "epoch": 0.31356893478485043, "grad_norm": 1.5408970625150722, "learning_rate": 8.673941698898103e-06, "loss": 0.5386, "step": 10643 }, { "epoch": 0.313598397242314, "grad_norm": 1.5768189172843867, "learning_rate": 8.673592872554557e-06, "loss": 0.5309, "step": 10644 }, { "epoch": 0.3136278596997776, "grad_norm": 1.4270238284364958, "learning_rate": 8.673244007352863e-06, "loss": 0.5193, "step": 10645 }, { "epoch": 0.3136573221572411, "grad_norm": 1.511424637489676, "learning_rate": 8.672895103296714e-06, "loss": 0.5017, "step": 10646 }, { "epoch": 0.31368678461470473, "grad_norm": 1.6755500882486485, "learning_rate": 8.672546160389801e-06, "loss": 0.4885, "step": 10647 }, { "epoch": 0.3137162470721683, "grad_norm": 1.348904250979877, "learning_rate": 8.672197178635813e-06, "loss": 0.4726, "step": 10648 }, { "epoch": 0.3137457095296319, "grad_norm": 1.3801374483070723, "learning_rate": 8.671848158038444e-06, "loss": 0.3872, "step": 10649 }, { "epoch": 0.3137751719870954, "grad_norm": 1.7684536859732098, "learning_rate": 8.671499098601381e-06, "loss": 0.7269, "step": 10650 }, { "epoch": 0.313804634444559, "grad_norm": 1.7019019194667138, "learning_rate": 8.671150000328321e-06, "loss": 0.5571, "step": 10651 }, { "epoch": 0.3138340969020226, "grad_norm": 1.5929284644877997, "learning_rate": 8.670800863222955e-06, "loss": 0.4108, "step": 10652 }, { "epoch": 0.3138635593594862, "grad_norm": 1.5710999942631132, "learning_rate": 8.670451687288976e-06, "loss": 0.4939, "step": 10653 }, { "epoch": 0.3138930218169497, "grad_norm": 1.5983347345379781, "learning_rate": 8.67010247253008e-06, "loss": 0.4329, "step": 10654 }, { "epoch": 0.3139224842744133, "grad_norm": 1.5006587853097444, "learning_rate": 8.669753218949956e-06, "loss": 0.4077, "step": 10655 }, { "epoch": 0.31395194673187693, "grad_norm": 1.5022716272317191, "learning_rate": 8.6694039265523e-06, "loss": 0.3169, "step": 10656 }, { "epoch": 0.3139814091893405, "grad_norm": 1.6563072062289546, "learning_rate": 8.66905459534081e-06, "loss": 0.488, "step": 10657 }, { "epoch": 0.3140108716468041, "grad_norm": 1.7135530764391984, "learning_rate": 8.668705225319178e-06, "loss": 0.639, "step": 10658 }, { "epoch": 0.3140403341042676, "grad_norm": 1.6303857600591418, "learning_rate": 8.668355816491101e-06, "loss": 0.5744, "step": 10659 }, { "epoch": 0.31406979656173123, "grad_norm": 1.354600782273141, "learning_rate": 8.668006368860272e-06, "loss": 0.4237, "step": 10660 }, { "epoch": 0.3140992590191948, "grad_norm": 1.4936258772607827, "learning_rate": 8.667656882430391e-06, "loss": 0.4988, "step": 10661 }, { "epoch": 0.3141287214766584, "grad_norm": 1.5946734183717226, "learning_rate": 8.667307357205153e-06, "loss": 0.5082, "step": 10662 }, { "epoch": 0.3141581839341219, "grad_norm": 1.6218681826503325, "learning_rate": 8.666957793188255e-06, "loss": 0.504, "step": 10663 }, { "epoch": 0.31418764639158553, "grad_norm": 1.6630249416357685, "learning_rate": 8.666608190383394e-06, "loss": 0.4507, "step": 10664 }, { "epoch": 0.3142171088490491, "grad_norm": 1.7408217520040774, "learning_rate": 8.66625854879427e-06, "loss": 0.4408, "step": 10665 }, { "epoch": 0.3142465713065127, "grad_norm": 1.5737134899705412, "learning_rate": 8.665908868424581e-06, "loss": 0.4615, "step": 10666 }, { "epoch": 0.3142760337639762, "grad_norm": 1.7544944799956694, "learning_rate": 8.665559149278025e-06, "loss": 0.3786, "step": 10667 }, { "epoch": 0.31430549622143983, "grad_norm": 1.435106880296516, "learning_rate": 8.665209391358303e-06, "loss": 0.3627, "step": 10668 }, { "epoch": 0.31433495867890343, "grad_norm": 1.4070590488430375, "learning_rate": 8.66485959466911e-06, "loss": 0.3851, "step": 10669 }, { "epoch": 0.314364421136367, "grad_norm": 1.502921624391422, "learning_rate": 8.66450975921415e-06, "loss": 0.4766, "step": 10670 }, { "epoch": 0.3143938835938306, "grad_norm": 1.6713196058535889, "learning_rate": 8.664159884997124e-06, "loss": 0.5563, "step": 10671 }, { "epoch": 0.31442334605129413, "grad_norm": 1.4655271813111022, "learning_rate": 8.66380997202173e-06, "loss": 0.565, "step": 10672 }, { "epoch": 0.31445280850875773, "grad_norm": 1.5186064137034965, "learning_rate": 8.663460020291672e-06, "loss": 0.5794, "step": 10673 }, { "epoch": 0.3144822709662213, "grad_norm": 1.4142611058373544, "learning_rate": 8.663110029810648e-06, "loss": 0.5156, "step": 10674 }, { "epoch": 0.3145117334236849, "grad_norm": 1.7481200259031724, "learning_rate": 8.662760000582364e-06, "loss": 0.4477, "step": 10675 }, { "epoch": 0.3145411958811484, "grad_norm": 1.609412640024462, "learning_rate": 8.662409932610522e-06, "loss": 0.6486, "step": 10676 }, { "epoch": 0.31457065833861203, "grad_norm": 1.5456253289447317, "learning_rate": 8.662059825898824e-06, "loss": 0.402, "step": 10677 }, { "epoch": 0.3146001207960756, "grad_norm": 1.5617705150352563, "learning_rate": 8.66170968045097e-06, "loss": 0.6241, "step": 10678 }, { "epoch": 0.3146295832535392, "grad_norm": 1.7777532345056497, "learning_rate": 8.661359496270669e-06, "loss": 0.6114, "step": 10679 }, { "epoch": 0.3146590457110027, "grad_norm": 1.576321609170625, "learning_rate": 8.661009273361625e-06, "loss": 0.5961, "step": 10680 }, { "epoch": 0.31468850816846633, "grad_norm": 1.5293540741922678, "learning_rate": 8.660659011727537e-06, "loss": 0.338, "step": 10681 }, { "epoch": 0.31471797062592993, "grad_norm": 1.7366037772106773, "learning_rate": 8.660308711372116e-06, "loss": 0.3277, "step": 10682 }, { "epoch": 0.3147474330833935, "grad_norm": 1.431327464822963, "learning_rate": 8.659958372299065e-06, "loss": 0.4738, "step": 10683 }, { "epoch": 0.3147768955408571, "grad_norm": 1.332228920329857, "learning_rate": 8.659607994512088e-06, "loss": 0.37, "step": 10684 }, { "epoch": 0.31480635799832063, "grad_norm": 1.6679175355605578, "learning_rate": 8.659257578014895e-06, "loss": 0.5466, "step": 10685 }, { "epoch": 0.31483582045578423, "grad_norm": 1.4835577819700876, "learning_rate": 8.658907122811189e-06, "loss": 0.5123, "step": 10686 }, { "epoch": 0.3148652829132478, "grad_norm": 1.8861668407449417, "learning_rate": 8.658556628904679e-06, "loss": 0.6629, "step": 10687 }, { "epoch": 0.3148947453707114, "grad_norm": 1.6641244882883763, "learning_rate": 8.658206096299072e-06, "loss": 0.4986, "step": 10688 }, { "epoch": 0.31492420782817493, "grad_norm": 1.700102034808519, "learning_rate": 8.657855524998076e-06, "loss": 0.5824, "step": 10689 }, { "epoch": 0.31495367028563853, "grad_norm": 1.621827172848083, "learning_rate": 8.657504915005399e-06, "loss": 0.5002, "step": 10690 }, { "epoch": 0.3149831327431021, "grad_norm": 1.792830966338872, "learning_rate": 8.657154266324748e-06, "loss": 0.54, "step": 10691 }, { "epoch": 0.3150125952005657, "grad_norm": 1.5252526263931896, "learning_rate": 8.656803578959836e-06, "loss": 0.3787, "step": 10692 }, { "epoch": 0.31504205765802923, "grad_norm": 1.7859221746678198, "learning_rate": 8.65645285291437e-06, "loss": 0.6639, "step": 10693 }, { "epoch": 0.31507152011549283, "grad_norm": 1.432570682513979, "learning_rate": 8.65610208819206e-06, "loss": 0.4762, "step": 10694 }, { "epoch": 0.31510098257295643, "grad_norm": 1.573579731718878, "learning_rate": 8.655751284796616e-06, "loss": 0.4801, "step": 10695 }, { "epoch": 0.31513044503042, "grad_norm": 1.5445806059595935, "learning_rate": 8.655400442731747e-06, "loss": 0.5351, "step": 10696 }, { "epoch": 0.3151599074878836, "grad_norm": 1.617511725737125, "learning_rate": 8.655049562001168e-06, "loss": 0.5202, "step": 10697 }, { "epoch": 0.31518936994534713, "grad_norm": 1.75328197963062, "learning_rate": 8.65469864260859e-06, "loss": 0.5112, "step": 10698 }, { "epoch": 0.31521883240281073, "grad_norm": 1.4847563996865119, "learning_rate": 8.654347684557723e-06, "loss": 0.4016, "step": 10699 }, { "epoch": 0.3152482948602743, "grad_norm": 1.763516766003213, "learning_rate": 8.653996687852278e-06, "loss": 0.5811, "step": 10700 }, { "epoch": 0.3152777573177379, "grad_norm": 1.5310443135714749, "learning_rate": 8.65364565249597e-06, "loss": 0.4367, "step": 10701 }, { "epoch": 0.31530721977520143, "grad_norm": 1.4900444361668201, "learning_rate": 8.653294578492513e-06, "loss": 0.4499, "step": 10702 }, { "epoch": 0.31533668223266503, "grad_norm": 1.3847810756039247, "learning_rate": 8.652943465845618e-06, "loss": 0.4493, "step": 10703 }, { "epoch": 0.3153661446901286, "grad_norm": 1.4899884020514063, "learning_rate": 8.652592314559003e-06, "loss": 0.4444, "step": 10704 }, { "epoch": 0.3153956071475922, "grad_norm": 1.5683311483808335, "learning_rate": 8.652241124636375e-06, "loss": 0.4852, "step": 10705 }, { "epoch": 0.31542506960505573, "grad_norm": 1.4942579531498135, "learning_rate": 8.651889896081458e-06, "loss": 0.5407, "step": 10706 }, { "epoch": 0.31545453206251933, "grad_norm": 1.5240598128184757, "learning_rate": 8.65153862889796e-06, "loss": 0.4576, "step": 10707 }, { "epoch": 0.31548399451998294, "grad_norm": 1.4668481010546806, "learning_rate": 8.6511873230896e-06, "loss": 0.5378, "step": 10708 }, { "epoch": 0.3155134569774465, "grad_norm": 1.7184547704181372, "learning_rate": 8.650835978660095e-06, "loss": 0.5602, "step": 10709 }, { "epoch": 0.3155429194349101, "grad_norm": 1.7295250426922446, "learning_rate": 8.650484595613158e-06, "loss": 0.6104, "step": 10710 }, { "epoch": 0.31557238189237363, "grad_norm": 1.4201454077564648, "learning_rate": 8.650133173952506e-06, "loss": 0.3981, "step": 10711 }, { "epoch": 0.31560184434983724, "grad_norm": 1.6686805193288456, "learning_rate": 8.64978171368186e-06, "loss": 0.5951, "step": 10712 }, { "epoch": 0.3156313068073008, "grad_norm": 1.4771561908757316, "learning_rate": 8.649430214804934e-06, "loss": 0.5274, "step": 10713 }, { "epoch": 0.3156607692647644, "grad_norm": 1.5287430673232805, "learning_rate": 8.649078677325447e-06, "loss": 0.4049, "step": 10714 }, { "epoch": 0.31569023172222793, "grad_norm": 1.6040849836735056, "learning_rate": 8.648727101247118e-06, "loss": 0.5667, "step": 10715 }, { "epoch": 0.31571969417969153, "grad_norm": 1.5441756920046594, "learning_rate": 8.648375486573669e-06, "loss": 0.5163, "step": 10716 }, { "epoch": 0.3157491566371551, "grad_norm": 1.428377418943941, "learning_rate": 8.648023833308812e-06, "loss": 0.3592, "step": 10717 }, { "epoch": 0.3157786190946187, "grad_norm": 1.4958506771913205, "learning_rate": 8.647672141456271e-06, "loss": 0.5413, "step": 10718 }, { "epoch": 0.31580808155208223, "grad_norm": 1.4761350947960792, "learning_rate": 8.647320411019769e-06, "loss": 0.322, "step": 10719 }, { "epoch": 0.31583754400954583, "grad_norm": 1.378600199760391, "learning_rate": 8.64696864200302e-06, "loss": 0.5444, "step": 10720 }, { "epoch": 0.31586700646700944, "grad_norm": 1.5461731000921946, "learning_rate": 8.646616834409749e-06, "loss": 0.5451, "step": 10721 }, { "epoch": 0.315896468924473, "grad_norm": 1.5589415955473112, "learning_rate": 8.646264988243677e-06, "loss": 0.5723, "step": 10722 }, { "epoch": 0.3159259313819366, "grad_norm": 1.4597267733940789, "learning_rate": 8.645913103508526e-06, "loss": 0.3664, "step": 10723 }, { "epoch": 0.31595539383940013, "grad_norm": 1.6818955810427496, "learning_rate": 8.645561180208015e-06, "loss": 0.5479, "step": 10724 }, { "epoch": 0.31598485629686374, "grad_norm": 1.2691410172040323, "learning_rate": 8.645209218345869e-06, "loss": 0.3419, "step": 10725 }, { "epoch": 0.3160143187543273, "grad_norm": 1.5967188006014907, "learning_rate": 8.644857217925812e-06, "loss": 0.6378, "step": 10726 }, { "epoch": 0.3160437812117909, "grad_norm": 1.497819410403705, "learning_rate": 8.644505178951567e-06, "loss": 0.5272, "step": 10727 }, { "epoch": 0.31607324366925443, "grad_norm": 1.9197960320905207, "learning_rate": 8.644153101426855e-06, "loss": 0.58, "step": 10728 }, { "epoch": 0.31610270612671804, "grad_norm": 1.4409413907126987, "learning_rate": 8.643800985355405e-06, "loss": 0.4903, "step": 10729 }, { "epoch": 0.3161321685841816, "grad_norm": 1.527582731410478, "learning_rate": 8.643448830740935e-06, "loss": 0.3952, "step": 10730 }, { "epoch": 0.3161616310416452, "grad_norm": 1.8288510905348314, "learning_rate": 8.643096637587176e-06, "loss": 0.6395, "step": 10731 }, { "epoch": 0.31619109349910873, "grad_norm": 1.4526192220622902, "learning_rate": 8.64274440589785e-06, "loss": 0.5354, "step": 10732 }, { "epoch": 0.31622055595657234, "grad_norm": 1.6333490900321292, "learning_rate": 8.642392135676684e-06, "loss": 0.4201, "step": 10733 }, { "epoch": 0.31625001841403594, "grad_norm": 1.52739641728701, "learning_rate": 8.642039826927404e-06, "loss": 0.4738, "step": 10734 }, { "epoch": 0.3162794808714995, "grad_norm": 1.512171764601614, "learning_rate": 8.641687479653738e-06, "loss": 0.5483, "step": 10735 }, { "epoch": 0.3163089433289631, "grad_norm": 1.7448129782780581, "learning_rate": 8.641335093859408e-06, "loss": 0.4146, "step": 10736 }, { "epoch": 0.31633840578642664, "grad_norm": 1.5481814164508816, "learning_rate": 8.640982669548149e-06, "loss": 0.5281, "step": 10737 }, { "epoch": 0.31636786824389024, "grad_norm": 1.5595767095338742, "learning_rate": 8.640630206723683e-06, "loss": 0.4387, "step": 10738 }, { "epoch": 0.3163973307013538, "grad_norm": 1.452162805501578, "learning_rate": 8.640277705389741e-06, "loss": 0.4437, "step": 10739 }, { "epoch": 0.3164267931588174, "grad_norm": 1.423977691722857, "learning_rate": 8.639925165550052e-06, "loss": 0.4574, "step": 10740 }, { "epoch": 0.31645625561628093, "grad_norm": 1.654385063017099, "learning_rate": 8.639572587208341e-06, "loss": 0.5189, "step": 10741 }, { "epoch": 0.31648571807374454, "grad_norm": 1.3644570229774404, "learning_rate": 8.639219970368343e-06, "loss": 0.2697, "step": 10742 }, { "epoch": 0.3165151805312081, "grad_norm": 1.6114103598479192, "learning_rate": 8.638867315033784e-06, "loss": 0.5297, "step": 10743 }, { "epoch": 0.3165446429886717, "grad_norm": 1.6186607979543284, "learning_rate": 8.638514621208395e-06, "loss": 0.4666, "step": 10744 }, { "epoch": 0.31657410544613523, "grad_norm": 1.5872461308082833, "learning_rate": 8.638161888895909e-06, "loss": 0.524, "step": 10745 }, { "epoch": 0.31660356790359884, "grad_norm": 1.5486495839078849, "learning_rate": 8.637809118100054e-06, "loss": 0.3635, "step": 10746 }, { "epoch": 0.31663303036106244, "grad_norm": 1.3962421887444398, "learning_rate": 8.637456308824561e-06, "loss": 0.4416, "step": 10747 }, { "epoch": 0.316662492818526, "grad_norm": 1.366442533367088, "learning_rate": 8.637103461073166e-06, "loss": 0.5038, "step": 10748 }, { "epoch": 0.3166919552759896, "grad_norm": 1.418536010772072, "learning_rate": 8.636750574849599e-06, "loss": 0.4182, "step": 10749 }, { "epoch": 0.31672141773345314, "grad_norm": 1.6634048654169247, "learning_rate": 8.636397650157594e-06, "loss": 0.4588, "step": 10750 }, { "epoch": 0.31675088019091674, "grad_norm": 1.517129147817112, "learning_rate": 8.63604468700088e-06, "loss": 0.5265, "step": 10751 }, { "epoch": 0.3167803426483803, "grad_norm": 1.5432952777195854, "learning_rate": 8.635691685383196e-06, "loss": 0.4172, "step": 10752 }, { "epoch": 0.3168098051058439, "grad_norm": 1.5626868155824445, "learning_rate": 8.63533864530827e-06, "loss": 0.6629, "step": 10753 }, { "epoch": 0.31683926756330744, "grad_norm": 1.4601371338230082, "learning_rate": 8.634985566779844e-06, "loss": 0.4175, "step": 10754 }, { "epoch": 0.31686873002077104, "grad_norm": 1.6317832542783826, "learning_rate": 8.634632449801645e-06, "loss": 0.4649, "step": 10755 }, { "epoch": 0.3168981924782346, "grad_norm": 1.4992452570659216, "learning_rate": 8.634279294377414e-06, "loss": 0.3715, "step": 10756 }, { "epoch": 0.3169276549356982, "grad_norm": 1.7987019074389747, "learning_rate": 8.633926100510883e-06, "loss": 0.3847, "step": 10757 }, { "epoch": 0.3169571173931618, "grad_norm": 1.4399483952797636, "learning_rate": 8.63357286820579e-06, "loss": 0.4892, "step": 10758 }, { "epoch": 0.31698657985062534, "grad_norm": 1.7689541547549503, "learning_rate": 8.63321959746587e-06, "loss": 0.4408, "step": 10759 }, { "epoch": 0.31701604230808894, "grad_norm": 1.33663718910932, "learning_rate": 8.63286628829486e-06, "loss": 0.4278, "step": 10760 }, { "epoch": 0.3170455047655525, "grad_norm": 1.6930061815013173, "learning_rate": 8.632512940696497e-06, "loss": 0.5444, "step": 10761 }, { "epoch": 0.3170749672230161, "grad_norm": 1.5343972931288499, "learning_rate": 8.632159554674521e-06, "loss": 0.5183, "step": 10762 }, { "epoch": 0.31710442968047964, "grad_norm": 1.476456059955013, "learning_rate": 8.631806130232666e-06, "loss": 0.4322, "step": 10763 }, { "epoch": 0.31713389213794324, "grad_norm": 1.717324592528502, "learning_rate": 8.631452667374675e-06, "loss": 0.5724, "step": 10764 }, { "epoch": 0.3171633545954068, "grad_norm": 1.369356947064326, "learning_rate": 8.631099166104282e-06, "loss": 0.4253, "step": 10765 }, { "epoch": 0.3171928170528704, "grad_norm": 1.6400823352299991, "learning_rate": 8.63074562642523e-06, "loss": 0.4799, "step": 10766 }, { "epoch": 0.31722227951033394, "grad_norm": 1.5925485205157837, "learning_rate": 8.630392048341257e-06, "loss": 0.5931, "step": 10767 }, { "epoch": 0.31725174196779754, "grad_norm": 1.5304543974034621, "learning_rate": 8.630038431856104e-06, "loss": 0.5824, "step": 10768 }, { "epoch": 0.3172812044252611, "grad_norm": 1.68491950438623, "learning_rate": 8.62968477697351e-06, "loss": 0.4759, "step": 10769 }, { "epoch": 0.3173106668827247, "grad_norm": 1.665322906422613, "learning_rate": 8.629331083697216e-06, "loss": 0.5921, "step": 10770 }, { "epoch": 0.3173401293401883, "grad_norm": 1.8575989623715052, "learning_rate": 8.628977352030966e-06, "loss": 0.5947, "step": 10771 }, { "epoch": 0.31736959179765184, "grad_norm": 1.7101958614104593, "learning_rate": 8.628623581978496e-06, "loss": 0.4489, "step": 10772 }, { "epoch": 0.31739905425511544, "grad_norm": 1.6873270948535475, "learning_rate": 8.628269773543554e-06, "loss": 0.4735, "step": 10773 }, { "epoch": 0.317428516712579, "grad_norm": 1.551019067071749, "learning_rate": 8.627915926729881e-06, "loss": 0.7042, "step": 10774 }, { "epoch": 0.3174579791700426, "grad_norm": 1.661759981380959, "learning_rate": 8.627562041541218e-06, "loss": 0.5797, "step": 10775 }, { "epoch": 0.31748744162750614, "grad_norm": 1.4745276746529683, "learning_rate": 8.62720811798131e-06, "loss": 0.5588, "step": 10776 }, { "epoch": 0.31751690408496974, "grad_norm": 1.6310221442855808, "learning_rate": 8.626854156053899e-06, "loss": 0.616, "step": 10777 }, { "epoch": 0.3175463665424333, "grad_norm": 1.557093648143432, "learning_rate": 8.62650015576273e-06, "loss": 0.5626, "step": 10778 }, { "epoch": 0.3175758289998969, "grad_norm": 1.473369875734234, "learning_rate": 8.626146117111547e-06, "loss": 0.4361, "step": 10779 }, { "epoch": 0.31760529145736044, "grad_norm": 1.4698252567129861, "learning_rate": 8.625792040104096e-06, "loss": 0.4788, "step": 10780 }, { "epoch": 0.31763475391482404, "grad_norm": 1.6732818721096965, "learning_rate": 8.625437924744122e-06, "loss": 0.4846, "step": 10781 }, { "epoch": 0.3176642163722876, "grad_norm": 1.6001030929793656, "learning_rate": 8.625083771035369e-06, "loss": 0.5291, "step": 10782 }, { "epoch": 0.3176936788297512, "grad_norm": 1.7288742082062014, "learning_rate": 8.624729578981588e-06, "loss": 0.5394, "step": 10783 }, { "epoch": 0.3177231412872148, "grad_norm": 1.5515485998135103, "learning_rate": 8.62437534858652e-06, "loss": 0.5146, "step": 10784 }, { "epoch": 0.31775260374467834, "grad_norm": 1.6683259090057416, "learning_rate": 8.624021079853914e-06, "loss": 0.5437, "step": 10785 }, { "epoch": 0.31778206620214194, "grad_norm": 1.5354314500638515, "learning_rate": 8.623666772787518e-06, "loss": 0.462, "step": 10786 }, { "epoch": 0.3178115286596055, "grad_norm": 1.3283172810941506, "learning_rate": 8.623312427391079e-06, "loss": 0.3798, "step": 10787 }, { "epoch": 0.3178409911170691, "grad_norm": 1.7248747322150542, "learning_rate": 8.622958043668345e-06, "loss": 0.5746, "step": 10788 }, { "epoch": 0.31787045357453264, "grad_norm": 1.3490901121000913, "learning_rate": 8.622603621623066e-06, "loss": 0.3365, "step": 10789 }, { "epoch": 0.31789991603199624, "grad_norm": 1.5036655153511798, "learning_rate": 8.622249161258989e-06, "loss": 0.384, "step": 10790 }, { "epoch": 0.3179293784894598, "grad_norm": 1.6424294291108494, "learning_rate": 8.621894662579864e-06, "loss": 0.4472, "step": 10791 }, { "epoch": 0.3179588409469234, "grad_norm": 1.444312404254224, "learning_rate": 8.621540125589443e-06, "loss": 0.4026, "step": 10792 }, { "epoch": 0.31798830340438694, "grad_norm": 1.4515067079374895, "learning_rate": 8.621185550291473e-06, "loss": 0.4152, "step": 10793 }, { "epoch": 0.31801776586185054, "grad_norm": 1.3838707467103908, "learning_rate": 8.620830936689705e-06, "loss": 0.4275, "step": 10794 }, { "epoch": 0.3180472283193141, "grad_norm": 1.469373115351746, "learning_rate": 8.620476284787892e-06, "loss": 0.426, "step": 10795 }, { "epoch": 0.3180766907767777, "grad_norm": 1.6596707275959142, "learning_rate": 8.620121594589785e-06, "loss": 0.4868, "step": 10796 }, { "epoch": 0.3181061532342413, "grad_norm": 1.7263894552174162, "learning_rate": 8.619766866099132e-06, "loss": 0.4782, "step": 10797 }, { "epoch": 0.31813561569170484, "grad_norm": 1.7382842952668818, "learning_rate": 8.61941209931969e-06, "loss": 0.4845, "step": 10798 }, { "epoch": 0.31816507814916845, "grad_norm": 1.832683689148082, "learning_rate": 8.619057294255212e-06, "loss": 0.6221, "step": 10799 }, { "epoch": 0.318194540606632, "grad_norm": 1.5973077725763343, "learning_rate": 8.618702450909446e-06, "loss": 0.5215, "step": 10800 }, { "epoch": 0.3182240030640956, "grad_norm": 1.6490092903553937, "learning_rate": 8.61834756928615e-06, "loss": 0.4877, "step": 10801 }, { "epoch": 0.31825346552155914, "grad_norm": 1.655076581243801, "learning_rate": 8.617992649389076e-06, "loss": 0.4954, "step": 10802 }, { "epoch": 0.31828292797902275, "grad_norm": 1.56306823538829, "learning_rate": 8.617637691221977e-06, "loss": 0.5806, "step": 10803 }, { "epoch": 0.3183123904364863, "grad_norm": 1.5624137766959083, "learning_rate": 8.61728269478861e-06, "loss": 0.5849, "step": 10804 }, { "epoch": 0.3183418528939499, "grad_norm": 1.426925044330617, "learning_rate": 8.616927660092728e-06, "loss": 0.4216, "step": 10805 }, { "epoch": 0.31837131535141344, "grad_norm": 1.4855992460375287, "learning_rate": 8.616572587138087e-06, "loss": 0.4954, "step": 10806 }, { "epoch": 0.31840077780887704, "grad_norm": 1.7224997447875976, "learning_rate": 8.616217475928445e-06, "loss": 0.5166, "step": 10807 }, { "epoch": 0.3184302402663406, "grad_norm": 1.4481019406455329, "learning_rate": 8.615862326467556e-06, "loss": 0.3652, "step": 10808 }, { "epoch": 0.3184597027238042, "grad_norm": 1.6228746851378202, "learning_rate": 8.615507138759179e-06, "loss": 0.3988, "step": 10809 }, { "epoch": 0.3184891651812678, "grad_norm": 1.6463426843300892, "learning_rate": 8.615151912807068e-06, "loss": 0.4701, "step": 10810 }, { "epoch": 0.31851862763873134, "grad_norm": 1.372966198351988, "learning_rate": 8.614796648614981e-06, "loss": 0.4195, "step": 10811 }, { "epoch": 0.31854809009619495, "grad_norm": 1.4725412359525243, "learning_rate": 8.614441346186677e-06, "loss": 0.531, "step": 10812 }, { "epoch": 0.3185775525536585, "grad_norm": 1.5660274302270112, "learning_rate": 8.614086005525915e-06, "loss": 0.4383, "step": 10813 }, { "epoch": 0.3186070150111221, "grad_norm": 1.5300074036374423, "learning_rate": 8.613730626636452e-06, "loss": 0.4113, "step": 10814 }, { "epoch": 0.31863647746858564, "grad_norm": 1.774907585912201, "learning_rate": 8.613375209522047e-06, "loss": 0.6319, "step": 10815 }, { "epoch": 0.31866593992604925, "grad_norm": 1.6458804719771016, "learning_rate": 8.613019754186463e-06, "loss": 0.5142, "step": 10816 }, { "epoch": 0.3186954023835128, "grad_norm": 1.4942849720220646, "learning_rate": 8.612664260633454e-06, "loss": 0.481, "step": 10817 }, { "epoch": 0.3187248648409764, "grad_norm": 1.2589728181539919, "learning_rate": 8.612308728866785e-06, "loss": 0.3432, "step": 10818 }, { "epoch": 0.31875432729843994, "grad_norm": 1.768310116771332, "learning_rate": 8.611953158890214e-06, "loss": 0.4764, "step": 10819 }, { "epoch": 0.31878378975590355, "grad_norm": 1.665310697276872, "learning_rate": 8.611597550707504e-06, "loss": 0.4955, "step": 10820 }, { "epoch": 0.3188132522133671, "grad_norm": 1.4873014930410746, "learning_rate": 8.611241904322417e-06, "loss": 0.4858, "step": 10821 }, { "epoch": 0.3188427146708307, "grad_norm": 1.4419344672706758, "learning_rate": 8.610886219738713e-06, "loss": 0.4439, "step": 10822 }, { "epoch": 0.3188721771282943, "grad_norm": 1.4151722004392313, "learning_rate": 8.610530496960155e-06, "loss": 0.4152, "step": 10823 }, { "epoch": 0.31890163958575785, "grad_norm": 1.5790554268890309, "learning_rate": 8.610174735990504e-06, "loss": 0.5386, "step": 10824 }, { "epoch": 0.31893110204322145, "grad_norm": 1.6173102086465685, "learning_rate": 8.609818936833526e-06, "loss": 0.4833, "step": 10825 }, { "epoch": 0.318960564500685, "grad_norm": 1.5407760963428496, "learning_rate": 8.609463099492985e-06, "loss": 0.5277, "step": 10826 }, { "epoch": 0.3189900269581486, "grad_norm": 1.4963112846897133, "learning_rate": 8.609107223972641e-06, "loss": 0.4189, "step": 10827 }, { "epoch": 0.31901948941561215, "grad_norm": 1.5420110681930466, "learning_rate": 8.608751310276262e-06, "loss": 0.5288, "step": 10828 }, { "epoch": 0.31904895187307575, "grad_norm": 1.5340477226662808, "learning_rate": 8.60839535840761e-06, "loss": 0.438, "step": 10829 }, { "epoch": 0.3190784143305393, "grad_norm": 1.4757690926206724, "learning_rate": 8.608039368370452e-06, "loss": 0.3817, "step": 10830 }, { "epoch": 0.3191078767880029, "grad_norm": 1.5666287700740795, "learning_rate": 8.607683340168556e-06, "loss": 0.5112, "step": 10831 }, { "epoch": 0.31913733924546644, "grad_norm": 1.942815921659509, "learning_rate": 8.607327273805682e-06, "loss": 0.6379, "step": 10832 }, { "epoch": 0.31916680170293005, "grad_norm": 1.5165396514202203, "learning_rate": 8.6069711692856e-06, "loss": 0.4833, "step": 10833 }, { "epoch": 0.3191962641603936, "grad_norm": 1.3071869740879443, "learning_rate": 8.606615026612077e-06, "loss": 0.3753, "step": 10834 }, { "epoch": 0.3192257266178572, "grad_norm": 1.4753625515214555, "learning_rate": 8.606258845788879e-06, "loss": 0.474, "step": 10835 }, { "epoch": 0.3192551890753208, "grad_norm": 1.5200149057631407, "learning_rate": 8.605902626819775e-06, "loss": 0.6053, "step": 10836 }, { "epoch": 0.31928465153278435, "grad_norm": 1.6805939717569347, "learning_rate": 8.605546369708531e-06, "loss": 0.5001, "step": 10837 }, { "epoch": 0.31931411399024795, "grad_norm": 1.4784337740780977, "learning_rate": 8.605190074458915e-06, "loss": 0.547, "step": 10838 }, { "epoch": 0.3193435764477115, "grad_norm": 1.4984535173581113, "learning_rate": 8.604833741074699e-06, "loss": 0.4385, "step": 10839 }, { "epoch": 0.3193730389051751, "grad_norm": 1.3751432298188964, "learning_rate": 8.604477369559649e-06, "loss": 0.4109, "step": 10840 }, { "epoch": 0.31940250136263865, "grad_norm": 1.444863416223475, "learning_rate": 8.604120959917537e-06, "loss": 0.4391, "step": 10841 }, { "epoch": 0.31943196382010225, "grad_norm": 1.2601813497820435, "learning_rate": 8.603764512152131e-06, "loss": 0.3601, "step": 10842 }, { "epoch": 0.3194614262775658, "grad_norm": 1.5142806954659365, "learning_rate": 8.603408026267203e-06, "loss": 0.3986, "step": 10843 }, { "epoch": 0.3194908887350294, "grad_norm": 1.6134061684512513, "learning_rate": 8.603051502266524e-06, "loss": 0.4767, "step": 10844 }, { "epoch": 0.31952035119249295, "grad_norm": 1.559334628591819, "learning_rate": 8.602694940153862e-06, "loss": 0.4129, "step": 10845 }, { "epoch": 0.31954981364995655, "grad_norm": 1.6763566403156611, "learning_rate": 8.602338339932992e-06, "loss": 0.5499, "step": 10846 }, { "epoch": 0.3195792761074201, "grad_norm": 1.6652933719921656, "learning_rate": 8.601981701607685e-06, "loss": 0.6031, "step": 10847 }, { "epoch": 0.3196087385648837, "grad_norm": 1.5981477767242322, "learning_rate": 8.601625025181713e-06, "loss": 0.4945, "step": 10848 }, { "epoch": 0.3196382010223473, "grad_norm": 1.4616484892754245, "learning_rate": 8.60126831065885e-06, "loss": 0.4387, "step": 10849 }, { "epoch": 0.31966766347981085, "grad_norm": 1.4564903967932379, "learning_rate": 8.600911558042867e-06, "loss": 0.4805, "step": 10850 }, { "epoch": 0.31969712593727445, "grad_norm": 1.3179456033223484, "learning_rate": 8.60055476733754e-06, "loss": 0.4464, "step": 10851 }, { "epoch": 0.319726588394738, "grad_norm": 1.7447714781066146, "learning_rate": 8.600197938546642e-06, "loss": 0.4509, "step": 10852 }, { "epoch": 0.3197560508522016, "grad_norm": 1.468261064957594, "learning_rate": 8.599841071673947e-06, "loss": 0.4854, "step": 10853 }, { "epoch": 0.31978551330966515, "grad_norm": 1.6119958922661626, "learning_rate": 8.59948416672323e-06, "loss": 0.4732, "step": 10854 }, { "epoch": 0.31981497576712875, "grad_norm": 1.5817837638104246, "learning_rate": 8.599127223698267e-06, "loss": 0.4405, "step": 10855 }, { "epoch": 0.3198444382245923, "grad_norm": 1.5108545473999788, "learning_rate": 8.598770242602832e-06, "loss": 0.4128, "step": 10856 }, { "epoch": 0.3198739006820559, "grad_norm": 1.421475651732875, "learning_rate": 8.598413223440703e-06, "loss": 0.5249, "step": 10857 }, { "epoch": 0.31990336313951945, "grad_norm": 1.5478092061473159, "learning_rate": 8.598056166215655e-06, "loss": 0.5165, "step": 10858 }, { "epoch": 0.31993282559698305, "grad_norm": 1.3876686296430418, "learning_rate": 8.597699070931467e-06, "loss": 0.403, "step": 10859 }, { "epoch": 0.3199622880544466, "grad_norm": 1.501320427339906, "learning_rate": 8.597341937591912e-06, "loss": 0.4546, "step": 10860 }, { "epoch": 0.3199917505119102, "grad_norm": 1.5448278549199523, "learning_rate": 8.596984766200771e-06, "loss": 0.4083, "step": 10861 }, { "epoch": 0.3200212129693738, "grad_norm": 1.6747185480648834, "learning_rate": 8.596627556761823e-06, "loss": 0.5121, "step": 10862 }, { "epoch": 0.32005067542683735, "grad_norm": 1.640210249243732, "learning_rate": 8.596270309278845e-06, "loss": 0.5209, "step": 10863 }, { "epoch": 0.32008013788430095, "grad_norm": 1.5192262816055317, "learning_rate": 8.595913023755614e-06, "loss": 0.4198, "step": 10864 }, { "epoch": 0.3201096003417645, "grad_norm": 1.5741630569381422, "learning_rate": 8.595555700195913e-06, "loss": 0.3934, "step": 10865 }, { "epoch": 0.3201390627992281, "grad_norm": 1.6094052025139125, "learning_rate": 8.595198338603516e-06, "loss": 0.5018, "step": 10866 }, { "epoch": 0.32016852525669165, "grad_norm": 1.7926103224551186, "learning_rate": 8.59484093898221e-06, "loss": 0.5814, "step": 10867 }, { "epoch": 0.32019798771415525, "grad_norm": 1.3272347464926328, "learning_rate": 8.594483501335771e-06, "loss": 0.3769, "step": 10868 }, { "epoch": 0.3202274501716188, "grad_norm": 1.7092578841946473, "learning_rate": 8.59412602566798e-06, "loss": 0.49, "step": 10869 }, { "epoch": 0.3202569126290824, "grad_norm": 1.4174230429561872, "learning_rate": 8.59376851198262e-06, "loss": 0.2941, "step": 10870 }, { "epoch": 0.32028637508654595, "grad_norm": 1.6630798398348603, "learning_rate": 8.593410960283472e-06, "loss": 0.4811, "step": 10871 }, { "epoch": 0.32031583754400955, "grad_norm": 1.6194909565110478, "learning_rate": 8.593053370574318e-06, "loss": 0.5649, "step": 10872 }, { "epoch": 0.3203453000014731, "grad_norm": 1.5600110938708107, "learning_rate": 8.592695742858941e-06, "loss": 0.4601, "step": 10873 }, { "epoch": 0.3203747624589367, "grad_norm": 1.6588906572389708, "learning_rate": 8.592338077141123e-06, "loss": 0.4854, "step": 10874 }, { "epoch": 0.3204042249164003, "grad_norm": 1.640395829599966, "learning_rate": 8.591980373424648e-06, "loss": 0.5208, "step": 10875 }, { "epoch": 0.32043368737386385, "grad_norm": 1.7198387712447507, "learning_rate": 8.591622631713299e-06, "loss": 0.5184, "step": 10876 }, { "epoch": 0.32046314983132745, "grad_norm": 1.6309812863866409, "learning_rate": 8.591264852010858e-06, "loss": 0.511, "step": 10877 }, { "epoch": 0.320492612288791, "grad_norm": 1.4499503779029146, "learning_rate": 8.590907034321116e-06, "loss": 0.3504, "step": 10878 }, { "epoch": 0.3205220747462546, "grad_norm": 1.496449856974674, "learning_rate": 8.590549178647852e-06, "loss": 0.4811, "step": 10879 }, { "epoch": 0.32055153720371815, "grad_norm": 1.5820823679607696, "learning_rate": 8.590191284994852e-06, "loss": 0.4887, "step": 10880 }, { "epoch": 0.32058099966118175, "grad_norm": 1.507624209244777, "learning_rate": 8.589833353365903e-06, "loss": 0.3439, "step": 10881 }, { "epoch": 0.3206104621186453, "grad_norm": 1.8762335894042461, "learning_rate": 8.589475383764792e-06, "loss": 0.5601, "step": 10882 }, { "epoch": 0.3206399245761089, "grad_norm": 1.69232988296732, "learning_rate": 8.589117376195305e-06, "loss": 0.4131, "step": 10883 }, { "epoch": 0.32066938703357245, "grad_norm": 1.5339793953459169, "learning_rate": 8.588759330661225e-06, "loss": 0.5435, "step": 10884 }, { "epoch": 0.32069884949103605, "grad_norm": 1.6604355302844318, "learning_rate": 8.588401247166345e-06, "loss": 0.5482, "step": 10885 }, { "epoch": 0.3207283119484996, "grad_norm": 1.4928605120181375, "learning_rate": 8.588043125714449e-06, "loss": 0.5731, "step": 10886 }, { "epoch": 0.3207577744059632, "grad_norm": 1.6066460552319173, "learning_rate": 8.587684966309327e-06, "loss": 0.5261, "step": 10887 }, { "epoch": 0.3207872368634268, "grad_norm": 1.750195017545624, "learning_rate": 8.587326768954768e-06, "loss": 0.6323, "step": 10888 }, { "epoch": 0.32081669932089035, "grad_norm": 1.587972316994114, "learning_rate": 8.586968533654558e-06, "loss": 0.4249, "step": 10889 }, { "epoch": 0.32084616177835396, "grad_norm": 1.486085502979462, "learning_rate": 8.58661026041249e-06, "loss": 0.4691, "step": 10890 }, { "epoch": 0.3208756242358175, "grad_norm": 1.372125138482861, "learning_rate": 8.58625194923235e-06, "loss": 0.4522, "step": 10891 }, { "epoch": 0.3209050866932811, "grad_norm": 1.4959813399738322, "learning_rate": 8.58589360011793e-06, "loss": 0.3994, "step": 10892 }, { "epoch": 0.32093454915074465, "grad_norm": 1.529224639307776, "learning_rate": 8.58553521307302e-06, "loss": 0.5198, "step": 10893 }, { "epoch": 0.32096401160820826, "grad_norm": 1.5453925925632748, "learning_rate": 8.58517678810141e-06, "loss": 0.4165, "step": 10894 }, { "epoch": 0.3209934740656718, "grad_norm": 1.4243691708894222, "learning_rate": 8.584818325206894e-06, "loss": 0.4231, "step": 10895 }, { "epoch": 0.3210229365231354, "grad_norm": 1.5807721643841772, "learning_rate": 8.584459824393263e-06, "loss": 0.5118, "step": 10896 }, { "epoch": 0.32105239898059895, "grad_norm": 1.6895978972178296, "learning_rate": 8.584101285664308e-06, "loss": 0.6721, "step": 10897 }, { "epoch": 0.32108186143806255, "grad_norm": 1.5489018064202278, "learning_rate": 8.583742709023821e-06, "loss": 0.4854, "step": 10898 }, { "epoch": 0.3211113238955261, "grad_norm": 1.639727711601931, "learning_rate": 8.583384094475598e-06, "loss": 0.4058, "step": 10899 }, { "epoch": 0.3211407863529897, "grad_norm": 1.303189201422334, "learning_rate": 8.583025442023428e-06, "loss": 0.3826, "step": 10900 }, { "epoch": 0.3211702488104533, "grad_norm": 1.529494747841285, "learning_rate": 8.582666751671107e-06, "loss": 0.5558, "step": 10901 }, { "epoch": 0.32119971126791685, "grad_norm": 1.399872398866437, "learning_rate": 8.58230802342243e-06, "loss": 0.322, "step": 10902 }, { "epoch": 0.32122917372538046, "grad_norm": 1.3629717828963115, "learning_rate": 8.58194925728119e-06, "loss": 0.4522, "step": 10903 }, { "epoch": 0.321258636182844, "grad_norm": 1.3160394002248121, "learning_rate": 8.581590453251181e-06, "loss": 0.3544, "step": 10904 }, { "epoch": 0.3212880986403076, "grad_norm": 1.4838446556942961, "learning_rate": 8.581231611336202e-06, "loss": 0.4281, "step": 10905 }, { "epoch": 0.32131756109777115, "grad_norm": 1.5337897825526159, "learning_rate": 8.580872731540045e-06, "loss": 0.505, "step": 10906 }, { "epoch": 0.32134702355523476, "grad_norm": 1.4813361865107417, "learning_rate": 8.580513813866507e-06, "loss": 0.5109, "step": 10907 }, { "epoch": 0.3213764860126983, "grad_norm": 1.5956027005033375, "learning_rate": 8.580154858319387e-06, "loss": 0.5755, "step": 10908 }, { "epoch": 0.3214059484701619, "grad_norm": 1.6609918438507947, "learning_rate": 8.57979586490248e-06, "loss": 0.516, "step": 10909 }, { "epoch": 0.32143541092762545, "grad_norm": 1.4955887328673094, "learning_rate": 8.57943683361958e-06, "loss": 0.2981, "step": 10910 }, { "epoch": 0.32146487338508906, "grad_norm": 1.650536906844689, "learning_rate": 8.579077764474492e-06, "loss": 0.5732, "step": 10911 }, { "epoch": 0.3214943358425526, "grad_norm": 1.5419578104045175, "learning_rate": 8.578718657471008e-06, "loss": 0.483, "step": 10912 }, { "epoch": 0.3215237983000162, "grad_norm": 1.428317822629159, "learning_rate": 8.578359512612929e-06, "loss": 0.4512, "step": 10913 }, { "epoch": 0.3215532607574798, "grad_norm": 1.617034422240293, "learning_rate": 8.578000329904054e-06, "loss": 0.6032, "step": 10914 }, { "epoch": 0.32158272321494336, "grad_norm": 1.589649408552152, "learning_rate": 8.577641109348182e-06, "loss": 0.481, "step": 10915 }, { "epoch": 0.32161218567240696, "grad_norm": 1.4081110232377938, "learning_rate": 8.577281850949112e-06, "loss": 0.347, "step": 10916 }, { "epoch": 0.3216416481298705, "grad_norm": 1.4621210289540938, "learning_rate": 8.576922554710647e-06, "loss": 0.5467, "step": 10917 }, { "epoch": 0.3216711105873341, "grad_norm": 1.473397983393701, "learning_rate": 8.576563220636583e-06, "loss": 0.43, "step": 10918 }, { "epoch": 0.32170057304479766, "grad_norm": 1.4685660658919815, "learning_rate": 8.576203848730724e-06, "loss": 0.5211, "step": 10919 }, { "epoch": 0.32173003550226126, "grad_norm": 1.631662003186432, "learning_rate": 8.57584443899687e-06, "loss": 0.5539, "step": 10920 }, { "epoch": 0.3217594979597248, "grad_norm": 1.370121010617452, "learning_rate": 8.575484991438825e-06, "loss": 0.4776, "step": 10921 }, { "epoch": 0.3217889604171884, "grad_norm": 1.6213516992144617, "learning_rate": 8.575125506060387e-06, "loss": 0.5583, "step": 10922 }, { "epoch": 0.32181842287465195, "grad_norm": 1.7532461207553645, "learning_rate": 8.57476598286536e-06, "loss": 0.5854, "step": 10923 }, { "epoch": 0.32184788533211556, "grad_norm": 1.9038824194221395, "learning_rate": 8.574406421857551e-06, "loss": 0.5946, "step": 10924 }, { "epoch": 0.3218773477895791, "grad_norm": 1.5658398440655337, "learning_rate": 8.57404682304076e-06, "loss": 0.5774, "step": 10925 }, { "epoch": 0.3219068102470427, "grad_norm": 1.4095011662774044, "learning_rate": 8.57368718641879e-06, "loss": 0.3557, "step": 10926 }, { "epoch": 0.3219362727045063, "grad_norm": 1.6164082881056596, "learning_rate": 8.573327511995447e-06, "loss": 0.5069, "step": 10927 }, { "epoch": 0.32196573516196986, "grad_norm": 1.5356683908844144, "learning_rate": 8.572967799774532e-06, "loss": 0.609, "step": 10928 }, { "epoch": 0.32199519761943346, "grad_norm": 1.655976621743572, "learning_rate": 8.572608049759855e-06, "loss": 0.6366, "step": 10929 }, { "epoch": 0.322024660076897, "grad_norm": 1.720350819664706, "learning_rate": 8.572248261955219e-06, "loss": 0.555, "step": 10930 }, { "epoch": 0.3220541225343606, "grad_norm": 1.7953414748164969, "learning_rate": 8.57188843636443e-06, "loss": 0.4767, "step": 10931 }, { "epoch": 0.32208358499182416, "grad_norm": 1.4529645535773965, "learning_rate": 8.57152857299129e-06, "loss": 0.4079, "step": 10932 }, { "epoch": 0.32211304744928776, "grad_norm": 1.4278083411553701, "learning_rate": 8.571168671839611e-06, "loss": 0.6011, "step": 10933 }, { "epoch": 0.3221425099067513, "grad_norm": 1.6571249538560504, "learning_rate": 8.570808732913199e-06, "loss": 0.4867, "step": 10934 }, { "epoch": 0.3221719723642149, "grad_norm": 1.4555170225715894, "learning_rate": 8.570448756215862e-06, "loss": 0.389, "step": 10935 }, { "epoch": 0.32220143482167846, "grad_norm": 1.5565640772161213, "learning_rate": 8.570088741751404e-06, "loss": 0.4851, "step": 10936 }, { "epoch": 0.32223089727914206, "grad_norm": 1.4330083699587013, "learning_rate": 8.569728689523635e-06, "loss": 0.4666, "step": 10937 }, { "epoch": 0.3222603597366056, "grad_norm": 1.574785468582785, "learning_rate": 8.569368599536363e-06, "loss": 0.6127, "step": 10938 }, { "epoch": 0.3222898221940692, "grad_norm": 1.627772914954403, "learning_rate": 8.5690084717934e-06, "loss": 0.4157, "step": 10939 }, { "epoch": 0.3223192846515328, "grad_norm": 1.6615219453124923, "learning_rate": 8.568648306298553e-06, "loss": 0.5856, "step": 10940 }, { "epoch": 0.32234874710899636, "grad_norm": 1.7681696184114457, "learning_rate": 8.56828810305563e-06, "loss": 0.6085, "step": 10941 }, { "epoch": 0.32237820956645996, "grad_norm": 1.603947703883638, "learning_rate": 8.567927862068445e-06, "loss": 0.5438, "step": 10942 }, { "epoch": 0.3224076720239235, "grad_norm": 1.6472189400448534, "learning_rate": 8.567567583340805e-06, "loss": 0.5787, "step": 10943 }, { "epoch": 0.3224371344813871, "grad_norm": 1.5850362563297278, "learning_rate": 8.567207266876523e-06, "loss": 0.4721, "step": 10944 }, { "epoch": 0.32246659693885066, "grad_norm": 1.6363171853113896, "learning_rate": 8.56684691267941e-06, "loss": 0.384, "step": 10945 }, { "epoch": 0.32249605939631426, "grad_norm": 1.5246196663254996, "learning_rate": 8.566486520753276e-06, "loss": 0.3561, "step": 10946 }, { "epoch": 0.3225255218537778, "grad_norm": 1.6568516944737328, "learning_rate": 8.566126091101936e-06, "loss": 0.4858, "step": 10947 }, { "epoch": 0.3225549843112414, "grad_norm": 1.4872138680728935, "learning_rate": 8.5657656237292e-06, "loss": 0.4252, "step": 10948 }, { "epoch": 0.32258444676870496, "grad_norm": 1.4740114629718335, "learning_rate": 8.565405118638882e-06, "loss": 0.5311, "step": 10949 }, { "epoch": 0.32261390922616856, "grad_norm": 1.6055220752538615, "learning_rate": 8.565044575834796e-06, "loss": 0.5578, "step": 10950 }, { "epoch": 0.3226433716836321, "grad_norm": 1.2841661720831887, "learning_rate": 8.564683995320755e-06, "loss": 0.3546, "step": 10951 }, { "epoch": 0.3226728341410957, "grad_norm": 1.6373884594701067, "learning_rate": 8.564323377100572e-06, "loss": 0.4947, "step": 10952 }, { "epoch": 0.3227022965985593, "grad_norm": 1.6097467704188242, "learning_rate": 8.563962721178064e-06, "loss": 0.4828, "step": 10953 }, { "epoch": 0.32273175905602286, "grad_norm": 1.4722041247193454, "learning_rate": 8.563602027557044e-06, "loss": 0.4886, "step": 10954 }, { "epoch": 0.32276122151348646, "grad_norm": 1.4928899807601717, "learning_rate": 8.563241296241327e-06, "loss": 0.4423, "step": 10955 }, { "epoch": 0.32279068397095, "grad_norm": 1.6144201840786148, "learning_rate": 8.562880527234729e-06, "loss": 0.4699, "step": 10956 }, { "epoch": 0.3228201464284136, "grad_norm": 1.7002617190434586, "learning_rate": 8.562519720541069e-06, "loss": 0.6505, "step": 10957 }, { "epoch": 0.32284960888587716, "grad_norm": 1.5348831019143123, "learning_rate": 8.56215887616416e-06, "loss": 0.4039, "step": 10958 }, { "epoch": 0.32287907134334076, "grad_norm": 1.6770083733525518, "learning_rate": 8.561797994107818e-06, "loss": 0.6196, "step": 10959 }, { "epoch": 0.3229085338008043, "grad_norm": 1.4860121734485754, "learning_rate": 8.561437074375864e-06, "loss": 0.5313, "step": 10960 }, { "epoch": 0.3229379962582679, "grad_norm": 1.561672683271687, "learning_rate": 8.561076116972115e-06, "loss": 0.5655, "step": 10961 }, { "epoch": 0.32296745871573146, "grad_norm": 1.7371521535519474, "learning_rate": 8.560715121900388e-06, "loss": 0.6003, "step": 10962 }, { "epoch": 0.32299692117319506, "grad_norm": 1.5009771538668766, "learning_rate": 8.5603540891645e-06, "loss": 0.4526, "step": 10963 }, { "epoch": 0.3230263836306586, "grad_norm": 1.5628115414514063, "learning_rate": 8.559993018768272e-06, "loss": 0.3865, "step": 10964 }, { "epoch": 0.3230558460881222, "grad_norm": 1.420525363903502, "learning_rate": 8.559631910715522e-06, "loss": 0.3653, "step": 10965 }, { "epoch": 0.3230853085455858, "grad_norm": 1.483796881013625, "learning_rate": 8.559270765010072e-06, "loss": 0.4763, "step": 10966 }, { "epoch": 0.32311477100304936, "grad_norm": 1.7284927027428556, "learning_rate": 8.55890958165574e-06, "loss": 0.509, "step": 10967 }, { "epoch": 0.32314423346051296, "grad_norm": 1.40243735886257, "learning_rate": 8.558548360656347e-06, "loss": 0.4312, "step": 10968 }, { "epoch": 0.3231736959179765, "grad_norm": 1.4817141301695798, "learning_rate": 8.558187102015713e-06, "loss": 0.5206, "step": 10969 }, { "epoch": 0.3232031583754401, "grad_norm": 1.8630924935396187, "learning_rate": 8.557825805737663e-06, "loss": 0.5324, "step": 10970 }, { "epoch": 0.32323262083290366, "grad_norm": 1.611861205719637, "learning_rate": 8.557464471826014e-06, "loss": 0.4282, "step": 10971 }, { "epoch": 0.32326208329036726, "grad_norm": 1.6459495530473978, "learning_rate": 8.557103100284589e-06, "loss": 0.5852, "step": 10972 }, { "epoch": 0.3232915457478308, "grad_norm": 1.5374254530072897, "learning_rate": 8.556741691117214e-06, "loss": 0.49, "step": 10973 }, { "epoch": 0.3233210082052944, "grad_norm": 1.6000241564168765, "learning_rate": 8.556380244327708e-06, "loss": 0.3912, "step": 10974 }, { "epoch": 0.32335047066275796, "grad_norm": 1.3676913148550078, "learning_rate": 8.556018759919895e-06, "loss": 0.4268, "step": 10975 }, { "epoch": 0.32337993312022156, "grad_norm": 1.7523423116632146, "learning_rate": 8.5556572378976e-06, "loss": 0.5079, "step": 10976 }, { "epoch": 0.3234093955776851, "grad_norm": 1.7659752375771745, "learning_rate": 8.555295678264645e-06, "loss": 0.5467, "step": 10977 }, { "epoch": 0.3234388580351487, "grad_norm": 1.4989786216807846, "learning_rate": 8.554934081024859e-06, "loss": 0.4227, "step": 10978 }, { "epoch": 0.3234683204926123, "grad_norm": 1.8678651358555372, "learning_rate": 8.554572446182062e-06, "loss": 0.4368, "step": 10979 }, { "epoch": 0.32349778295007586, "grad_norm": 1.552138222814412, "learning_rate": 8.55421077374008e-06, "loss": 0.4873, "step": 10980 }, { "epoch": 0.32352724540753947, "grad_norm": 1.5540092367450675, "learning_rate": 8.55384906370274e-06, "loss": 0.5141, "step": 10981 }, { "epoch": 0.323556707865003, "grad_norm": 1.6999610299622578, "learning_rate": 8.553487316073868e-06, "loss": 0.4816, "step": 10982 }, { "epoch": 0.3235861703224666, "grad_norm": 1.4175890790250116, "learning_rate": 8.553125530857291e-06, "loss": 0.4113, "step": 10983 }, { "epoch": 0.32361563277993016, "grad_norm": 1.5545460843948407, "learning_rate": 8.552763708056836e-06, "loss": 0.5544, "step": 10984 }, { "epoch": 0.32364509523739377, "grad_norm": 1.5726180666187344, "learning_rate": 8.552401847676325e-06, "loss": 0.4272, "step": 10985 }, { "epoch": 0.3236745576948573, "grad_norm": 1.596465177220318, "learning_rate": 8.552039949719592e-06, "loss": 0.582, "step": 10986 }, { "epoch": 0.3237040201523209, "grad_norm": 1.6121609598076383, "learning_rate": 8.551678014190463e-06, "loss": 0.5261, "step": 10987 }, { "epoch": 0.32373348260978446, "grad_norm": 1.5439810224505137, "learning_rate": 8.551316041092768e-06, "loss": 0.4747, "step": 10988 }, { "epoch": 0.32376294506724806, "grad_norm": 1.4407310748137507, "learning_rate": 8.550954030430332e-06, "loss": 0.4595, "step": 10989 }, { "epoch": 0.3237924075247116, "grad_norm": 1.5149367052328457, "learning_rate": 8.550591982206988e-06, "loss": 0.4526, "step": 10990 }, { "epoch": 0.3238218699821752, "grad_norm": 1.6245334283744604, "learning_rate": 8.550229896426563e-06, "loss": 0.5151, "step": 10991 }, { "epoch": 0.3238513324396388, "grad_norm": 1.593325481139877, "learning_rate": 8.549867773092887e-06, "loss": 0.5609, "step": 10992 }, { "epoch": 0.32388079489710236, "grad_norm": 1.713406983779066, "learning_rate": 8.549505612209794e-06, "loss": 0.5551, "step": 10993 }, { "epoch": 0.32391025735456597, "grad_norm": 1.4514601433204894, "learning_rate": 8.54914341378111e-06, "loss": 0.3878, "step": 10994 }, { "epoch": 0.3239397198120295, "grad_norm": 1.357489976650935, "learning_rate": 8.548781177810672e-06, "loss": 0.3704, "step": 10995 }, { "epoch": 0.3239691822694931, "grad_norm": 1.3457783474273974, "learning_rate": 8.548418904302305e-06, "loss": 0.3613, "step": 10996 }, { "epoch": 0.32399864472695666, "grad_norm": 1.5024737647505673, "learning_rate": 8.548056593259845e-06, "loss": 0.5356, "step": 10997 }, { "epoch": 0.32402810718442027, "grad_norm": 1.5784310403107267, "learning_rate": 8.547694244687125e-06, "loss": 0.4793, "step": 10998 }, { "epoch": 0.3240575696418838, "grad_norm": 1.5215541918014426, "learning_rate": 8.547331858587976e-06, "loss": 0.614, "step": 10999 }, { "epoch": 0.3240870320993474, "grad_norm": 1.4286140111371544, "learning_rate": 8.546969434966231e-06, "loss": 0.3319, "step": 11000 }, { "epoch": 0.32411649455681096, "grad_norm": 1.8077306145268786, "learning_rate": 8.546606973825724e-06, "loss": 0.5958, "step": 11001 }, { "epoch": 0.32414595701427457, "grad_norm": 1.4822981709883847, "learning_rate": 8.546244475170291e-06, "loss": 0.4366, "step": 11002 }, { "epoch": 0.3241754194717381, "grad_norm": 1.6587767543080583, "learning_rate": 8.545881939003765e-06, "loss": 0.5061, "step": 11003 }, { "epoch": 0.3242048819292017, "grad_norm": 1.5650408652509695, "learning_rate": 8.545519365329977e-06, "loss": 0.5239, "step": 11004 }, { "epoch": 0.3242343443866653, "grad_norm": 1.4394619896994618, "learning_rate": 8.545156754152768e-06, "loss": 0.5313, "step": 11005 }, { "epoch": 0.32426380684412887, "grad_norm": 1.4743226157816625, "learning_rate": 8.544794105475972e-06, "loss": 0.4362, "step": 11006 }, { "epoch": 0.32429326930159247, "grad_norm": 1.5669539447527043, "learning_rate": 8.544431419303422e-06, "loss": 0.5045, "step": 11007 }, { "epoch": 0.324322731759056, "grad_norm": 1.4940927865697582, "learning_rate": 8.544068695638958e-06, "loss": 0.4382, "step": 11008 }, { "epoch": 0.3243521942165196, "grad_norm": 1.5894301863575, "learning_rate": 8.543705934486415e-06, "loss": 0.4047, "step": 11009 }, { "epoch": 0.32438165667398317, "grad_norm": 1.6738563220979485, "learning_rate": 8.543343135849632e-06, "loss": 0.5248, "step": 11010 }, { "epoch": 0.32441111913144677, "grad_norm": 1.5667204063070435, "learning_rate": 8.542980299732444e-06, "loss": 0.3767, "step": 11011 }, { "epoch": 0.3244405815889103, "grad_norm": 1.6167442453732124, "learning_rate": 8.542617426138691e-06, "loss": 0.5998, "step": 11012 }, { "epoch": 0.3244700440463739, "grad_norm": 1.5222179853899938, "learning_rate": 8.542254515072211e-06, "loss": 0.4187, "step": 11013 }, { "epoch": 0.32449950650383746, "grad_norm": 1.7343055109633454, "learning_rate": 8.541891566536839e-06, "loss": 0.6559, "step": 11014 }, { "epoch": 0.32452896896130107, "grad_norm": 1.5607766040599527, "learning_rate": 8.54152858053642e-06, "loss": 0.4899, "step": 11015 }, { "epoch": 0.3245584314187646, "grad_norm": 1.6434853969327359, "learning_rate": 8.54116555707479e-06, "loss": 0.52, "step": 11016 }, { "epoch": 0.3245878938762282, "grad_norm": 1.3234457101160695, "learning_rate": 8.540802496155792e-06, "loss": 0.3497, "step": 11017 }, { "epoch": 0.3246173563336918, "grad_norm": 1.6872523477479606, "learning_rate": 8.540439397783262e-06, "loss": 0.4807, "step": 11018 }, { "epoch": 0.32464681879115537, "grad_norm": 1.5673833537613715, "learning_rate": 8.540076261961044e-06, "loss": 0.4892, "step": 11019 }, { "epoch": 0.32467628124861897, "grad_norm": 1.6632671954400633, "learning_rate": 8.539713088692977e-06, "loss": 0.4676, "step": 11020 }, { "epoch": 0.3247057437060825, "grad_norm": 1.5149892809944128, "learning_rate": 8.539349877982905e-06, "loss": 0.4751, "step": 11021 }, { "epoch": 0.3247352061635461, "grad_norm": 1.9884139013687403, "learning_rate": 8.53898662983467e-06, "loss": 0.509, "step": 11022 }, { "epoch": 0.32476466862100967, "grad_norm": 1.4587939231772293, "learning_rate": 8.538623344252109e-06, "loss": 0.4638, "step": 11023 }, { "epoch": 0.32479413107847327, "grad_norm": 1.4981563624551297, "learning_rate": 8.538260021239071e-06, "loss": 0.408, "step": 11024 }, { "epoch": 0.3248235935359368, "grad_norm": 1.55437484625734, "learning_rate": 8.537896660799397e-06, "loss": 0.5057, "step": 11025 }, { "epoch": 0.3248530559934004, "grad_norm": 1.5372823816339147, "learning_rate": 8.53753326293693e-06, "loss": 0.4832, "step": 11026 }, { "epoch": 0.32488251845086397, "grad_norm": 1.4289032116382285, "learning_rate": 8.537169827655516e-06, "loss": 0.3497, "step": 11027 }, { "epoch": 0.32491198090832757, "grad_norm": 1.5746746635587583, "learning_rate": 8.536806354958995e-06, "loss": 0.4235, "step": 11028 }, { "epoch": 0.3249414433657911, "grad_norm": 1.6385732095079797, "learning_rate": 8.536442844851216e-06, "loss": 0.4916, "step": 11029 }, { "epoch": 0.3249709058232547, "grad_norm": 1.764070236518915, "learning_rate": 8.536079297336019e-06, "loss": 0.5699, "step": 11030 }, { "epoch": 0.3250003682807183, "grad_norm": 1.5047308616082775, "learning_rate": 8.535715712417257e-06, "loss": 0.5751, "step": 11031 }, { "epoch": 0.32502983073818187, "grad_norm": 1.4936166310136065, "learning_rate": 8.53535209009877e-06, "loss": 0.4509, "step": 11032 }, { "epoch": 0.32505929319564547, "grad_norm": 1.6149467776434412, "learning_rate": 8.534988430384406e-06, "loss": 0.5252, "step": 11033 }, { "epoch": 0.325088755653109, "grad_norm": 1.5275702100377315, "learning_rate": 8.534624733278012e-06, "loss": 0.5377, "step": 11034 }, { "epoch": 0.3251182181105726, "grad_norm": 1.4936999271124904, "learning_rate": 8.534260998783434e-06, "loss": 0.4058, "step": 11035 }, { "epoch": 0.32514768056803617, "grad_norm": 1.551553421916564, "learning_rate": 8.533897226904522e-06, "loss": 0.4349, "step": 11036 }, { "epoch": 0.32517714302549977, "grad_norm": 1.4434000518898615, "learning_rate": 8.533533417645122e-06, "loss": 0.3645, "step": 11037 }, { "epoch": 0.3252066054829633, "grad_norm": 1.3003856744211408, "learning_rate": 8.533169571009079e-06, "loss": 0.328, "step": 11038 }, { "epoch": 0.3252360679404269, "grad_norm": 1.7475096030660486, "learning_rate": 8.532805687000249e-06, "loss": 0.4182, "step": 11039 }, { "epoch": 0.32526553039789047, "grad_norm": 1.6552677845664256, "learning_rate": 8.532441765622475e-06, "loss": 0.5864, "step": 11040 }, { "epoch": 0.32529499285535407, "grad_norm": 1.4980322067431757, "learning_rate": 8.53207780687961e-06, "loss": 0.596, "step": 11041 }, { "epoch": 0.3253244553128176, "grad_norm": 1.5424753989763051, "learning_rate": 8.531713810775501e-06, "loss": 0.3683, "step": 11042 }, { "epoch": 0.3253539177702812, "grad_norm": 1.5272603365746855, "learning_rate": 8.531349777313999e-06, "loss": 0.4452, "step": 11043 }, { "epoch": 0.3253833802277448, "grad_norm": 1.4638622673966588, "learning_rate": 8.530985706498957e-06, "loss": 0.4543, "step": 11044 }, { "epoch": 0.32541284268520837, "grad_norm": 1.5611629984111628, "learning_rate": 8.530621598334223e-06, "loss": 0.6596, "step": 11045 }, { "epoch": 0.325442305142672, "grad_norm": 1.5422483775614373, "learning_rate": 8.530257452823652e-06, "loss": 0.4807, "step": 11046 }, { "epoch": 0.3254717676001355, "grad_norm": 1.606170228772133, "learning_rate": 8.52989326997109e-06, "loss": 0.4045, "step": 11047 }, { "epoch": 0.3255012300575991, "grad_norm": 1.4311949227150913, "learning_rate": 8.529529049780397e-06, "loss": 0.3618, "step": 11048 }, { "epoch": 0.32553069251506267, "grad_norm": 1.6322263688242489, "learning_rate": 8.529164792255417e-06, "loss": 0.5985, "step": 11049 }, { "epoch": 0.32556015497252627, "grad_norm": 1.4328509044351634, "learning_rate": 8.52880049740001e-06, "loss": 0.4927, "step": 11050 }, { "epoch": 0.3255896174299898, "grad_norm": 1.2830325384013406, "learning_rate": 8.528436165218026e-06, "loss": 0.4443, "step": 11051 }, { "epoch": 0.3256190798874534, "grad_norm": 1.5133223960895361, "learning_rate": 8.52807179571332e-06, "loss": 0.4979, "step": 11052 }, { "epoch": 0.32564854234491697, "grad_norm": 1.605541873683559, "learning_rate": 8.527707388889745e-06, "loss": 0.6535, "step": 11053 }, { "epoch": 0.32567800480238057, "grad_norm": 1.6963364706474109, "learning_rate": 8.527342944751158e-06, "loss": 0.6461, "step": 11054 }, { "epoch": 0.3257074672598441, "grad_norm": 1.4707767364783164, "learning_rate": 8.526978463301411e-06, "loss": 0.4162, "step": 11055 }, { "epoch": 0.3257369297173077, "grad_norm": 1.5086239723501924, "learning_rate": 8.52661394454436e-06, "loss": 0.5634, "step": 11056 }, { "epoch": 0.3257663921747713, "grad_norm": 1.5655306401361688, "learning_rate": 8.526249388483862e-06, "loss": 0.5172, "step": 11057 }, { "epoch": 0.32579585463223487, "grad_norm": 1.5390304885544752, "learning_rate": 8.525884795123775e-06, "loss": 0.561, "step": 11058 }, { "epoch": 0.3258253170896985, "grad_norm": 1.447695881728948, "learning_rate": 8.525520164467952e-06, "loss": 0.5644, "step": 11059 }, { "epoch": 0.325854779547162, "grad_norm": 1.6326430759516066, "learning_rate": 8.52515549652025e-06, "loss": 0.5591, "step": 11060 }, { "epoch": 0.3258842420046256, "grad_norm": 1.5563635078126126, "learning_rate": 8.524790791284526e-06, "loss": 0.4804, "step": 11061 }, { "epoch": 0.32591370446208917, "grad_norm": 1.4823430114105094, "learning_rate": 8.524426048764641e-06, "loss": 0.4084, "step": 11062 }, { "epoch": 0.3259431669195528, "grad_norm": 1.538180060014627, "learning_rate": 8.524061268964452e-06, "loss": 0.4787, "step": 11063 }, { "epoch": 0.3259726293770163, "grad_norm": 1.6506048864560872, "learning_rate": 8.523696451887817e-06, "loss": 0.5317, "step": 11064 }, { "epoch": 0.3260020918344799, "grad_norm": 1.5523361001441496, "learning_rate": 8.523331597538593e-06, "loss": 0.3388, "step": 11065 }, { "epoch": 0.32603155429194347, "grad_norm": 1.6064148342641682, "learning_rate": 8.522966705920643e-06, "loss": 0.4685, "step": 11066 }, { "epoch": 0.3260610167494071, "grad_norm": 1.4989120374947706, "learning_rate": 8.522601777037824e-06, "loss": 0.5339, "step": 11067 }, { "epoch": 0.3260904792068706, "grad_norm": 1.5609582399426143, "learning_rate": 8.522236810893997e-06, "loss": 0.4621, "step": 11068 }, { "epoch": 0.3261199416643342, "grad_norm": 1.4641101896814315, "learning_rate": 8.521871807493024e-06, "loss": 0.377, "step": 11069 }, { "epoch": 0.3261494041217978, "grad_norm": 1.8170209572621883, "learning_rate": 8.52150676683876e-06, "loss": 0.5009, "step": 11070 }, { "epoch": 0.3261788665792614, "grad_norm": 1.6093083584885757, "learning_rate": 8.521141688935075e-06, "loss": 0.4885, "step": 11071 }, { "epoch": 0.326208329036725, "grad_norm": 1.5680470246006535, "learning_rate": 8.520776573785825e-06, "loss": 0.4701, "step": 11072 }, { "epoch": 0.3262377914941885, "grad_norm": 1.5537433427505984, "learning_rate": 8.520411421394872e-06, "loss": 0.5065, "step": 11073 }, { "epoch": 0.3262672539516521, "grad_norm": 1.4504751797406898, "learning_rate": 8.520046231766082e-06, "loss": 0.461, "step": 11074 }, { "epoch": 0.3262967164091157, "grad_norm": 1.4332176810289055, "learning_rate": 8.519681004903315e-06, "loss": 0.3489, "step": 11075 }, { "epoch": 0.3263261788665793, "grad_norm": 1.5806150466063011, "learning_rate": 8.519315740810434e-06, "loss": 0.6171, "step": 11076 }, { "epoch": 0.3263556413240428, "grad_norm": 1.607265924668933, "learning_rate": 8.518950439491304e-06, "loss": 0.5005, "step": 11077 }, { "epoch": 0.3263851037815064, "grad_norm": 1.6356955306189906, "learning_rate": 8.51858510094979e-06, "loss": 0.5219, "step": 11078 }, { "epoch": 0.32641456623896997, "grad_norm": 1.3669634800461499, "learning_rate": 8.518219725189754e-06, "loss": 0.2767, "step": 11079 }, { "epoch": 0.3264440286964336, "grad_norm": 1.7335068426868636, "learning_rate": 8.517854312215061e-06, "loss": 0.661, "step": 11080 }, { "epoch": 0.3264734911538971, "grad_norm": 1.507510477278503, "learning_rate": 8.517488862029579e-06, "loss": 0.5702, "step": 11081 }, { "epoch": 0.3265029536113607, "grad_norm": 1.6844142368397275, "learning_rate": 8.51712337463717e-06, "loss": 0.4875, "step": 11082 }, { "epoch": 0.3265324160688243, "grad_norm": 1.5349360506711756, "learning_rate": 8.516757850041703e-06, "loss": 0.4381, "step": 11083 }, { "epoch": 0.3265618785262879, "grad_norm": 1.6006085409252575, "learning_rate": 8.516392288247045e-06, "loss": 0.2643, "step": 11084 }, { "epoch": 0.3265913409837515, "grad_norm": 1.6858593845961771, "learning_rate": 8.51602668925706e-06, "loss": 0.4942, "step": 11085 }, { "epoch": 0.326620803441215, "grad_norm": 1.5207973760288407, "learning_rate": 8.515661053075615e-06, "loss": 0.4567, "step": 11086 }, { "epoch": 0.3266502658986786, "grad_norm": 1.487395150568522, "learning_rate": 8.515295379706579e-06, "loss": 0.4497, "step": 11087 }, { "epoch": 0.3266797283561422, "grad_norm": 1.6846281673747594, "learning_rate": 8.514929669153819e-06, "loss": 0.5761, "step": 11088 }, { "epoch": 0.3267091908136058, "grad_norm": 1.5111284379473529, "learning_rate": 8.514563921421205e-06, "loss": 0.5045, "step": 11089 }, { "epoch": 0.3267386532710693, "grad_norm": 1.5200745019771231, "learning_rate": 8.514198136512605e-06, "loss": 0.5386, "step": 11090 }, { "epoch": 0.3267681157285329, "grad_norm": 1.4581909814745975, "learning_rate": 8.513832314431888e-06, "loss": 0.4384, "step": 11091 }, { "epoch": 0.3267975781859965, "grad_norm": 1.5280265365527785, "learning_rate": 8.513466455182927e-06, "loss": 0.5813, "step": 11092 }, { "epoch": 0.3268270406434601, "grad_norm": 1.3276108131139985, "learning_rate": 8.513100558769583e-06, "loss": 0.2716, "step": 11093 }, { "epoch": 0.3268565031009236, "grad_norm": 1.4238443570474442, "learning_rate": 8.512734625195735e-06, "loss": 0.5307, "step": 11094 }, { "epoch": 0.3268859655583872, "grad_norm": 1.392717448988836, "learning_rate": 8.51236865446525e-06, "loss": 0.3243, "step": 11095 }, { "epoch": 0.32691542801585083, "grad_norm": 1.811953194204716, "learning_rate": 8.512002646582e-06, "loss": 0.5167, "step": 11096 }, { "epoch": 0.3269448904733144, "grad_norm": 1.5206243660531351, "learning_rate": 8.511636601549857e-06, "loss": 0.4395, "step": 11097 }, { "epoch": 0.326974352930778, "grad_norm": 1.2986121541484201, "learning_rate": 8.511270519372692e-06, "loss": 0.37, "step": 11098 }, { "epoch": 0.3270038153882415, "grad_norm": 1.5240874976642609, "learning_rate": 8.510904400054377e-06, "loss": 0.4857, "step": 11099 }, { "epoch": 0.32703327784570513, "grad_norm": 1.4591820784681582, "learning_rate": 8.510538243598785e-06, "loss": 0.5492, "step": 11100 }, { "epoch": 0.3270627403031687, "grad_norm": 1.460385155967449, "learning_rate": 8.51017205000979e-06, "loss": 0.3823, "step": 11101 }, { "epoch": 0.3270922027606323, "grad_norm": 1.572412814338491, "learning_rate": 8.509805819291264e-06, "loss": 0.4475, "step": 11102 }, { "epoch": 0.3271216652180958, "grad_norm": 1.6001098610184472, "learning_rate": 8.509439551447082e-06, "loss": 0.4854, "step": 11103 }, { "epoch": 0.3271511276755594, "grad_norm": 1.6235811570729421, "learning_rate": 8.509073246481118e-06, "loss": 0.643, "step": 11104 }, { "epoch": 0.327180590133023, "grad_norm": 1.353880867813741, "learning_rate": 8.508706904397246e-06, "loss": 0.3942, "step": 11105 }, { "epoch": 0.3272100525904866, "grad_norm": 1.4725630200898336, "learning_rate": 8.508340525199344e-06, "loss": 0.4057, "step": 11106 }, { "epoch": 0.3272395150479501, "grad_norm": 1.6126688982240966, "learning_rate": 8.507974108891282e-06, "loss": 0.4544, "step": 11107 }, { "epoch": 0.3272689775054137, "grad_norm": 1.7042048596267823, "learning_rate": 8.507607655476941e-06, "loss": 0.5317, "step": 11108 }, { "epoch": 0.32729843996287733, "grad_norm": 1.5345114325663893, "learning_rate": 8.507241164960194e-06, "loss": 0.4968, "step": 11109 }, { "epoch": 0.3273279024203409, "grad_norm": 1.5114804748572155, "learning_rate": 8.50687463734492e-06, "loss": 0.479, "step": 11110 }, { "epoch": 0.3273573648778045, "grad_norm": 1.4298556253903618, "learning_rate": 8.506508072634995e-06, "loss": 0.4004, "step": 11111 }, { "epoch": 0.327386827335268, "grad_norm": 1.3657905329547093, "learning_rate": 8.506141470834294e-06, "loss": 0.4191, "step": 11112 }, { "epoch": 0.32741628979273163, "grad_norm": 1.6271523829402208, "learning_rate": 8.5057748319467e-06, "loss": 0.3448, "step": 11113 }, { "epoch": 0.3274457522501952, "grad_norm": 1.6208467936331483, "learning_rate": 8.505408155976088e-06, "loss": 0.5205, "step": 11114 }, { "epoch": 0.3274752147076588, "grad_norm": 1.6200716772824981, "learning_rate": 8.505041442926333e-06, "loss": 0.4831, "step": 11115 }, { "epoch": 0.3275046771651223, "grad_norm": 1.590142373155395, "learning_rate": 8.504674692801323e-06, "loss": 0.4145, "step": 11116 }, { "epoch": 0.32753413962258593, "grad_norm": 1.7228622615592333, "learning_rate": 8.50430790560493e-06, "loss": 0.5808, "step": 11117 }, { "epoch": 0.3275636020800495, "grad_norm": 1.6258664137954206, "learning_rate": 8.503941081341034e-06, "loss": 0.4547, "step": 11118 }, { "epoch": 0.3275930645375131, "grad_norm": 1.5837102799576326, "learning_rate": 8.50357422001352e-06, "loss": 0.5844, "step": 11119 }, { "epoch": 0.3276225269949766, "grad_norm": 1.7808795387789953, "learning_rate": 8.503207321626263e-06, "loss": 0.4046, "step": 11120 }, { "epoch": 0.32765198945244023, "grad_norm": 1.5446523189978247, "learning_rate": 8.50284038618315e-06, "loss": 0.565, "step": 11121 }, { "epoch": 0.32768145190990383, "grad_norm": 1.4329742391424558, "learning_rate": 8.502473413688055e-06, "loss": 0.4537, "step": 11122 }, { "epoch": 0.3277109143673674, "grad_norm": 1.3728349922914314, "learning_rate": 8.502106404144865e-06, "loss": 0.3209, "step": 11123 }, { "epoch": 0.327740376824831, "grad_norm": 1.5054697935278314, "learning_rate": 8.501739357557463e-06, "loss": 0.4971, "step": 11124 }, { "epoch": 0.32776983928229453, "grad_norm": 1.391722547543359, "learning_rate": 8.501372273929726e-06, "loss": 0.3775, "step": 11125 }, { "epoch": 0.32779930173975813, "grad_norm": 1.5273162721655698, "learning_rate": 8.501005153265542e-06, "loss": 0.4736, "step": 11126 }, { "epoch": 0.3278287641972217, "grad_norm": 1.4494795346599052, "learning_rate": 8.500637995568793e-06, "loss": 0.4236, "step": 11127 }, { "epoch": 0.3278582266546853, "grad_norm": 1.550143499633121, "learning_rate": 8.500270800843362e-06, "loss": 0.4574, "step": 11128 }, { "epoch": 0.32788768911214883, "grad_norm": 1.577938595173664, "learning_rate": 8.499903569093132e-06, "loss": 0.5018, "step": 11129 }, { "epoch": 0.32791715156961243, "grad_norm": 1.6913762337442586, "learning_rate": 8.499536300321988e-06, "loss": 0.5072, "step": 11130 }, { "epoch": 0.327946614027076, "grad_norm": 1.5638756501533038, "learning_rate": 8.499168994533817e-06, "loss": 0.422, "step": 11131 }, { "epoch": 0.3279760764845396, "grad_norm": 1.6572069266300733, "learning_rate": 8.498801651732503e-06, "loss": 0.5847, "step": 11132 }, { "epoch": 0.3280055389420031, "grad_norm": 1.5792014028690706, "learning_rate": 8.498434271921931e-06, "loss": 0.5036, "step": 11133 }, { "epoch": 0.32803500139946673, "grad_norm": 1.7209797844732175, "learning_rate": 8.498066855105989e-06, "loss": 0.5211, "step": 11134 }, { "epoch": 0.32806446385693033, "grad_norm": 1.5541239244333096, "learning_rate": 8.49769940128856e-06, "loss": 0.5168, "step": 11135 }, { "epoch": 0.3280939263143939, "grad_norm": 1.5860511353997193, "learning_rate": 8.497331910473532e-06, "loss": 0.458, "step": 11136 }, { "epoch": 0.3281233887718575, "grad_norm": 1.4365268979850805, "learning_rate": 8.496964382664795e-06, "loss": 0.4529, "step": 11137 }, { "epoch": 0.32815285122932103, "grad_norm": 1.9752555288656446, "learning_rate": 8.496596817866232e-06, "loss": 0.6715, "step": 11138 }, { "epoch": 0.32818231368678463, "grad_norm": 1.4255800678209425, "learning_rate": 8.496229216081735e-06, "loss": 0.5526, "step": 11139 }, { "epoch": 0.3282117761442482, "grad_norm": 1.5842250494343386, "learning_rate": 8.495861577315191e-06, "loss": 0.4345, "step": 11140 }, { "epoch": 0.3282412386017118, "grad_norm": 1.8086793206839222, "learning_rate": 8.495493901570488e-06, "loss": 0.5035, "step": 11141 }, { "epoch": 0.32827070105917533, "grad_norm": 1.407771561025301, "learning_rate": 8.495126188851517e-06, "loss": 0.4454, "step": 11142 }, { "epoch": 0.32830016351663893, "grad_norm": 1.4770758434811597, "learning_rate": 8.494758439162165e-06, "loss": 0.4691, "step": 11143 }, { "epoch": 0.3283296259741025, "grad_norm": 1.4721245308459945, "learning_rate": 8.494390652506324e-06, "loss": 0.3654, "step": 11144 }, { "epoch": 0.3283590884315661, "grad_norm": 1.8127154561132635, "learning_rate": 8.494022828887884e-06, "loss": 0.6506, "step": 11145 }, { "epoch": 0.32838855088902963, "grad_norm": 1.550219296117354, "learning_rate": 8.493654968310734e-06, "loss": 0.573, "step": 11146 }, { "epoch": 0.32841801334649323, "grad_norm": 1.4010692715216193, "learning_rate": 8.493287070778768e-06, "loss": 0.3528, "step": 11147 }, { "epoch": 0.32844747580395683, "grad_norm": 1.3308238690118135, "learning_rate": 8.492919136295873e-06, "loss": 0.4604, "step": 11148 }, { "epoch": 0.3284769382614204, "grad_norm": 1.4313320700440806, "learning_rate": 8.492551164865947e-06, "loss": 0.4946, "step": 11149 }, { "epoch": 0.328506400718884, "grad_norm": 1.4062540030803472, "learning_rate": 8.49218315649288e-06, "loss": 0.4009, "step": 11150 }, { "epoch": 0.32853586317634753, "grad_norm": 1.699116333837233, "learning_rate": 8.491815111180562e-06, "loss": 0.6073, "step": 11151 }, { "epoch": 0.32856532563381113, "grad_norm": 1.4400045804443766, "learning_rate": 8.491447028932887e-06, "loss": 0.5422, "step": 11152 }, { "epoch": 0.3285947880912747, "grad_norm": 1.5875052851018463, "learning_rate": 8.49107890975375e-06, "loss": 0.452, "step": 11153 }, { "epoch": 0.3286242505487383, "grad_norm": 1.6057741307743376, "learning_rate": 8.490710753647045e-06, "loss": 0.4512, "step": 11154 }, { "epoch": 0.32865371300620183, "grad_norm": 1.4231611880466584, "learning_rate": 8.490342560616666e-06, "loss": 0.48, "step": 11155 }, { "epoch": 0.32868317546366543, "grad_norm": 1.518018160592611, "learning_rate": 8.489974330666506e-06, "loss": 0.447, "step": 11156 }, { "epoch": 0.328712637921129, "grad_norm": 1.4853048168673655, "learning_rate": 8.489606063800462e-06, "loss": 0.3122, "step": 11157 }, { "epoch": 0.3287421003785926, "grad_norm": 1.5112169976829628, "learning_rate": 8.489237760022426e-06, "loss": 0.5136, "step": 11158 }, { "epoch": 0.32877156283605613, "grad_norm": 1.5280031281839577, "learning_rate": 8.4888694193363e-06, "loss": 0.5185, "step": 11159 }, { "epoch": 0.32880102529351973, "grad_norm": 1.5544595941197086, "learning_rate": 8.488501041745974e-06, "loss": 0.6456, "step": 11160 }, { "epoch": 0.32883048775098334, "grad_norm": 1.6897827941454209, "learning_rate": 8.488132627255348e-06, "loss": 0.5829, "step": 11161 }, { "epoch": 0.3288599502084469, "grad_norm": 1.3389835124583511, "learning_rate": 8.487764175868318e-06, "loss": 0.4173, "step": 11162 }, { "epoch": 0.3288894126659105, "grad_norm": 1.6284718789048003, "learning_rate": 8.487395687588782e-06, "loss": 0.5353, "step": 11163 }, { "epoch": 0.32891887512337403, "grad_norm": 1.5286345714645948, "learning_rate": 8.487027162420637e-06, "loss": 0.3896, "step": 11164 }, { "epoch": 0.32894833758083764, "grad_norm": 1.5738089437281164, "learning_rate": 8.486658600367781e-06, "loss": 0.4727, "step": 11165 }, { "epoch": 0.3289778000383012, "grad_norm": 1.5615528022987228, "learning_rate": 8.486290001434113e-06, "loss": 0.5501, "step": 11166 }, { "epoch": 0.3290072624957648, "grad_norm": 1.4699607058393347, "learning_rate": 8.485921365623532e-06, "loss": 0.4762, "step": 11167 }, { "epoch": 0.32903672495322833, "grad_norm": 1.6952855911853917, "learning_rate": 8.485552692939937e-06, "loss": 0.4705, "step": 11168 }, { "epoch": 0.32906618741069193, "grad_norm": 1.395048066975726, "learning_rate": 8.485183983387228e-06, "loss": 0.4037, "step": 11169 }, { "epoch": 0.3290956498681555, "grad_norm": 1.4908730902972955, "learning_rate": 8.484815236969305e-06, "loss": 0.4773, "step": 11170 }, { "epoch": 0.3291251123256191, "grad_norm": 1.6200879988734667, "learning_rate": 8.484446453690068e-06, "loss": 0.5544, "step": 11171 }, { "epoch": 0.32915457478308263, "grad_norm": 1.4178378175255846, "learning_rate": 8.484077633553419e-06, "loss": 0.3921, "step": 11172 }, { "epoch": 0.32918403724054623, "grad_norm": 1.5021879235066897, "learning_rate": 8.483708776563257e-06, "loss": 0.3525, "step": 11173 }, { "epoch": 0.32921349969800984, "grad_norm": 1.3907169068125511, "learning_rate": 8.483339882723487e-06, "loss": 0.5086, "step": 11174 }, { "epoch": 0.3292429621554734, "grad_norm": 1.4958367548648208, "learning_rate": 8.48297095203801e-06, "loss": 0.4751, "step": 11175 }, { "epoch": 0.329272424612937, "grad_norm": 1.607476543862796, "learning_rate": 8.482601984510725e-06, "loss": 0.5172, "step": 11176 }, { "epoch": 0.32930188707040053, "grad_norm": 1.638010826183887, "learning_rate": 8.482232980145541e-06, "loss": 0.6561, "step": 11177 }, { "epoch": 0.32933134952786414, "grad_norm": 1.4425383579789817, "learning_rate": 8.481863938946356e-06, "loss": 0.3864, "step": 11178 }, { "epoch": 0.3293608119853277, "grad_norm": 1.4681542211882939, "learning_rate": 8.481494860917075e-06, "loss": 0.3436, "step": 11179 }, { "epoch": 0.3293902744427913, "grad_norm": 1.658750146982556, "learning_rate": 8.481125746061603e-06, "loss": 0.5094, "step": 11180 }, { "epoch": 0.32941973690025483, "grad_norm": 1.701395647069039, "learning_rate": 8.480756594383844e-06, "loss": 0.4164, "step": 11181 }, { "epoch": 0.32944919935771844, "grad_norm": 1.3257548808250201, "learning_rate": 8.4803874058877e-06, "loss": 0.4761, "step": 11182 }, { "epoch": 0.329478661815182, "grad_norm": 1.5484218204427882, "learning_rate": 8.480018180577081e-06, "loss": 0.5312, "step": 11183 }, { "epoch": 0.3295081242726456, "grad_norm": 1.6295368840651125, "learning_rate": 8.479648918455892e-06, "loss": 0.5376, "step": 11184 }, { "epoch": 0.32953758673010913, "grad_norm": 1.6725373000784136, "learning_rate": 8.479279619528034e-06, "loss": 0.4313, "step": 11185 }, { "epoch": 0.32956704918757274, "grad_norm": 1.5056529862960593, "learning_rate": 8.478910283797418e-06, "loss": 0.4815, "step": 11186 }, { "epoch": 0.32959651164503634, "grad_norm": 1.80470778478867, "learning_rate": 8.47854091126795e-06, "loss": 0.6398, "step": 11187 }, { "epoch": 0.3296259741024999, "grad_norm": 1.4642452986895083, "learning_rate": 8.478171501943534e-06, "loss": 0.4617, "step": 11188 }, { "epoch": 0.3296554365599635, "grad_norm": 1.4798341700845745, "learning_rate": 8.477802055828081e-06, "loss": 0.45, "step": 11189 }, { "epoch": 0.32968489901742704, "grad_norm": 1.5130040234322297, "learning_rate": 8.4774325729255e-06, "loss": 0.3653, "step": 11190 }, { "epoch": 0.32971436147489064, "grad_norm": 1.3704255588893943, "learning_rate": 8.477063053239693e-06, "loss": 0.3825, "step": 11191 }, { "epoch": 0.3297438239323542, "grad_norm": 1.785513274574184, "learning_rate": 8.476693496774575e-06, "loss": 0.6166, "step": 11192 }, { "epoch": 0.3297732863898178, "grad_norm": 1.4946930928898745, "learning_rate": 8.476323903534051e-06, "loss": 0.5729, "step": 11193 }, { "epoch": 0.32980274884728134, "grad_norm": 1.5022737624141174, "learning_rate": 8.475954273522035e-06, "loss": 0.5468, "step": 11194 }, { "epoch": 0.32983221130474494, "grad_norm": 1.6584861291906974, "learning_rate": 8.475584606742433e-06, "loss": 0.493, "step": 11195 }, { "epoch": 0.3298616737622085, "grad_norm": 1.387312038194336, "learning_rate": 8.475214903199156e-06, "loss": 0.4053, "step": 11196 }, { "epoch": 0.3298911362196721, "grad_norm": 1.6012580686309144, "learning_rate": 8.474845162896115e-06, "loss": 0.4548, "step": 11197 }, { "epoch": 0.32992059867713563, "grad_norm": 1.3873605996655503, "learning_rate": 8.474475385837219e-06, "loss": 0.3876, "step": 11198 }, { "epoch": 0.32995006113459924, "grad_norm": 1.5482117214967548, "learning_rate": 8.474105572026384e-06, "loss": 0.4626, "step": 11199 }, { "epoch": 0.32997952359206284, "grad_norm": 1.5677109619145653, "learning_rate": 8.473735721467518e-06, "loss": 0.5087, "step": 11200 }, { "epoch": 0.3300089860495264, "grad_norm": 1.363091555810985, "learning_rate": 8.473365834164533e-06, "loss": 0.3572, "step": 11201 }, { "epoch": 0.33003844850699, "grad_norm": 1.5279398568836835, "learning_rate": 8.472995910121345e-06, "loss": 0.5056, "step": 11202 }, { "epoch": 0.33006791096445354, "grad_norm": 1.4616296094806425, "learning_rate": 8.472625949341863e-06, "loss": 0.4616, "step": 11203 }, { "epoch": 0.33009737342191714, "grad_norm": 1.7023364080131158, "learning_rate": 8.472255951830004e-06, "loss": 0.5738, "step": 11204 }, { "epoch": 0.3301268358793807, "grad_norm": 1.8321094121390398, "learning_rate": 8.471885917589677e-06, "loss": 0.5298, "step": 11205 }, { "epoch": 0.3301562983368443, "grad_norm": 1.5965761974969441, "learning_rate": 8.4715158466248e-06, "loss": 0.5013, "step": 11206 }, { "epoch": 0.33018576079430784, "grad_norm": 1.4278745559740247, "learning_rate": 8.471145738939286e-06, "loss": 0.452, "step": 11207 }, { "epoch": 0.33021522325177144, "grad_norm": 1.5787004672504148, "learning_rate": 8.47077559453705e-06, "loss": 0.5799, "step": 11208 }, { "epoch": 0.330244685709235, "grad_norm": 1.5687031410134675, "learning_rate": 8.470405413422009e-06, "loss": 0.3854, "step": 11209 }, { "epoch": 0.3302741481666986, "grad_norm": 1.5346375601263094, "learning_rate": 8.470035195598077e-06, "loss": 0.4725, "step": 11210 }, { "epoch": 0.33030361062416214, "grad_norm": 1.4478120438980497, "learning_rate": 8.46966494106917e-06, "loss": 0.5062, "step": 11211 }, { "epoch": 0.33033307308162574, "grad_norm": 1.539885702373729, "learning_rate": 8.469294649839204e-06, "loss": 0.4022, "step": 11212 }, { "epoch": 0.33036253553908934, "grad_norm": 1.46913962441037, "learning_rate": 8.468924321912097e-06, "loss": 0.4792, "step": 11213 }, { "epoch": 0.3303919979965529, "grad_norm": 1.5927691108653388, "learning_rate": 8.468553957291765e-06, "loss": 0.4577, "step": 11214 }, { "epoch": 0.3304214604540165, "grad_norm": 1.4684412523304504, "learning_rate": 8.468183555982128e-06, "loss": 0.4179, "step": 11215 }, { "epoch": 0.33045092291148004, "grad_norm": 1.7365478633259062, "learning_rate": 8.4678131179871e-06, "loss": 0.4833, "step": 11216 }, { "epoch": 0.33048038536894364, "grad_norm": 1.422982010299674, "learning_rate": 8.467442643310605e-06, "loss": 0.4461, "step": 11217 }, { "epoch": 0.3305098478264072, "grad_norm": 1.4619314680441136, "learning_rate": 8.467072131956555e-06, "loss": 0.4506, "step": 11218 }, { "epoch": 0.3305393102838708, "grad_norm": 1.7260930748530376, "learning_rate": 8.466701583928873e-06, "loss": 0.4846, "step": 11219 }, { "epoch": 0.33056877274133434, "grad_norm": 1.5949414438538991, "learning_rate": 8.46633099923148e-06, "loss": 0.4645, "step": 11220 }, { "epoch": 0.33059823519879794, "grad_norm": 1.450882518155283, "learning_rate": 8.465960377868291e-06, "loss": 0.4552, "step": 11221 }, { "epoch": 0.3306276976562615, "grad_norm": 1.3848774605522736, "learning_rate": 8.465589719843233e-06, "loss": 0.3398, "step": 11222 }, { "epoch": 0.3306571601137251, "grad_norm": 1.499285109610717, "learning_rate": 8.465219025160221e-06, "loss": 0.4827, "step": 11223 }, { "epoch": 0.33068662257118864, "grad_norm": 1.467823887393273, "learning_rate": 8.46484829382318e-06, "loss": 0.47, "step": 11224 }, { "epoch": 0.33071608502865224, "grad_norm": 1.6167410668925217, "learning_rate": 8.464477525836028e-06, "loss": 0.4741, "step": 11225 }, { "epoch": 0.33074554748611584, "grad_norm": 1.428293453496625, "learning_rate": 8.46410672120269e-06, "loss": 0.4079, "step": 11226 }, { "epoch": 0.3307750099435794, "grad_norm": 1.8223564733675652, "learning_rate": 8.463735879927085e-06, "loss": 0.4482, "step": 11227 }, { "epoch": 0.330804472401043, "grad_norm": 1.7938375893070329, "learning_rate": 8.463365002013138e-06, "loss": 0.6524, "step": 11228 }, { "epoch": 0.33083393485850654, "grad_norm": 1.9036600390646994, "learning_rate": 8.462994087464773e-06, "loss": 0.6013, "step": 11229 }, { "epoch": 0.33086339731597014, "grad_norm": 1.6284845022324712, "learning_rate": 8.46262313628591e-06, "loss": 0.3891, "step": 11230 }, { "epoch": 0.3308928597734337, "grad_norm": 1.67574838909616, "learning_rate": 8.462252148480477e-06, "loss": 0.5756, "step": 11231 }, { "epoch": 0.3309223222308973, "grad_norm": 1.6377614315546691, "learning_rate": 8.461881124052394e-06, "loss": 0.5255, "step": 11232 }, { "epoch": 0.33095178468836084, "grad_norm": 1.3600616077584595, "learning_rate": 8.461510063005586e-06, "loss": 0.4657, "step": 11233 }, { "epoch": 0.33098124714582444, "grad_norm": 1.5951023421658965, "learning_rate": 8.461138965343985e-06, "loss": 0.547, "step": 11234 }, { "epoch": 0.331010709603288, "grad_norm": 1.324732358073384, "learning_rate": 8.460767831071506e-06, "loss": 0.3704, "step": 11235 }, { "epoch": 0.3310401720607516, "grad_norm": 1.6761304713644685, "learning_rate": 8.46039666019208e-06, "loss": 0.3752, "step": 11236 }, { "epoch": 0.33106963451821514, "grad_norm": 1.7066810843020266, "learning_rate": 8.460025452709636e-06, "loss": 0.5338, "step": 11237 }, { "epoch": 0.33109909697567874, "grad_norm": 1.5666857176678797, "learning_rate": 8.459654208628095e-06, "loss": 0.5894, "step": 11238 }, { "epoch": 0.33112855943314234, "grad_norm": 1.796279009971197, "learning_rate": 8.459282927951385e-06, "loss": 0.5219, "step": 11239 }, { "epoch": 0.3311580218906059, "grad_norm": 1.615419410638444, "learning_rate": 8.458911610683436e-06, "loss": 0.4897, "step": 11240 }, { "epoch": 0.3311874843480695, "grad_norm": 1.7053936009362962, "learning_rate": 8.458540256828172e-06, "loss": 0.4726, "step": 11241 }, { "epoch": 0.33121694680553304, "grad_norm": 1.5146940741418793, "learning_rate": 8.458168866389524e-06, "loss": 0.4945, "step": 11242 }, { "epoch": 0.33124640926299664, "grad_norm": 1.4213806023763955, "learning_rate": 8.457797439371422e-06, "loss": 0.5034, "step": 11243 }, { "epoch": 0.3312758717204602, "grad_norm": 1.53007250559782, "learning_rate": 8.457425975777791e-06, "loss": 0.5146, "step": 11244 }, { "epoch": 0.3313053341779238, "grad_norm": 1.560185950377029, "learning_rate": 8.457054475612562e-06, "loss": 0.4932, "step": 11245 }, { "epoch": 0.33133479663538734, "grad_norm": 1.6631156556919706, "learning_rate": 8.456682938879663e-06, "loss": 0.3865, "step": 11246 }, { "epoch": 0.33136425909285094, "grad_norm": 1.4970331454740917, "learning_rate": 8.456311365583026e-06, "loss": 0.5397, "step": 11247 }, { "epoch": 0.3313937215503145, "grad_norm": 1.5064362401821592, "learning_rate": 8.455939755726582e-06, "loss": 0.4169, "step": 11248 }, { "epoch": 0.3314231840077781, "grad_norm": 1.6198969259848655, "learning_rate": 8.455568109314259e-06, "loss": 0.5593, "step": 11249 }, { "epoch": 0.33145264646524164, "grad_norm": 1.6194936997094473, "learning_rate": 8.45519642634999e-06, "loss": 0.5029, "step": 11250 }, { "epoch": 0.33148210892270524, "grad_norm": 1.7064579977162824, "learning_rate": 8.454824706837708e-06, "loss": 0.4324, "step": 11251 }, { "epoch": 0.33151157138016885, "grad_norm": 1.607218441901239, "learning_rate": 8.45445295078134e-06, "loss": 0.4885, "step": 11252 }, { "epoch": 0.3315410338376324, "grad_norm": 1.5178774071917949, "learning_rate": 8.454081158184823e-06, "loss": 0.6039, "step": 11253 }, { "epoch": 0.331570496295096, "grad_norm": 1.81473836044903, "learning_rate": 8.45370932905209e-06, "loss": 0.48, "step": 11254 }, { "epoch": 0.33159995875255954, "grad_norm": 1.560554285879054, "learning_rate": 8.453337463387069e-06, "loss": 0.5659, "step": 11255 }, { "epoch": 0.33162942121002315, "grad_norm": 1.4498770973236872, "learning_rate": 8.452965561193698e-06, "loss": 0.489, "step": 11256 }, { "epoch": 0.3316588836674867, "grad_norm": 1.7442276400555579, "learning_rate": 8.452593622475909e-06, "loss": 0.5258, "step": 11257 }, { "epoch": 0.3316883461249503, "grad_norm": 1.5663617852634177, "learning_rate": 8.452221647237639e-06, "loss": 0.5846, "step": 11258 }, { "epoch": 0.33171780858241384, "grad_norm": 1.5275556432933675, "learning_rate": 8.451849635482818e-06, "loss": 0.5591, "step": 11259 }, { "epoch": 0.33174727103987744, "grad_norm": 1.5649672438089735, "learning_rate": 8.451477587215387e-06, "loss": 0.4527, "step": 11260 }, { "epoch": 0.331776733497341, "grad_norm": 1.7204566443338953, "learning_rate": 8.451105502439275e-06, "loss": 0.5712, "step": 11261 }, { "epoch": 0.3318061959548046, "grad_norm": 1.4705963441029442, "learning_rate": 8.45073338115842e-06, "loss": 0.3544, "step": 11262 }, { "epoch": 0.33183565841226814, "grad_norm": 1.741851563561023, "learning_rate": 8.45036122337676e-06, "loss": 0.4998, "step": 11263 }, { "epoch": 0.33186512086973174, "grad_norm": 1.2858109752802254, "learning_rate": 8.449989029098232e-06, "loss": 0.4124, "step": 11264 }, { "epoch": 0.33189458332719535, "grad_norm": 1.7531848113332371, "learning_rate": 8.449616798326769e-06, "loss": 0.4444, "step": 11265 }, { "epoch": 0.3319240457846589, "grad_norm": 1.7481853742060904, "learning_rate": 8.449244531066314e-06, "loss": 0.6292, "step": 11266 }, { "epoch": 0.3319535082421225, "grad_norm": 1.5171369860097261, "learning_rate": 8.448872227320798e-06, "loss": 0.5677, "step": 11267 }, { "epoch": 0.33198297069958604, "grad_norm": 1.4632274553333107, "learning_rate": 8.448499887094166e-06, "loss": 0.4542, "step": 11268 }, { "epoch": 0.33201243315704965, "grad_norm": 1.4330986004642956, "learning_rate": 8.448127510390351e-06, "loss": 0.3574, "step": 11269 }, { "epoch": 0.3320418956145132, "grad_norm": 1.5388281947697813, "learning_rate": 8.447755097213295e-06, "loss": 0.4809, "step": 11270 }, { "epoch": 0.3320713580719768, "grad_norm": 1.6446254904116036, "learning_rate": 8.447382647566934e-06, "loss": 0.5373, "step": 11271 }, { "epoch": 0.33210082052944034, "grad_norm": 1.5167599370606903, "learning_rate": 8.447010161455213e-06, "loss": 0.4037, "step": 11272 }, { "epoch": 0.33213028298690395, "grad_norm": 1.455799712921406, "learning_rate": 8.446637638882069e-06, "loss": 0.4807, "step": 11273 }, { "epoch": 0.3321597454443675, "grad_norm": 1.4148460069961042, "learning_rate": 8.446265079851441e-06, "loss": 0.4006, "step": 11274 }, { "epoch": 0.3321892079018311, "grad_norm": 1.3664179424117424, "learning_rate": 8.445892484367271e-06, "loss": 0.4405, "step": 11275 }, { "epoch": 0.33221867035929464, "grad_norm": 1.6916228008094365, "learning_rate": 8.445519852433502e-06, "loss": 0.6615, "step": 11276 }, { "epoch": 0.33224813281675825, "grad_norm": 1.4541308412607354, "learning_rate": 8.445147184054073e-06, "loss": 0.5496, "step": 11277 }, { "epoch": 0.33227759527422185, "grad_norm": 1.4551955574654516, "learning_rate": 8.444774479232927e-06, "loss": 0.4172, "step": 11278 }, { "epoch": 0.3323070577316854, "grad_norm": 1.580151456140174, "learning_rate": 8.444401737974006e-06, "loss": 0.4769, "step": 11279 }, { "epoch": 0.332336520189149, "grad_norm": 1.5517726468141029, "learning_rate": 8.444028960281255e-06, "loss": 0.5055, "step": 11280 }, { "epoch": 0.33236598264661255, "grad_norm": 1.5627283498465179, "learning_rate": 8.443656146158613e-06, "loss": 0.5007, "step": 11281 }, { "epoch": 0.33239544510407615, "grad_norm": 1.3726534539831732, "learning_rate": 8.443283295610027e-06, "loss": 0.4162, "step": 11282 }, { "epoch": 0.3324249075615397, "grad_norm": 1.355667663930717, "learning_rate": 8.442910408639438e-06, "loss": 0.344, "step": 11283 }, { "epoch": 0.3324543700190033, "grad_norm": 1.6520162320461358, "learning_rate": 8.442537485250795e-06, "loss": 0.515, "step": 11284 }, { "epoch": 0.33248383247646685, "grad_norm": 1.5574658742245757, "learning_rate": 8.442164525448037e-06, "loss": 0.438, "step": 11285 }, { "epoch": 0.33251329493393045, "grad_norm": 1.5134787463892747, "learning_rate": 8.441791529235113e-06, "loss": 0.5476, "step": 11286 }, { "epoch": 0.332542757391394, "grad_norm": 1.556741700002142, "learning_rate": 8.441418496615967e-06, "loss": 0.4737, "step": 11287 }, { "epoch": 0.3325722198488576, "grad_norm": 1.4122236948672082, "learning_rate": 8.441045427594545e-06, "loss": 0.3326, "step": 11288 }, { "epoch": 0.33260168230632114, "grad_norm": 1.512933053574535, "learning_rate": 8.440672322174793e-06, "loss": 0.2969, "step": 11289 }, { "epoch": 0.33263114476378475, "grad_norm": 1.4917317558083707, "learning_rate": 8.440299180360657e-06, "loss": 0.5004, "step": 11290 }, { "epoch": 0.33266060722124835, "grad_norm": 1.5615709867897591, "learning_rate": 8.439926002156085e-06, "loss": 0.5261, "step": 11291 }, { "epoch": 0.3326900696787119, "grad_norm": 1.5025697723009626, "learning_rate": 8.439552787565024e-06, "loss": 0.4625, "step": 11292 }, { "epoch": 0.3327195321361755, "grad_norm": 1.4308977781513779, "learning_rate": 8.439179536591423e-06, "loss": 0.4528, "step": 11293 }, { "epoch": 0.33274899459363905, "grad_norm": 1.7647148248663616, "learning_rate": 8.438806249239229e-06, "loss": 0.434, "step": 11294 }, { "epoch": 0.33277845705110265, "grad_norm": 1.6943996167837063, "learning_rate": 8.438432925512389e-06, "loss": 0.4689, "step": 11295 }, { "epoch": 0.3328079195085662, "grad_norm": 1.5129083999117594, "learning_rate": 8.438059565414856e-06, "loss": 0.5997, "step": 11296 }, { "epoch": 0.3328373819660298, "grad_norm": 1.7383238411201078, "learning_rate": 8.437686168950575e-06, "loss": 0.4938, "step": 11297 }, { "epoch": 0.33286684442349335, "grad_norm": 1.408145974312666, "learning_rate": 8.437312736123496e-06, "loss": 0.4753, "step": 11298 }, { "epoch": 0.33289630688095695, "grad_norm": 1.5262787568052756, "learning_rate": 8.436939266937572e-06, "loss": 0.5079, "step": 11299 }, { "epoch": 0.3329257693384205, "grad_norm": 1.7479749422902258, "learning_rate": 8.436565761396752e-06, "loss": 0.6798, "step": 11300 }, { "epoch": 0.3329552317958841, "grad_norm": 1.493506932797783, "learning_rate": 8.436192219504985e-06, "loss": 0.4892, "step": 11301 }, { "epoch": 0.33298469425334765, "grad_norm": 1.4317025278175943, "learning_rate": 8.435818641266226e-06, "loss": 0.4137, "step": 11302 }, { "epoch": 0.33301415671081125, "grad_norm": 1.5453137994935924, "learning_rate": 8.435445026684423e-06, "loss": 0.4386, "step": 11303 }, { "epoch": 0.33304361916827485, "grad_norm": 1.4146900681466192, "learning_rate": 8.43507137576353e-06, "loss": 0.4576, "step": 11304 }, { "epoch": 0.3330730816257384, "grad_norm": 1.5453823750586937, "learning_rate": 8.4346976885075e-06, "loss": 0.4328, "step": 11305 }, { "epoch": 0.333102544083202, "grad_norm": 1.6111103728596408, "learning_rate": 8.434323964920283e-06, "loss": 0.4877, "step": 11306 }, { "epoch": 0.33313200654066555, "grad_norm": 1.552093226060185, "learning_rate": 8.433950205005835e-06, "loss": 0.5397, "step": 11307 }, { "epoch": 0.33316146899812915, "grad_norm": 1.6097398066569764, "learning_rate": 8.433576408768106e-06, "loss": 0.4883, "step": 11308 }, { "epoch": 0.3331909314555927, "grad_norm": 1.668308195994831, "learning_rate": 8.433202576211055e-06, "loss": 0.5347, "step": 11309 }, { "epoch": 0.3332203939130563, "grad_norm": 1.78658946779707, "learning_rate": 8.432828707338634e-06, "loss": 0.5392, "step": 11310 }, { "epoch": 0.33324985637051985, "grad_norm": 2.021132294325351, "learning_rate": 8.432454802154794e-06, "loss": 0.5129, "step": 11311 }, { "epoch": 0.33327931882798345, "grad_norm": 1.774330135041423, "learning_rate": 8.432080860663495e-06, "loss": 0.6218, "step": 11312 }, { "epoch": 0.333308781285447, "grad_norm": 1.5663211294278456, "learning_rate": 8.431706882868691e-06, "loss": 0.486, "step": 11313 }, { "epoch": 0.3333382437429106, "grad_norm": 1.5668547799266996, "learning_rate": 8.431332868774337e-06, "loss": 0.335, "step": 11314 }, { "epoch": 0.33336770620037415, "grad_norm": 1.6132575750071982, "learning_rate": 8.43095881838439e-06, "loss": 0.5594, "step": 11315 }, { "epoch": 0.33339716865783775, "grad_norm": 1.3291990242686251, "learning_rate": 8.430584731702804e-06, "loss": 0.3579, "step": 11316 }, { "epoch": 0.33342663111530135, "grad_norm": 1.5349661835224422, "learning_rate": 8.43021060873354e-06, "loss": 0.4819, "step": 11317 }, { "epoch": 0.3334560935727649, "grad_norm": 1.4100153486880902, "learning_rate": 8.429836449480553e-06, "loss": 0.424, "step": 11318 }, { "epoch": 0.3334855560302285, "grad_norm": 1.6597553870110486, "learning_rate": 8.429462253947802e-06, "loss": 0.5266, "step": 11319 }, { "epoch": 0.33351501848769205, "grad_norm": 1.4717769922395312, "learning_rate": 8.429088022139245e-06, "loss": 0.4351, "step": 11320 }, { "epoch": 0.33354448094515565, "grad_norm": 1.6384151503637852, "learning_rate": 8.428713754058838e-06, "loss": 0.5039, "step": 11321 }, { "epoch": 0.3335739434026192, "grad_norm": 1.4598920544395055, "learning_rate": 8.428339449710543e-06, "loss": 0.5117, "step": 11322 }, { "epoch": 0.3336034058600828, "grad_norm": 1.3951833193541483, "learning_rate": 8.427965109098317e-06, "loss": 0.4754, "step": 11323 }, { "epoch": 0.33363286831754635, "grad_norm": 1.575711844195147, "learning_rate": 8.427590732226123e-06, "loss": 0.4881, "step": 11324 }, { "epoch": 0.33366233077500995, "grad_norm": 1.4382913751366504, "learning_rate": 8.427216319097918e-06, "loss": 0.4463, "step": 11325 }, { "epoch": 0.3336917932324735, "grad_norm": 1.6365191462376918, "learning_rate": 8.426841869717663e-06, "loss": 0.5286, "step": 11326 }, { "epoch": 0.3337212556899371, "grad_norm": 1.527326925152294, "learning_rate": 8.426467384089319e-06, "loss": 0.4871, "step": 11327 }, { "epoch": 0.33375071814740065, "grad_norm": 1.6556915852960976, "learning_rate": 8.426092862216847e-06, "loss": 0.5224, "step": 11328 }, { "epoch": 0.33378018060486425, "grad_norm": 1.565755045846741, "learning_rate": 8.425718304104209e-06, "loss": 0.3352, "step": 11329 }, { "epoch": 0.33380964306232785, "grad_norm": 1.4979040285127645, "learning_rate": 8.425343709755368e-06, "loss": 0.3754, "step": 11330 }, { "epoch": 0.3338391055197914, "grad_norm": 1.3129152437099942, "learning_rate": 8.424969079174284e-06, "loss": 0.4032, "step": 11331 }, { "epoch": 0.333868567977255, "grad_norm": 1.3624246435510492, "learning_rate": 8.42459441236492e-06, "loss": 0.4528, "step": 11332 }, { "epoch": 0.33389803043471855, "grad_norm": 1.6954771864035842, "learning_rate": 8.42421970933124e-06, "loss": 0.4123, "step": 11333 }, { "epoch": 0.33392749289218215, "grad_norm": 1.6904531764224735, "learning_rate": 8.423844970077208e-06, "loss": 0.4679, "step": 11334 }, { "epoch": 0.3339569553496457, "grad_norm": 1.433154623397618, "learning_rate": 8.423470194606788e-06, "loss": 0.3022, "step": 11335 }, { "epoch": 0.3339864178071093, "grad_norm": 1.7030970570964103, "learning_rate": 8.423095382923943e-06, "loss": 0.4591, "step": 11336 }, { "epoch": 0.33401588026457285, "grad_norm": 1.4184076466869002, "learning_rate": 8.42272053503264e-06, "loss": 0.3375, "step": 11337 }, { "epoch": 0.33404534272203645, "grad_norm": 1.4736116005718356, "learning_rate": 8.422345650936841e-06, "loss": 0.5217, "step": 11338 }, { "epoch": 0.3340748051795, "grad_norm": 1.5354082119007553, "learning_rate": 8.421970730640511e-06, "loss": 0.4687, "step": 11339 }, { "epoch": 0.3341042676369636, "grad_norm": 1.5516234631878598, "learning_rate": 8.42159577414762e-06, "loss": 0.3928, "step": 11340 }, { "epoch": 0.33413373009442715, "grad_norm": 1.4585040885895795, "learning_rate": 8.42122078146213e-06, "loss": 0.3933, "step": 11341 }, { "epoch": 0.33416319255189075, "grad_norm": 1.5127437831824522, "learning_rate": 8.42084575258801e-06, "loss": 0.535, "step": 11342 }, { "epoch": 0.33419265500935436, "grad_norm": 1.5125680665706365, "learning_rate": 8.420470687529226e-06, "loss": 0.5477, "step": 11343 }, { "epoch": 0.3342221174668179, "grad_norm": 1.7789695354587136, "learning_rate": 8.420095586289745e-06, "loss": 0.6752, "step": 11344 }, { "epoch": 0.3342515799242815, "grad_norm": 1.3717362012525058, "learning_rate": 8.419720448873536e-06, "loss": 0.4343, "step": 11345 }, { "epoch": 0.33428104238174505, "grad_norm": 1.382558921342512, "learning_rate": 8.419345275284567e-06, "loss": 0.4006, "step": 11346 }, { "epoch": 0.33431050483920866, "grad_norm": 1.6561401222382974, "learning_rate": 8.418970065526804e-06, "loss": 0.5365, "step": 11347 }, { "epoch": 0.3343399672966722, "grad_norm": 1.696259476881436, "learning_rate": 8.41859481960422e-06, "loss": 0.5567, "step": 11348 }, { "epoch": 0.3343694297541358, "grad_norm": 1.5964968057037536, "learning_rate": 8.41821953752078e-06, "loss": 0.4111, "step": 11349 }, { "epoch": 0.33439889221159935, "grad_norm": 1.564460314531366, "learning_rate": 8.417844219280455e-06, "loss": 0.5497, "step": 11350 }, { "epoch": 0.33442835466906295, "grad_norm": 1.622909404371964, "learning_rate": 8.417468864887217e-06, "loss": 0.5567, "step": 11351 }, { "epoch": 0.3344578171265265, "grad_norm": 1.5161281725418463, "learning_rate": 8.417093474345034e-06, "loss": 0.4566, "step": 11352 }, { "epoch": 0.3344872795839901, "grad_norm": 1.4077315598040783, "learning_rate": 8.416718047657876e-06, "loss": 0.5061, "step": 11353 }, { "epoch": 0.33451674204145365, "grad_norm": 1.490261187431552, "learning_rate": 8.41634258482972e-06, "loss": 0.5402, "step": 11354 }, { "epoch": 0.33454620449891725, "grad_norm": 1.4512191856590209, "learning_rate": 8.41596708586453e-06, "loss": 0.4072, "step": 11355 }, { "epoch": 0.33457566695638086, "grad_norm": 1.5989927526300314, "learning_rate": 8.415591550766282e-06, "loss": 0.4083, "step": 11356 }, { "epoch": 0.3346051294138444, "grad_norm": 1.611229705360865, "learning_rate": 8.415215979538946e-06, "loss": 0.549, "step": 11357 }, { "epoch": 0.334634591871308, "grad_norm": 1.4111169032083937, "learning_rate": 8.4148403721865e-06, "loss": 0.42, "step": 11358 }, { "epoch": 0.33466405432877155, "grad_norm": 1.363344718687649, "learning_rate": 8.41446472871291e-06, "loss": 0.485, "step": 11359 }, { "epoch": 0.33469351678623516, "grad_norm": 1.3785956088312865, "learning_rate": 8.414089049122154e-06, "loss": 0.4059, "step": 11360 }, { "epoch": 0.3347229792436987, "grad_norm": 1.5319791389793185, "learning_rate": 8.413713333418203e-06, "loss": 0.4488, "step": 11361 }, { "epoch": 0.3347524417011623, "grad_norm": 1.5700741469807777, "learning_rate": 8.413337581605035e-06, "loss": 0.6308, "step": 11362 }, { "epoch": 0.33478190415862585, "grad_norm": 1.5612334831245367, "learning_rate": 8.412961793686621e-06, "loss": 0.443, "step": 11363 }, { "epoch": 0.33481136661608946, "grad_norm": 1.6236534206101707, "learning_rate": 8.412585969666938e-06, "loss": 0.5511, "step": 11364 }, { "epoch": 0.334840829073553, "grad_norm": 1.557136923642282, "learning_rate": 8.412210109549959e-06, "loss": 0.4392, "step": 11365 }, { "epoch": 0.3348702915310166, "grad_norm": 1.5204342294615314, "learning_rate": 8.411834213339662e-06, "loss": 0.434, "step": 11366 }, { "epoch": 0.33489975398848015, "grad_norm": 1.6500940573080376, "learning_rate": 8.411458281040024e-06, "loss": 0.5558, "step": 11367 }, { "epoch": 0.33492921644594376, "grad_norm": 1.3872208336876033, "learning_rate": 8.411082312655018e-06, "loss": 0.4355, "step": 11368 }, { "epoch": 0.33495867890340736, "grad_norm": 1.3269971396857119, "learning_rate": 8.410706308188625e-06, "loss": 0.4217, "step": 11369 }, { "epoch": 0.3349881413608709, "grad_norm": 1.4355803874525777, "learning_rate": 8.410330267644819e-06, "loss": 0.31, "step": 11370 }, { "epoch": 0.3350176038183345, "grad_norm": 1.7072051344496406, "learning_rate": 8.409954191027578e-06, "loss": 0.5448, "step": 11371 }, { "epoch": 0.33504706627579806, "grad_norm": 1.649222826865478, "learning_rate": 8.409578078340881e-06, "loss": 0.6365, "step": 11372 }, { "epoch": 0.33507652873326166, "grad_norm": 1.3742552498112934, "learning_rate": 8.409201929588707e-06, "loss": 0.5279, "step": 11373 }, { "epoch": 0.3351059911907252, "grad_norm": 1.68047533362229, "learning_rate": 8.408825744775034e-06, "loss": 0.5891, "step": 11374 }, { "epoch": 0.3351354536481888, "grad_norm": 1.722702530391135, "learning_rate": 8.408449523903842e-06, "loss": 0.4583, "step": 11375 }, { "epoch": 0.33516491610565236, "grad_norm": 1.6372782327660502, "learning_rate": 8.408073266979106e-06, "loss": 0.5298, "step": 11376 }, { "epoch": 0.33519437856311596, "grad_norm": 1.4925639168711478, "learning_rate": 8.407696974004814e-06, "loss": 0.4854, "step": 11377 }, { "epoch": 0.3352238410205795, "grad_norm": 1.8614282839733631, "learning_rate": 8.40732064498494e-06, "loss": 0.3772, "step": 11378 }, { "epoch": 0.3352533034780431, "grad_norm": 1.416685218137789, "learning_rate": 8.406944279923465e-06, "loss": 0.3218, "step": 11379 }, { "epoch": 0.33528276593550665, "grad_norm": 1.7540977615200497, "learning_rate": 8.406567878824374e-06, "loss": 0.6346, "step": 11380 }, { "epoch": 0.33531222839297026, "grad_norm": 1.777463110782789, "learning_rate": 8.406191441691645e-06, "loss": 0.6058, "step": 11381 }, { "epoch": 0.33534169085043386, "grad_norm": 1.5119112335192983, "learning_rate": 8.405814968529263e-06, "loss": 0.3884, "step": 11382 }, { "epoch": 0.3353711533078974, "grad_norm": 1.3491612568820983, "learning_rate": 8.405438459341207e-06, "loss": 0.4233, "step": 11383 }, { "epoch": 0.335400615765361, "grad_norm": 1.475863749017115, "learning_rate": 8.40506191413146e-06, "loss": 0.4753, "step": 11384 }, { "epoch": 0.33543007822282456, "grad_norm": 1.3800414218221497, "learning_rate": 8.404685332904006e-06, "loss": 0.3616, "step": 11385 }, { "epoch": 0.33545954068028816, "grad_norm": 1.7013255408660655, "learning_rate": 8.404308715662828e-06, "loss": 0.5128, "step": 11386 }, { "epoch": 0.3354890031377517, "grad_norm": 1.5594284345386868, "learning_rate": 8.40393206241191e-06, "loss": 0.618, "step": 11387 }, { "epoch": 0.3355184655952153, "grad_norm": 1.5270509154700194, "learning_rate": 8.403555373155236e-06, "loss": 0.3818, "step": 11388 }, { "epoch": 0.33554792805267886, "grad_norm": 1.5688820162244967, "learning_rate": 8.403178647896793e-06, "loss": 0.3654, "step": 11389 }, { "epoch": 0.33557739051014246, "grad_norm": 1.562716009359604, "learning_rate": 8.402801886640562e-06, "loss": 0.4497, "step": 11390 }, { "epoch": 0.335606852967606, "grad_norm": 1.5426201355638036, "learning_rate": 8.402425089390527e-06, "loss": 0.5191, "step": 11391 }, { "epoch": 0.3356363154250696, "grad_norm": 1.5157866804337705, "learning_rate": 8.40204825615068e-06, "loss": 0.561, "step": 11392 }, { "epoch": 0.33566577788253316, "grad_norm": 1.3237241005938083, "learning_rate": 8.401671386925001e-06, "loss": 0.3922, "step": 11393 }, { "epoch": 0.33569524033999676, "grad_norm": 1.7003141038443872, "learning_rate": 8.401294481717479e-06, "loss": 0.5297, "step": 11394 }, { "epoch": 0.33572470279746036, "grad_norm": 1.393347609965451, "learning_rate": 8.400917540532101e-06, "loss": 0.4003, "step": 11395 }, { "epoch": 0.3357541652549239, "grad_norm": 1.4600573663302867, "learning_rate": 8.400540563372855e-06, "loss": 0.4177, "step": 11396 }, { "epoch": 0.3357836277123875, "grad_norm": 1.4009533058365684, "learning_rate": 8.400163550243726e-06, "loss": 0.4709, "step": 11397 }, { "epoch": 0.33581309016985106, "grad_norm": 1.4705333501984084, "learning_rate": 8.399786501148704e-06, "loss": 0.414, "step": 11398 }, { "epoch": 0.33584255262731466, "grad_norm": 1.6218068416161113, "learning_rate": 8.399409416091775e-06, "loss": 0.5143, "step": 11399 }, { "epoch": 0.3358720150847782, "grad_norm": 1.5139092230479128, "learning_rate": 8.39903229507693e-06, "loss": 0.4072, "step": 11400 }, { "epoch": 0.3359014775422418, "grad_norm": 1.2489888016678634, "learning_rate": 8.398655138108156e-06, "loss": 0.2899, "step": 11401 }, { "epoch": 0.33593093999970536, "grad_norm": 1.6171309213675726, "learning_rate": 8.398277945189445e-06, "loss": 0.6027, "step": 11402 }, { "epoch": 0.33596040245716896, "grad_norm": 1.5322592934131494, "learning_rate": 8.397900716324786e-06, "loss": 0.4927, "step": 11403 }, { "epoch": 0.3359898649146325, "grad_norm": 1.5130660550485002, "learning_rate": 8.397523451518168e-06, "loss": 0.477, "step": 11404 }, { "epoch": 0.3360193273720961, "grad_norm": 1.3744804939758246, "learning_rate": 8.397146150773583e-06, "loss": 0.5175, "step": 11405 }, { "epoch": 0.33604878982955966, "grad_norm": 1.4739009732079076, "learning_rate": 8.39676881409502e-06, "loss": 0.383, "step": 11406 }, { "epoch": 0.33607825228702326, "grad_norm": 1.432594751826223, "learning_rate": 8.396391441486473e-06, "loss": 0.5372, "step": 11407 }, { "epoch": 0.33610771474448686, "grad_norm": 1.5241234435133537, "learning_rate": 8.396014032951934e-06, "loss": 0.4901, "step": 11408 }, { "epoch": 0.3361371772019504, "grad_norm": 1.4782869027917946, "learning_rate": 8.395636588495391e-06, "loss": 0.372, "step": 11409 }, { "epoch": 0.336166639659414, "grad_norm": 1.5115261201457462, "learning_rate": 8.395259108120839e-06, "loss": 0.4555, "step": 11410 }, { "epoch": 0.33619610211687756, "grad_norm": 1.748968370190763, "learning_rate": 8.39488159183227e-06, "loss": 0.5332, "step": 11411 }, { "epoch": 0.33622556457434116, "grad_norm": 1.5759886011874755, "learning_rate": 8.394504039633681e-06, "loss": 0.5172, "step": 11412 }, { "epoch": 0.3362550270318047, "grad_norm": 1.59732718898934, "learning_rate": 8.39412645152906e-06, "loss": 0.5283, "step": 11413 }, { "epoch": 0.3362844894892683, "grad_norm": 1.570912412418813, "learning_rate": 8.393748827522406e-06, "loss": 0.5749, "step": 11414 }, { "epoch": 0.33631395194673186, "grad_norm": 1.535707468622591, "learning_rate": 8.39337116761771e-06, "loss": 0.4157, "step": 11415 }, { "epoch": 0.33634341440419546, "grad_norm": 1.5283540410996261, "learning_rate": 8.39299347181897e-06, "loss": 0.5115, "step": 11416 }, { "epoch": 0.336372876861659, "grad_norm": 1.5228739444353474, "learning_rate": 8.392615740130177e-06, "loss": 0.559, "step": 11417 }, { "epoch": 0.3364023393191226, "grad_norm": 1.6707633579595806, "learning_rate": 8.392237972555327e-06, "loss": 0.5159, "step": 11418 }, { "epoch": 0.33643180177658616, "grad_norm": 1.4717694702472244, "learning_rate": 8.391860169098423e-06, "loss": 0.4027, "step": 11419 }, { "epoch": 0.33646126423404976, "grad_norm": 1.7809371281596256, "learning_rate": 8.391482329763452e-06, "loss": 0.5363, "step": 11420 }, { "epoch": 0.33649072669151336, "grad_norm": 1.6135810738067065, "learning_rate": 8.391104454554413e-06, "loss": 0.5198, "step": 11421 }, { "epoch": 0.3365201891489769, "grad_norm": 1.5957709825762887, "learning_rate": 8.390726543475308e-06, "loss": 0.526, "step": 11422 }, { "epoch": 0.3365496516064405, "grad_norm": 1.5209135549049448, "learning_rate": 8.390348596530129e-06, "loss": 0.4619, "step": 11423 }, { "epoch": 0.33657911406390406, "grad_norm": 1.6849352949085905, "learning_rate": 8.389970613722876e-06, "loss": 0.572, "step": 11424 }, { "epoch": 0.33660857652136766, "grad_norm": 1.522523525311819, "learning_rate": 8.389592595057547e-06, "loss": 0.4335, "step": 11425 }, { "epoch": 0.3366380389788312, "grad_norm": 1.5913764340286356, "learning_rate": 8.389214540538142e-06, "loss": 0.4729, "step": 11426 }, { "epoch": 0.3366675014362948, "grad_norm": 1.5329592199300142, "learning_rate": 8.388836450168655e-06, "loss": 0.4263, "step": 11427 }, { "epoch": 0.33669696389375836, "grad_norm": 1.465302860442774, "learning_rate": 8.388458323953093e-06, "loss": 0.4599, "step": 11428 }, { "epoch": 0.33672642635122196, "grad_norm": 1.4137602516071532, "learning_rate": 8.38808016189545e-06, "loss": 0.5252, "step": 11429 }, { "epoch": 0.3367558888086855, "grad_norm": 1.6947551384160882, "learning_rate": 8.387701963999726e-06, "loss": 0.4801, "step": 11430 }, { "epoch": 0.3367853512661491, "grad_norm": 1.5746621973232537, "learning_rate": 8.387323730269924e-06, "loss": 0.5631, "step": 11431 }, { "epoch": 0.33681481372361266, "grad_norm": 1.61945213098027, "learning_rate": 8.386945460710045e-06, "loss": 0.6243, "step": 11432 }, { "epoch": 0.33684427618107626, "grad_norm": 1.5878310618537912, "learning_rate": 8.386567155324087e-06, "loss": 0.6305, "step": 11433 }, { "epoch": 0.33687373863853987, "grad_norm": 1.668312243474639, "learning_rate": 8.386188814116056e-06, "loss": 0.5896, "step": 11434 }, { "epoch": 0.3369032010960034, "grad_norm": 1.4894747200885219, "learning_rate": 8.385810437089952e-06, "loss": 0.3424, "step": 11435 }, { "epoch": 0.336932663553467, "grad_norm": 1.594385541004528, "learning_rate": 8.385432024249774e-06, "loss": 0.5581, "step": 11436 }, { "epoch": 0.33696212601093056, "grad_norm": 1.3636538052808933, "learning_rate": 8.38505357559953e-06, "loss": 0.3699, "step": 11437 }, { "epoch": 0.33699158846839417, "grad_norm": 1.403048943133169, "learning_rate": 8.384675091143221e-06, "loss": 0.4264, "step": 11438 }, { "epoch": 0.3370210509258577, "grad_norm": 2.0237427696810846, "learning_rate": 8.38429657088485e-06, "loss": 0.6142, "step": 11439 }, { "epoch": 0.3370505133833213, "grad_norm": 1.594637986645361, "learning_rate": 8.38391801482842e-06, "loss": 0.4868, "step": 11440 }, { "epoch": 0.33707997584078486, "grad_norm": 1.3945232720450664, "learning_rate": 8.383539422977939e-06, "loss": 0.4194, "step": 11441 }, { "epoch": 0.33710943829824846, "grad_norm": 1.4442662500580912, "learning_rate": 8.383160795337407e-06, "loss": 0.4601, "step": 11442 }, { "epoch": 0.337138900755712, "grad_norm": 1.4115546567160624, "learning_rate": 8.382782131910832e-06, "loss": 0.4755, "step": 11443 }, { "epoch": 0.3371683632131756, "grad_norm": 1.7298112931231249, "learning_rate": 8.38240343270222e-06, "loss": 0.6119, "step": 11444 }, { "epoch": 0.33719782567063916, "grad_norm": 1.5556335242361743, "learning_rate": 8.382024697715573e-06, "loss": 0.4847, "step": 11445 }, { "epoch": 0.33722728812810276, "grad_norm": 1.494858059790118, "learning_rate": 8.3816459269549e-06, "loss": 0.4459, "step": 11446 }, { "epoch": 0.33725675058556637, "grad_norm": 1.6468499522582827, "learning_rate": 8.381267120424208e-06, "loss": 0.4466, "step": 11447 }, { "epoch": 0.3372862130430299, "grad_norm": 1.4864228285582106, "learning_rate": 8.380888278127502e-06, "loss": 0.411, "step": 11448 }, { "epoch": 0.3373156755004935, "grad_norm": 1.567129392616773, "learning_rate": 8.38050940006879e-06, "loss": 0.4463, "step": 11449 }, { "epoch": 0.33734513795795706, "grad_norm": 1.5329471766993932, "learning_rate": 8.38013048625208e-06, "loss": 0.4815, "step": 11450 }, { "epoch": 0.33737460041542067, "grad_norm": 1.5739273277305346, "learning_rate": 8.37975153668138e-06, "loss": 0.4741, "step": 11451 }, { "epoch": 0.3374040628728842, "grad_norm": 1.5977721447187658, "learning_rate": 8.379372551360698e-06, "loss": 0.584, "step": 11452 }, { "epoch": 0.3374335253303478, "grad_norm": 1.4024371035238403, "learning_rate": 8.378993530294045e-06, "loss": 0.4746, "step": 11453 }, { "epoch": 0.33746298778781136, "grad_norm": 1.5857936844427807, "learning_rate": 8.378614473485425e-06, "loss": 0.5319, "step": 11454 }, { "epoch": 0.33749245024527497, "grad_norm": 1.3050510321119588, "learning_rate": 8.378235380938851e-06, "loss": 0.2879, "step": 11455 }, { "epoch": 0.3375219127027385, "grad_norm": 1.4608792002928748, "learning_rate": 8.377856252658335e-06, "loss": 0.458, "step": 11456 }, { "epoch": 0.3375513751602021, "grad_norm": 1.5152847321172935, "learning_rate": 8.377477088647883e-06, "loss": 0.5417, "step": 11457 }, { "epoch": 0.33758083761766566, "grad_norm": 1.8205209157731448, "learning_rate": 8.37709788891151e-06, "loss": 0.6144, "step": 11458 }, { "epoch": 0.33761030007512927, "grad_norm": 1.4242494972877355, "learning_rate": 8.376718653453221e-06, "loss": 0.4007, "step": 11459 }, { "epoch": 0.33763976253259287, "grad_norm": 1.494716941024696, "learning_rate": 8.376339382277034e-06, "loss": 0.3954, "step": 11460 }, { "epoch": 0.3376692249900564, "grad_norm": 1.6052749408922515, "learning_rate": 8.375960075386959e-06, "loss": 0.4343, "step": 11461 }, { "epoch": 0.33769868744752, "grad_norm": 1.4005744250764496, "learning_rate": 8.375580732787005e-06, "loss": 0.388, "step": 11462 }, { "epoch": 0.33772814990498357, "grad_norm": 1.3709931829194, "learning_rate": 8.375201354481188e-06, "loss": 0.3801, "step": 11463 }, { "epoch": 0.33775761236244717, "grad_norm": 1.6457058516366996, "learning_rate": 8.374821940473518e-06, "loss": 0.5791, "step": 11464 }, { "epoch": 0.3377870748199107, "grad_norm": 1.6283229806554864, "learning_rate": 8.37444249076801e-06, "loss": 0.4175, "step": 11465 }, { "epoch": 0.3378165372773743, "grad_norm": 1.4813199083460222, "learning_rate": 8.37406300536868e-06, "loss": 0.3827, "step": 11466 }, { "epoch": 0.33784599973483787, "grad_norm": 1.4781790344607162, "learning_rate": 8.37368348427954e-06, "loss": 0.3804, "step": 11467 }, { "epoch": 0.33787546219230147, "grad_norm": 1.5307319222442386, "learning_rate": 8.373303927504602e-06, "loss": 0.4027, "step": 11468 }, { "epoch": 0.337904924649765, "grad_norm": 1.6074500723997702, "learning_rate": 8.372924335047885e-06, "loss": 0.4643, "step": 11469 }, { "epoch": 0.3379343871072286, "grad_norm": 1.5101532846601384, "learning_rate": 8.372544706913401e-06, "loss": 0.4295, "step": 11470 }, { "epoch": 0.33796384956469216, "grad_norm": 1.6347696620989964, "learning_rate": 8.372165043105166e-06, "loss": 0.5602, "step": 11471 }, { "epoch": 0.33799331202215577, "grad_norm": 1.5111491149625496, "learning_rate": 8.371785343627198e-06, "loss": 0.4516, "step": 11472 }, { "epoch": 0.33802277447961937, "grad_norm": 1.601803287844482, "learning_rate": 8.371405608483514e-06, "loss": 0.4697, "step": 11473 }, { "epoch": 0.3380522369370829, "grad_norm": 1.5775574916845092, "learning_rate": 8.371025837678126e-06, "loss": 0.4728, "step": 11474 }, { "epoch": 0.3380816993945465, "grad_norm": 1.5369789426023654, "learning_rate": 8.370646031215056e-06, "loss": 0.4118, "step": 11475 }, { "epoch": 0.33811116185201007, "grad_norm": 1.5836019889516042, "learning_rate": 8.370266189098319e-06, "loss": 0.5815, "step": 11476 }, { "epoch": 0.33814062430947367, "grad_norm": 1.3115326752362717, "learning_rate": 8.369886311331935e-06, "loss": 0.3519, "step": 11477 }, { "epoch": 0.3381700867669372, "grad_norm": 1.7101355498641844, "learning_rate": 8.369506397919917e-06, "loss": 0.6706, "step": 11478 }, { "epoch": 0.3381995492244008, "grad_norm": 1.5617181164874556, "learning_rate": 8.36912644886629e-06, "loss": 0.5015, "step": 11479 }, { "epoch": 0.33822901168186437, "grad_norm": 1.6672372857646844, "learning_rate": 8.368746464175069e-06, "loss": 0.5595, "step": 11480 }, { "epoch": 0.33825847413932797, "grad_norm": 1.7363702646336068, "learning_rate": 8.368366443850275e-06, "loss": 0.5763, "step": 11481 }, { "epoch": 0.3382879365967915, "grad_norm": 1.466760206842845, "learning_rate": 8.367986387895926e-06, "loss": 0.3927, "step": 11482 }, { "epoch": 0.3383173990542551, "grad_norm": 1.6940409660424625, "learning_rate": 8.367606296316046e-06, "loss": 0.6213, "step": 11483 }, { "epoch": 0.33834686151171867, "grad_norm": 1.6840665913594977, "learning_rate": 8.36722616911465e-06, "loss": 0.5018, "step": 11484 }, { "epoch": 0.33837632396918227, "grad_norm": 1.4206835541986869, "learning_rate": 8.366846006295764e-06, "loss": 0.3735, "step": 11485 }, { "epoch": 0.33840578642664587, "grad_norm": 1.5590978018103205, "learning_rate": 8.366465807863406e-06, "loss": 0.4254, "step": 11486 }, { "epoch": 0.3384352488841094, "grad_norm": 1.4995040124025694, "learning_rate": 8.366085573821598e-06, "loss": 0.5331, "step": 11487 }, { "epoch": 0.338464711341573, "grad_norm": 1.5883497680406153, "learning_rate": 8.365705304174363e-06, "loss": 0.3763, "step": 11488 }, { "epoch": 0.33849417379903657, "grad_norm": 1.2548052285419236, "learning_rate": 8.365324998925724e-06, "loss": 0.3626, "step": 11489 }, { "epoch": 0.33852363625650017, "grad_norm": 1.3941771289702714, "learning_rate": 8.364944658079703e-06, "loss": 0.3693, "step": 11490 }, { "epoch": 0.3385530987139637, "grad_norm": 1.655285026420713, "learning_rate": 8.364564281640321e-06, "loss": 0.436, "step": 11491 }, { "epoch": 0.3385825611714273, "grad_norm": 1.7252699797905493, "learning_rate": 8.364183869611607e-06, "loss": 0.5796, "step": 11492 }, { "epoch": 0.33861202362889087, "grad_norm": 1.3060507024124133, "learning_rate": 8.363803421997579e-06, "loss": 0.3791, "step": 11493 }, { "epoch": 0.33864148608635447, "grad_norm": 1.653810356643275, "learning_rate": 8.363422938802264e-06, "loss": 0.5977, "step": 11494 }, { "epoch": 0.338670948543818, "grad_norm": 1.2459251400164313, "learning_rate": 8.363042420029684e-06, "loss": 0.3683, "step": 11495 }, { "epoch": 0.3387004110012816, "grad_norm": 1.516524404671676, "learning_rate": 8.362661865683868e-06, "loss": 0.5187, "step": 11496 }, { "epoch": 0.33872987345874517, "grad_norm": 1.5633673026922803, "learning_rate": 8.36228127576884e-06, "loss": 0.62, "step": 11497 }, { "epoch": 0.33875933591620877, "grad_norm": 1.56878085867035, "learning_rate": 8.361900650288624e-06, "loss": 0.5709, "step": 11498 }, { "epoch": 0.3387887983736724, "grad_norm": 1.6631488273766772, "learning_rate": 8.361519989247248e-06, "loss": 0.4441, "step": 11499 }, { "epoch": 0.3388182608311359, "grad_norm": 1.5086290424878241, "learning_rate": 8.361139292648738e-06, "loss": 0.5616, "step": 11500 }, { "epoch": 0.3388477232885995, "grad_norm": 1.3928370207538399, "learning_rate": 8.360758560497122e-06, "loss": 0.4711, "step": 11501 }, { "epoch": 0.33887718574606307, "grad_norm": 1.6442820365941464, "learning_rate": 8.360377792796426e-06, "loss": 0.523, "step": 11502 }, { "epoch": 0.3389066482035267, "grad_norm": 1.4720969663260042, "learning_rate": 8.359996989550678e-06, "loss": 0.4595, "step": 11503 }, { "epoch": 0.3389361106609902, "grad_norm": 1.589325002323626, "learning_rate": 8.359616150763903e-06, "loss": 0.3459, "step": 11504 }, { "epoch": 0.3389655731184538, "grad_norm": 1.50359898541319, "learning_rate": 8.359235276440134e-06, "loss": 0.512, "step": 11505 }, { "epoch": 0.33899503557591737, "grad_norm": 1.5311338190839954, "learning_rate": 8.3588543665834e-06, "loss": 0.5151, "step": 11506 }, { "epoch": 0.33902449803338097, "grad_norm": 1.611808303630763, "learning_rate": 8.358473421197725e-06, "loss": 0.4733, "step": 11507 }, { "epoch": 0.3390539604908445, "grad_norm": 1.6560917320048125, "learning_rate": 8.358092440287145e-06, "loss": 0.5661, "step": 11508 }, { "epoch": 0.3390834229483081, "grad_norm": 1.5056633375777615, "learning_rate": 8.357711423855683e-06, "loss": 0.5367, "step": 11509 }, { "epoch": 0.33911288540577167, "grad_norm": 1.6320103541038924, "learning_rate": 8.357330371907375e-06, "loss": 0.4888, "step": 11510 }, { "epoch": 0.33914234786323527, "grad_norm": 1.6314904292284407, "learning_rate": 8.35694928444625e-06, "loss": 0.5073, "step": 11511 }, { "epoch": 0.3391718103206989, "grad_norm": 1.4849269056444392, "learning_rate": 8.356568161476336e-06, "loss": 0.3995, "step": 11512 }, { "epoch": 0.3392012727781624, "grad_norm": 1.610040764198047, "learning_rate": 8.35618700300167e-06, "loss": 0.4728, "step": 11513 }, { "epoch": 0.339230735235626, "grad_norm": 1.5350660716346336, "learning_rate": 8.35580580902628e-06, "loss": 0.4328, "step": 11514 }, { "epoch": 0.33926019769308957, "grad_norm": 1.419217211288449, "learning_rate": 8.355424579554197e-06, "loss": 0.394, "step": 11515 }, { "epoch": 0.3392896601505532, "grad_norm": 1.4573940655894602, "learning_rate": 8.355043314589458e-06, "loss": 0.4897, "step": 11516 }, { "epoch": 0.3393191226080167, "grad_norm": 1.8634129967204527, "learning_rate": 8.35466201413609e-06, "loss": 0.5286, "step": 11517 }, { "epoch": 0.3393485850654803, "grad_norm": 1.4840894866146206, "learning_rate": 8.354280678198133e-06, "loss": 0.4715, "step": 11518 }, { "epoch": 0.33937804752294387, "grad_norm": 1.547882434326658, "learning_rate": 8.353899306779615e-06, "loss": 0.4956, "step": 11519 }, { "epoch": 0.3394075099804075, "grad_norm": 1.5928936500520223, "learning_rate": 8.353517899884573e-06, "loss": 0.5449, "step": 11520 }, { "epoch": 0.339436972437871, "grad_norm": 1.7581281798895116, "learning_rate": 8.35313645751704e-06, "loss": 0.4862, "step": 11521 }, { "epoch": 0.3394664348953346, "grad_norm": 1.6208760650265268, "learning_rate": 8.352754979681053e-06, "loss": 0.4714, "step": 11522 }, { "epoch": 0.33949589735279817, "grad_norm": 1.4928886177569805, "learning_rate": 8.352373466380645e-06, "loss": 0.322, "step": 11523 }, { "epoch": 0.3395253598102618, "grad_norm": 1.6577175123426506, "learning_rate": 8.351991917619851e-06, "loss": 0.5809, "step": 11524 }, { "epoch": 0.3395548222677254, "grad_norm": 1.4096161553364008, "learning_rate": 8.351610333402709e-06, "loss": 0.4186, "step": 11525 }, { "epoch": 0.3395842847251889, "grad_norm": 1.5557200328997685, "learning_rate": 8.351228713733254e-06, "loss": 0.3794, "step": 11526 }, { "epoch": 0.3396137471826525, "grad_norm": 1.3216132545189017, "learning_rate": 8.350847058615524e-06, "loss": 0.4268, "step": 11527 }, { "epoch": 0.3396432096401161, "grad_norm": 1.4113591694524936, "learning_rate": 8.350465368053552e-06, "loss": 0.4252, "step": 11528 }, { "epoch": 0.3396726720975797, "grad_norm": 1.6553307540983278, "learning_rate": 8.350083642051382e-06, "loss": 0.4602, "step": 11529 }, { "epoch": 0.3397021345550432, "grad_norm": 1.3618137948484845, "learning_rate": 8.349701880613045e-06, "loss": 0.4494, "step": 11530 }, { "epoch": 0.3397315970125068, "grad_norm": 1.5946368699519395, "learning_rate": 8.349320083742584e-06, "loss": 0.3684, "step": 11531 }, { "epoch": 0.3397610594699704, "grad_norm": 1.534145356335552, "learning_rate": 8.348938251444037e-06, "loss": 0.494, "step": 11532 }, { "epoch": 0.339790521927434, "grad_norm": 1.3602383549233148, "learning_rate": 8.348556383721442e-06, "loss": 0.4727, "step": 11533 }, { "epoch": 0.3398199843848975, "grad_norm": 1.4466004604156653, "learning_rate": 8.348174480578837e-06, "loss": 0.3796, "step": 11534 }, { "epoch": 0.3398494468423611, "grad_norm": 1.9009904993538267, "learning_rate": 8.347792542020262e-06, "loss": 0.6023, "step": 11535 }, { "epoch": 0.33987890929982467, "grad_norm": 1.451206555117901, "learning_rate": 8.347410568049759e-06, "loss": 0.4099, "step": 11536 }, { "epoch": 0.3399083717572883, "grad_norm": 1.567058885797298, "learning_rate": 8.347028558671366e-06, "loss": 0.5063, "step": 11537 }, { "epoch": 0.3399378342147519, "grad_norm": 1.5787624064300265, "learning_rate": 8.346646513889126e-06, "loss": 0.3965, "step": 11538 }, { "epoch": 0.3399672966722154, "grad_norm": 1.6201346908805685, "learning_rate": 8.346264433707078e-06, "loss": 0.5107, "step": 11539 }, { "epoch": 0.339996759129679, "grad_norm": 1.4196191242981553, "learning_rate": 8.345882318129266e-06, "loss": 0.4197, "step": 11540 }, { "epoch": 0.3400262215871426, "grad_norm": 1.4463950143694495, "learning_rate": 8.34550016715973e-06, "loss": 0.4957, "step": 11541 }, { "epoch": 0.3400556840446062, "grad_norm": 1.4766500469542507, "learning_rate": 8.345117980802512e-06, "loss": 0.4593, "step": 11542 }, { "epoch": 0.3400851465020697, "grad_norm": 1.5388978128875934, "learning_rate": 8.344735759061656e-06, "loss": 0.4693, "step": 11543 }, { "epoch": 0.3401146089595333, "grad_norm": 1.5754220124252056, "learning_rate": 8.344353501941205e-06, "loss": 0.5623, "step": 11544 }, { "epoch": 0.3401440714169969, "grad_norm": 1.3794825186234623, "learning_rate": 8.3439712094452e-06, "loss": 0.3214, "step": 11545 }, { "epoch": 0.3401735338744605, "grad_norm": 1.5705611991140873, "learning_rate": 8.343588881577688e-06, "loss": 0.3883, "step": 11546 }, { "epoch": 0.340202996331924, "grad_norm": 1.6874429606606043, "learning_rate": 8.343206518342713e-06, "loss": 0.4557, "step": 11547 }, { "epoch": 0.3402324587893876, "grad_norm": 1.6648183563849928, "learning_rate": 8.342824119744317e-06, "loss": 0.4951, "step": 11548 }, { "epoch": 0.3402619212468512, "grad_norm": 1.3607450133544534, "learning_rate": 8.342441685786547e-06, "loss": 0.3046, "step": 11549 }, { "epoch": 0.3402913837043148, "grad_norm": 1.6858263659413308, "learning_rate": 8.342059216473446e-06, "loss": 0.4, "step": 11550 }, { "epoch": 0.3403208461617784, "grad_norm": 1.4370865392794818, "learning_rate": 8.341676711809061e-06, "loss": 0.4851, "step": 11551 }, { "epoch": 0.3403503086192419, "grad_norm": 1.398403526200083, "learning_rate": 8.341294171797441e-06, "loss": 0.3734, "step": 11552 }, { "epoch": 0.34037977107670553, "grad_norm": 1.6661485706749801, "learning_rate": 8.340911596442627e-06, "loss": 0.4884, "step": 11553 }, { "epoch": 0.3404092335341691, "grad_norm": 1.4485006398224367, "learning_rate": 8.34052898574867e-06, "loss": 0.4807, "step": 11554 }, { "epoch": 0.3404386959916327, "grad_norm": 1.5442720710822329, "learning_rate": 8.340146339719615e-06, "loss": 0.4893, "step": 11555 }, { "epoch": 0.3404681584490962, "grad_norm": 1.5829030319725257, "learning_rate": 8.339763658359507e-06, "loss": 0.5338, "step": 11556 }, { "epoch": 0.34049762090655983, "grad_norm": 1.4850206849754974, "learning_rate": 8.3393809416724e-06, "loss": 0.3849, "step": 11557 }, { "epoch": 0.3405270833640234, "grad_norm": 1.3599479603291398, "learning_rate": 8.33899818966234e-06, "loss": 0.3205, "step": 11558 }, { "epoch": 0.340556545821487, "grad_norm": 1.580155513385796, "learning_rate": 8.338615402333372e-06, "loss": 0.3444, "step": 11559 }, { "epoch": 0.3405860082789505, "grad_norm": 1.639266801369025, "learning_rate": 8.338232579689549e-06, "loss": 0.5179, "step": 11560 }, { "epoch": 0.3406154707364141, "grad_norm": 1.711394332032749, "learning_rate": 8.337849721734918e-06, "loss": 0.4268, "step": 11561 }, { "epoch": 0.3406449331938777, "grad_norm": 1.574955596527558, "learning_rate": 8.337466828473534e-06, "loss": 0.4407, "step": 11562 }, { "epoch": 0.3406743956513413, "grad_norm": 1.742732448869457, "learning_rate": 8.337083899909438e-06, "loss": 0.5771, "step": 11563 }, { "epoch": 0.3407038581088049, "grad_norm": 1.6350116187172805, "learning_rate": 8.336700936046687e-06, "loss": 0.612, "step": 11564 }, { "epoch": 0.3407333205662684, "grad_norm": 1.6394028730980368, "learning_rate": 8.336317936889331e-06, "loss": 0.5566, "step": 11565 }, { "epoch": 0.34076278302373203, "grad_norm": 1.655926827196393, "learning_rate": 8.33593490244142e-06, "loss": 0.5008, "step": 11566 }, { "epoch": 0.3407922454811956, "grad_norm": 1.6609691507803985, "learning_rate": 8.335551832707007e-06, "loss": 0.5891, "step": 11567 }, { "epoch": 0.3408217079386592, "grad_norm": 1.331909674993185, "learning_rate": 8.335168727690144e-06, "loss": 0.4141, "step": 11568 }, { "epoch": 0.3408511703961227, "grad_norm": 1.6071945426361767, "learning_rate": 8.334785587394881e-06, "loss": 0.5343, "step": 11569 }, { "epoch": 0.34088063285358633, "grad_norm": 1.4574649071517443, "learning_rate": 8.334402411825272e-06, "loss": 0.462, "step": 11570 }, { "epoch": 0.3409100953110499, "grad_norm": 1.6032564628602446, "learning_rate": 8.334019200985372e-06, "loss": 0.4831, "step": 11571 }, { "epoch": 0.3409395577685135, "grad_norm": 1.606107037775596, "learning_rate": 8.333635954879234e-06, "loss": 0.5464, "step": 11572 }, { "epoch": 0.340969020225977, "grad_norm": 1.4518191436513785, "learning_rate": 8.333252673510908e-06, "loss": 0.3991, "step": 11573 }, { "epoch": 0.34099848268344063, "grad_norm": 1.63625892426009, "learning_rate": 8.332869356884452e-06, "loss": 0.5866, "step": 11574 }, { "epoch": 0.3410279451409042, "grad_norm": 1.505260916147287, "learning_rate": 8.332486005003922e-06, "loss": 0.509, "step": 11575 }, { "epoch": 0.3410574075983678, "grad_norm": 1.5036992458352632, "learning_rate": 8.332102617873367e-06, "loss": 0.4806, "step": 11576 }, { "epoch": 0.3410868700558314, "grad_norm": 1.407020444716629, "learning_rate": 8.331719195496848e-06, "loss": 0.4699, "step": 11577 }, { "epoch": 0.34111633251329493, "grad_norm": 1.519742482303434, "learning_rate": 8.331335737878419e-06, "loss": 0.3934, "step": 11578 }, { "epoch": 0.34114579497075853, "grad_norm": 1.562310637723198, "learning_rate": 8.330952245022134e-06, "loss": 0.5127, "step": 11579 }, { "epoch": 0.3411752574282221, "grad_norm": 1.5009355083049156, "learning_rate": 8.330568716932053e-06, "loss": 0.4503, "step": 11580 }, { "epoch": 0.3412047198856857, "grad_norm": 1.5408367765882778, "learning_rate": 8.33018515361223e-06, "loss": 0.5053, "step": 11581 }, { "epoch": 0.34123418234314923, "grad_norm": 1.6271475793289147, "learning_rate": 8.329801555066725e-06, "loss": 0.5016, "step": 11582 }, { "epoch": 0.34126364480061283, "grad_norm": 1.566433631234372, "learning_rate": 8.329417921299593e-06, "loss": 0.5781, "step": 11583 }, { "epoch": 0.3412931072580764, "grad_norm": 2.181517544350853, "learning_rate": 8.329034252314893e-06, "loss": 0.5788, "step": 11584 }, { "epoch": 0.34132256971554, "grad_norm": 1.548123856963544, "learning_rate": 8.328650548116682e-06, "loss": 0.4399, "step": 11585 }, { "epoch": 0.34135203217300353, "grad_norm": 1.6736101192133241, "learning_rate": 8.328266808709021e-06, "loss": 0.4472, "step": 11586 }, { "epoch": 0.34138149463046713, "grad_norm": 1.5947501649867264, "learning_rate": 8.327883034095968e-06, "loss": 0.5307, "step": 11587 }, { "epoch": 0.3414109570879307, "grad_norm": 1.5370628012834022, "learning_rate": 8.327499224281583e-06, "loss": 0.5367, "step": 11588 }, { "epoch": 0.3414404195453943, "grad_norm": 1.4690131923683385, "learning_rate": 8.327115379269925e-06, "loss": 0.3933, "step": 11589 }, { "epoch": 0.3414698820028579, "grad_norm": 1.60309927634022, "learning_rate": 8.326731499065054e-06, "loss": 0.5318, "step": 11590 }, { "epoch": 0.34149934446032143, "grad_norm": 1.7714648456381907, "learning_rate": 8.326347583671031e-06, "loss": 0.5563, "step": 11591 }, { "epoch": 0.34152880691778503, "grad_norm": 1.608752851868269, "learning_rate": 8.325963633091918e-06, "loss": 0.5086, "step": 11592 }, { "epoch": 0.3415582693752486, "grad_norm": 1.3972241411485036, "learning_rate": 8.325579647331773e-06, "loss": 0.3539, "step": 11593 }, { "epoch": 0.3415877318327122, "grad_norm": 1.5832845250574785, "learning_rate": 8.325195626394664e-06, "loss": 0.3649, "step": 11594 }, { "epoch": 0.34161719429017573, "grad_norm": 1.7544107303045464, "learning_rate": 8.324811570284646e-06, "loss": 0.5018, "step": 11595 }, { "epoch": 0.34164665674763933, "grad_norm": 1.5710939129981236, "learning_rate": 8.324427479005785e-06, "loss": 0.5856, "step": 11596 }, { "epoch": 0.3416761192051029, "grad_norm": 1.2654652262893846, "learning_rate": 8.324043352562143e-06, "loss": 0.3926, "step": 11597 }, { "epoch": 0.3417055816625665, "grad_norm": 1.4008437600329424, "learning_rate": 8.323659190957785e-06, "loss": 0.5336, "step": 11598 }, { "epoch": 0.34173504412003003, "grad_norm": 1.7694552373420414, "learning_rate": 8.323274994196771e-06, "loss": 0.6391, "step": 11599 }, { "epoch": 0.34176450657749363, "grad_norm": 1.7113470684827714, "learning_rate": 8.322890762283168e-06, "loss": 0.6232, "step": 11600 }, { "epoch": 0.3417939690349572, "grad_norm": 1.6546266414693866, "learning_rate": 8.322506495221038e-06, "loss": 0.563, "step": 11601 }, { "epoch": 0.3418234314924208, "grad_norm": 1.7297841030839236, "learning_rate": 8.322122193014448e-06, "loss": 0.5385, "step": 11602 }, { "epoch": 0.3418528939498844, "grad_norm": 1.6743652164683733, "learning_rate": 8.321737855667462e-06, "loss": 0.5125, "step": 11603 }, { "epoch": 0.34188235640734793, "grad_norm": 1.622390552765308, "learning_rate": 8.321353483184145e-06, "loss": 0.6294, "step": 11604 }, { "epoch": 0.34191181886481153, "grad_norm": 1.6677033707022388, "learning_rate": 8.320969075568563e-06, "loss": 0.4683, "step": 11605 }, { "epoch": 0.3419412813222751, "grad_norm": 1.5970835177085212, "learning_rate": 8.320584632824782e-06, "loss": 0.5165, "step": 11606 }, { "epoch": 0.3419707437797387, "grad_norm": 1.5758514627181592, "learning_rate": 8.320200154956868e-06, "loss": 0.7008, "step": 11607 }, { "epoch": 0.34200020623720223, "grad_norm": 1.4520654253042908, "learning_rate": 8.319815641968892e-06, "loss": 0.4788, "step": 11608 }, { "epoch": 0.34202966869466583, "grad_norm": 1.355153494017001, "learning_rate": 8.319431093864915e-06, "loss": 0.4055, "step": 11609 }, { "epoch": 0.3420591311521294, "grad_norm": 1.4456514966726592, "learning_rate": 8.319046510649007e-06, "loss": 0.4578, "step": 11610 }, { "epoch": 0.342088593609593, "grad_norm": 1.6383385556220984, "learning_rate": 8.318661892325238e-06, "loss": 0.4526, "step": 11611 }, { "epoch": 0.34211805606705653, "grad_norm": 1.785495015799456, "learning_rate": 8.318277238897676e-06, "loss": 0.5707, "step": 11612 }, { "epoch": 0.34214751852452013, "grad_norm": 1.4857305526741804, "learning_rate": 8.317892550370386e-06, "loss": 0.482, "step": 11613 }, { "epoch": 0.3421769809819837, "grad_norm": 1.5218636406308035, "learning_rate": 8.317507826747442e-06, "loss": 0.4305, "step": 11614 }, { "epoch": 0.3422064434394473, "grad_norm": 1.4893809954306965, "learning_rate": 8.31712306803291e-06, "loss": 0.5301, "step": 11615 }, { "epoch": 0.3422359058969109, "grad_norm": 1.5706959798555205, "learning_rate": 8.31673827423086e-06, "loss": 0.5074, "step": 11616 }, { "epoch": 0.34226536835437443, "grad_norm": 1.4170055240195663, "learning_rate": 8.316353445345365e-06, "loss": 0.417, "step": 11617 }, { "epoch": 0.34229483081183804, "grad_norm": 1.8842957876518058, "learning_rate": 8.315968581380493e-06, "loss": 0.518, "step": 11618 }, { "epoch": 0.3423242932693016, "grad_norm": 1.4683283002680363, "learning_rate": 8.315583682340319e-06, "loss": 0.4306, "step": 11619 }, { "epoch": 0.3423537557267652, "grad_norm": 1.7857849662605385, "learning_rate": 8.315198748228908e-06, "loss": 0.521, "step": 11620 }, { "epoch": 0.34238321818422873, "grad_norm": 1.6638766690731412, "learning_rate": 8.314813779050335e-06, "loss": 0.3594, "step": 11621 }, { "epoch": 0.34241268064169234, "grad_norm": 1.6369563780459542, "learning_rate": 8.314428774808672e-06, "loss": 0.4243, "step": 11622 }, { "epoch": 0.3424421430991559, "grad_norm": 1.6867064457458383, "learning_rate": 8.314043735507993e-06, "loss": 0.671, "step": 11623 }, { "epoch": 0.3424716055566195, "grad_norm": 1.4380311991377774, "learning_rate": 8.313658661152368e-06, "loss": 0.5492, "step": 11624 }, { "epoch": 0.34250106801408303, "grad_norm": 1.6824073676875972, "learning_rate": 8.313273551745873e-06, "loss": 0.5453, "step": 11625 }, { "epoch": 0.34253053047154663, "grad_norm": 1.5616113066573383, "learning_rate": 8.31288840729258e-06, "loss": 0.3493, "step": 11626 }, { "epoch": 0.3425599929290102, "grad_norm": 1.5090606327120981, "learning_rate": 8.31250322779656e-06, "loss": 0.4479, "step": 11627 }, { "epoch": 0.3425894553864738, "grad_norm": 1.5027897943888031, "learning_rate": 8.312118013261894e-06, "loss": 0.4522, "step": 11628 }, { "epoch": 0.3426189178439374, "grad_norm": 1.5235343924062839, "learning_rate": 8.311732763692652e-06, "loss": 0.5153, "step": 11629 }, { "epoch": 0.34264838030140093, "grad_norm": 1.668928756542708, "learning_rate": 8.31134747909291e-06, "loss": 0.6014, "step": 11630 }, { "epoch": 0.34267784275886454, "grad_norm": 1.4306178027307879, "learning_rate": 8.310962159466744e-06, "loss": 0.4778, "step": 11631 }, { "epoch": 0.3427073052163281, "grad_norm": 1.8575747156709514, "learning_rate": 8.310576804818229e-06, "loss": 0.4446, "step": 11632 }, { "epoch": 0.3427367676737917, "grad_norm": 1.7387673517060984, "learning_rate": 8.310191415151441e-06, "loss": 0.4769, "step": 11633 }, { "epoch": 0.34276623013125523, "grad_norm": 1.4626208354089771, "learning_rate": 8.309805990470457e-06, "loss": 0.3831, "step": 11634 }, { "epoch": 0.34279569258871884, "grad_norm": 1.6752796717349128, "learning_rate": 8.309420530779354e-06, "loss": 0.5784, "step": 11635 }, { "epoch": 0.3428251550461824, "grad_norm": 1.3574446198557875, "learning_rate": 8.30903503608221e-06, "loss": 0.4275, "step": 11636 }, { "epoch": 0.342854617503646, "grad_norm": 1.5066027802919475, "learning_rate": 8.3086495063831e-06, "loss": 0.5115, "step": 11637 }, { "epoch": 0.34288407996110953, "grad_norm": 1.5436362906560825, "learning_rate": 8.308263941686106e-06, "loss": 0.5949, "step": 11638 }, { "epoch": 0.34291354241857314, "grad_norm": 1.603061356097737, "learning_rate": 8.307878341995304e-06, "loss": 0.5729, "step": 11639 }, { "epoch": 0.3429430048760367, "grad_norm": 1.6995925596786272, "learning_rate": 8.307492707314772e-06, "loss": 0.6988, "step": 11640 }, { "epoch": 0.3429724673335003, "grad_norm": 1.5523777948511217, "learning_rate": 8.307107037648591e-06, "loss": 0.3954, "step": 11641 }, { "epoch": 0.3430019297909639, "grad_norm": 1.658231028270385, "learning_rate": 8.30672133300084e-06, "loss": 0.4185, "step": 11642 }, { "epoch": 0.34303139224842744, "grad_norm": 1.697053884215471, "learning_rate": 8.306335593375597e-06, "loss": 0.518, "step": 11643 }, { "epoch": 0.34306085470589104, "grad_norm": 1.5892998826728575, "learning_rate": 8.305949818776943e-06, "loss": 0.4233, "step": 11644 }, { "epoch": 0.3430903171633546, "grad_norm": 1.5983433333187707, "learning_rate": 8.305564009208961e-06, "loss": 0.4208, "step": 11645 }, { "epoch": 0.3431197796208182, "grad_norm": 1.4521692123009753, "learning_rate": 8.305178164675732e-06, "loss": 0.5083, "step": 11646 }, { "epoch": 0.34314924207828174, "grad_norm": 1.5663105611311858, "learning_rate": 8.304792285181331e-06, "loss": 0.4413, "step": 11647 }, { "epoch": 0.34317870453574534, "grad_norm": 1.5731207047818083, "learning_rate": 8.304406370729849e-06, "loss": 0.5457, "step": 11648 }, { "epoch": 0.3432081669932089, "grad_norm": 1.6346139209133481, "learning_rate": 8.30402042132536e-06, "loss": 0.3888, "step": 11649 }, { "epoch": 0.3432376294506725, "grad_norm": 1.3990404127061429, "learning_rate": 8.303634436971952e-06, "loss": 0.4086, "step": 11650 }, { "epoch": 0.34326709190813604, "grad_norm": 1.4809868493611984, "learning_rate": 8.303248417673706e-06, "loss": 0.3192, "step": 11651 }, { "epoch": 0.34329655436559964, "grad_norm": 1.3958215474502493, "learning_rate": 8.302862363434705e-06, "loss": 0.3382, "step": 11652 }, { "epoch": 0.3433260168230632, "grad_norm": 1.3499227035749717, "learning_rate": 8.302476274259031e-06, "loss": 0.3611, "step": 11653 }, { "epoch": 0.3433554792805268, "grad_norm": 1.5319931671092653, "learning_rate": 8.30209015015077e-06, "loss": 0.4224, "step": 11654 }, { "epoch": 0.3433849417379904, "grad_norm": 1.5011617743962702, "learning_rate": 8.301703991114006e-06, "loss": 0.5568, "step": 11655 }, { "epoch": 0.34341440419545394, "grad_norm": 1.8614410580654646, "learning_rate": 8.301317797152823e-06, "loss": 0.3098, "step": 11656 }, { "epoch": 0.34344386665291754, "grad_norm": 1.7807569401218573, "learning_rate": 8.300931568271306e-06, "loss": 0.4489, "step": 11657 }, { "epoch": 0.3434733291103811, "grad_norm": 1.5146264842835448, "learning_rate": 8.300545304473541e-06, "loss": 0.4818, "step": 11658 }, { "epoch": 0.3435027915678447, "grad_norm": 1.4174470815595712, "learning_rate": 8.300159005763615e-06, "loss": 0.3863, "step": 11659 }, { "epoch": 0.34353225402530824, "grad_norm": 1.6697669888012268, "learning_rate": 8.299772672145612e-06, "loss": 0.6256, "step": 11660 }, { "epoch": 0.34356171648277184, "grad_norm": 1.38348086856351, "learning_rate": 8.29938630362362e-06, "loss": 0.3496, "step": 11661 }, { "epoch": 0.3435911789402354, "grad_norm": 1.566278089538547, "learning_rate": 8.298999900201724e-06, "loss": 0.4407, "step": 11662 }, { "epoch": 0.343620641397699, "grad_norm": 1.5882476972894939, "learning_rate": 8.298613461884013e-06, "loss": 0.4666, "step": 11663 }, { "epoch": 0.34365010385516254, "grad_norm": 1.7472999470591797, "learning_rate": 8.298226988674573e-06, "loss": 0.556, "step": 11664 }, { "epoch": 0.34367956631262614, "grad_norm": 1.7647709876414324, "learning_rate": 8.297840480577495e-06, "loss": 0.6156, "step": 11665 }, { "epoch": 0.3437090287700897, "grad_norm": 1.659177485188278, "learning_rate": 8.297453937596864e-06, "loss": 0.4503, "step": 11666 }, { "epoch": 0.3437384912275533, "grad_norm": 1.5090279951011494, "learning_rate": 8.297067359736772e-06, "loss": 0.47, "step": 11667 }, { "epoch": 0.3437679536850169, "grad_norm": 1.397700809073956, "learning_rate": 8.296680747001304e-06, "loss": 0.3407, "step": 11668 }, { "epoch": 0.34379741614248044, "grad_norm": 1.5347645913031205, "learning_rate": 8.296294099394553e-06, "loss": 0.4252, "step": 11669 }, { "epoch": 0.34382687859994404, "grad_norm": 1.697224662305821, "learning_rate": 8.295907416920608e-06, "loss": 0.5206, "step": 11670 }, { "epoch": 0.3438563410574076, "grad_norm": 1.4746932213395123, "learning_rate": 8.295520699583558e-06, "loss": 0.4194, "step": 11671 }, { "epoch": 0.3438858035148712, "grad_norm": 1.777480952730393, "learning_rate": 8.295133947387495e-06, "loss": 0.5151, "step": 11672 }, { "epoch": 0.34391526597233474, "grad_norm": 1.4883866079873445, "learning_rate": 8.294747160336509e-06, "loss": 0.4456, "step": 11673 }, { "epoch": 0.34394472842979834, "grad_norm": 1.5956677371493047, "learning_rate": 8.294360338434691e-06, "loss": 0.3643, "step": 11674 }, { "epoch": 0.3439741908872619, "grad_norm": 1.4606078778015779, "learning_rate": 8.293973481686133e-06, "loss": 0.4047, "step": 11675 }, { "epoch": 0.3440036533447255, "grad_norm": 1.6736023290768773, "learning_rate": 8.293586590094928e-06, "loss": 0.4606, "step": 11676 }, { "epoch": 0.34403311580218904, "grad_norm": 1.4371566578672361, "learning_rate": 8.293199663665169e-06, "loss": 0.4721, "step": 11677 }, { "epoch": 0.34406257825965264, "grad_norm": 1.6765568986275567, "learning_rate": 8.292812702400947e-06, "loss": 0.4137, "step": 11678 }, { "epoch": 0.3440920407171162, "grad_norm": 1.3900431615993694, "learning_rate": 8.292425706306355e-06, "loss": 0.3406, "step": 11679 }, { "epoch": 0.3441215031745798, "grad_norm": 1.483487985173318, "learning_rate": 8.292038675385488e-06, "loss": 0.3616, "step": 11680 }, { "epoch": 0.3441509656320434, "grad_norm": 1.5136542577320025, "learning_rate": 8.291651609642438e-06, "loss": 0.4598, "step": 11681 }, { "epoch": 0.34418042808950694, "grad_norm": 1.5577499818404947, "learning_rate": 8.291264509081302e-06, "loss": 0.4459, "step": 11682 }, { "epoch": 0.34420989054697054, "grad_norm": 1.4252657433266362, "learning_rate": 8.290877373706172e-06, "loss": 0.4575, "step": 11683 }, { "epoch": 0.3442393530044341, "grad_norm": 1.4648474023828844, "learning_rate": 8.290490203521144e-06, "loss": 0.5498, "step": 11684 }, { "epoch": 0.3442688154618977, "grad_norm": 1.402989491235409, "learning_rate": 8.290102998530313e-06, "loss": 0.3487, "step": 11685 }, { "epoch": 0.34429827791936124, "grad_norm": 1.481022663223081, "learning_rate": 8.289715758737778e-06, "loss": 0.4592, "step": 11686 }, { "epoch": 0.34432774037682484, "grad_norm": 1.3776919026754604, "learning_rate": 8.28932848414763e-06, "loss": 0.3909, "step": 11687 }, { "epoch": 0.3443572028342884, "grad_norm": 1.7055033611171928, "learning_rate": 8.288941174763968e-06, "loss": 0.5479, "step": 11688 }, { "epoch": 0.344386665291752, "grad_norm": 1.6391299420902488, "learning_rate": 8.288553830590888e-06, "loss": 0.5214, "step": 11689 }, { "epoch": 0.34441612774921554, "grad_norm": 1.6515008681119365, "learning_rate": 8.288166451632489e-06, "loss": 0.4046, "step": 11690 }, { "epoch": 0.34444559020667914, "grad_norm": 1.5294156356618829, "learning_rate": 8.287779037892867e-06, "loss": 0.4814, "step": 11691 }, { "epoch": 0.3444750526641427, "grad_norm": 1.601105884369384, "learning_rate": 8.287391589376121e-06, "loss": 0.5748, "step": 11692 }, { "epoch": 0.3445045151216063, "grad_norm": 1.588907141056514, "learning_rate": 8.287004106086346e-06, "loss": 0.5127, "step": 11693 }, { "epoch": 0.3445339775790699, "grad_norm": 1.5531727788098408, "learning_rate": 8.286616588027646e-06, "loss": 0.592, "step": 11694 }, { "epoch": 0.34456344003653344, "grad_norm": 1.4779959616378175, "learning_rate": 8.286229035204117e-06, "loss": 0.4784, "step": 11695 }, { "epoch": 0.34459290249399704, "grad_norm": 1.5132414657488225, "learning_rate": 8.285841447619858e-06, "loss": 0.5182, "step": 11696 }, { "epoch": 0.3446223649514606, "grad_norm": 1.5118339563950653, "learning_rate": 8.285453825278971e-06, "loss": 0.4757, "step": 11697 }, { "epoch": 0.3446518274089242, "grad_norm": 1.5618440636552737, "learning_rate": 8.28506616818555e-06, "loss": 0.4415, "step": 11698 }, { "epoch": 0.34468128986638774, "grad_norm": 1.4653126991575383, "learning_rate": 8.284678476343706e-06, "loss": 0.426, "step": 11699 }, { "epoch": 0.34471075232385134, "grad_norm": 1.4606800522994061, "learning_rate": 8.284290749757531e-06, "loss": 0.4315, "step": 11700 }, { "epoch": 0.3447402147813149, "grad_norm": 1.7007828764308517, "learning_rate": 8.283902988431131e-06, "loss": 0.5876, "step": 11701 }, { "epoch": 0.3447696772387785, "grad_norm": 1.3035777728483176, "learning_rate": 8.283515192368605e-06, "loss": 0.435, "step": 11702 }, { "epoch": 0.34479913969624204, "grad_norm": 1.4367809350312097, "learning_rate": 8.283127361574056e-06, "loss": 0.4113, "step": 11703 }, { "epoch": 0.34482860215370564, "grad_norm": 1.435241909161851, "learning_rate": 8.282739496051586e-06, "loss": 0.4419, "step": 11704 }, { "epoch": 0.3448580646111692, "grad_norm": 1.7201883133957832, "learning_rate": 8.2823515958053e-06, "loss": 0.4801, "step": 11705 }, { "epoch": 0.3448875270686328, "grad_norm": 1.638423222893592, "learning_rate": 8.281963660839297e-06, "loss": 0.435, "step": 11706 }, { "epoch": 0.3449169895260964, "grad_norm": 1.5230688969569333, "learning_rate": 8.281575691157684e-06, "loss": 0.4899, "step": 11707 }, { "epoch": 0.34494645198355994, "grad_norm": 1.6736538494812838, "learning_rate": 8.281187686764562e-06, "loss": 0.634, "step": 11708 }, { "epoch": 0.34497591444102355, "grad_norm": 1.6888237115948987, "learning_rate": 8.280799647664037e-06, "loss": 0.45, "step": 11709 }, { "epoch": 0.3450053768984871, "grad_norm": 1.5316097414254022, "learning_rate": 8.280411573860214e-06, "loss": 0.5072, "step": 11710 }, { "epoch": 0.3450348393559507, "grad_norm": 1.5521997165125174, "learning_rate": 8.280023465357196e-06, "loss": 0.5512, "step": 11711 }, { "epoch": 0.34506430181341424, "grad_norm": 1.7461801662327088, "learning_rate": 8.27963532215909e-06, "loss": 0.5488, "step": 11712 }, { "epoch": 0.34509376427087785, "grad_norm": 1.3189207547553081, "learning_rate": 8.279247144270001e-06, "loss": 0.3853, "step": 11713 }, { "epoch": 0.3451232267283414, "grad_norm": 1.5060874021958879, "learning_rate": 8.278858931694036e-06, "loss": 0.4068, "step": 11714 }, { "epoch": 0.345152689185805, "grad_norm": 1.4710551577277793, "learning_rate": 8.2784706844353e-06, "loss": 0.4609, "step": 11715 }, { "epoch": 0.34518215164326854, "grad_norm": 1.8684994715338206, "learning_rate": 8.2780824024979e-06, "loss": 0.5948, "step": 11716 }, { "epoch": 0.34521161410073214, "grad_norm": 1.5315748976341232, "learning_rate": 8.277694085885943e-06, "loss": 0.482, "step": 11717 }, { "epoch": 0.3452410765581957, "grad_norm": 1.5066952306793666, "learning_rate": 8.277305734603538e-06, "loss": 0.4896, "step": 11718 }, { "epoch": 0.3452705390156593, "grad_norm": 1.5317632979253026, "learning_rate": 8.276917348654789e-06, "loss": 0.4984, "step": 11719 }, { "epoch": 0.3453000014731229, "grad_norm": 1.4634273036288943, "learning_rate": 8.27652892804381e-06, "loss": 0.4241, "step": 11720 }, { "epoch": 0.34532946393058644, "grad_norm": 1.5694248101630244, "learning_rate": 8.276140472774705e-06, "loss": 0.4595, "step": 11721 }, { "epoch": 0.34535892638805005, "grad_norm": 1.5794136366785019, "learning_rate": 8.275751982851586e-06, "loss": 0.5175, "step": 11722 }, { "epoch": 0.3453883888455136, "grad_norm": 1.4347016219610993, "learning_rate": 8.275363458278561e-06, "loss": 0.4312, "step": 11723 }, { "epoch": 0.3454178513029772, "grad_norm": 1.4530991994283227, "learning_rate": 8.274974899059737e-06, "loss": 0.4074, "step": 11724 }, { "epoch": 0.34544731376044074, "grad_norm": 1.5585094325904962, "learning_rate": 8.274586305199227e-06, "loss": 0.5238, "step": 11725 }, { "epoch": 0.34547677621790435, "grad_norm": 1.4955939533102067, "learning_rate": 8.274197676701142e-06, "loss": 0.4605, "step": 11726 }, { "epoch": 0.3455062386753679, "grad_norm": 1.506455902515842, "learning_rate": 8.273809013569593e-06, "loss": 0.419, "step": 11727 }, { "epoch": 0.3455357011328315, "grad_norm": 1.4177476929179669, "learning_rate": 8.273420315808688e-06, "loss": 0.4571, "step": 11728 }, { "epoch": 0.34556516359029504, "grad_norm": 1.3938647839472866, "learning_rate": 8.273031583422542e-06, "loss": 0.4313, "step": 11729 }, { "epoch": 0.34559462604775865, "grad_norm": 1.5914569200112558, "learning_rate": 8.272642816415264e-06, "loss": 0.3904, "step": 11730 }, { "epoch": 0.3456240885052222, "grad_norm": 1.7795935192345904, "learning_rate": 8.272254014790969e-06, "loss": 0.4738, "step": 11731 }, { "epoch": 0.3456535509626858, "grad_norm": 1.442432088536856, "learning_rate": 8.271865178553768e-06, "loss": 0.3692, "step": 11732 }, { "epoch": 0.3456830134201494, "grad_norm": 1.5486462149366018, "learning_rate": 8.271476307707772e-06, "loss": 0.5108, "step": 11733 }, { "epoch": 0.34571247587761295, "grad_norm": 1.4773700053955792, "learning_rate": 8.2710874022571e-06, "loss": 0.4376, "step": 11734 }, { "epoch": 0.34574193833507655, "grad_norm": 1.8359599288146564, "learning_rate": 8.270698462205859e-06, "loss": 0.5241, "step": 11735 }, { "epoch": 0.3457714007925401, "grad_norm": 1.605192340501354, "learning_rate": 8.27030948755817e-06, "loss": 0.5089, "step": 11736 }, { "epoch": 0.3458008632500037, "grad_norm": 1.4783519867041235, "learning_rate": 8.269920478318142e-06, "loss": 0.5366, "step": 11737 }, { "epoch": 0.34583032570746725, "grad_norm": 1.6600090153751854, "learning_rate": 8.269531434489893e-06, "loss": 0.5364, "step": 11738 }, { "epoch": 0.34585978816493085, "grad_norm": 1.5237985760397021, "learning_rate": 8.269142356077535e-06, "loss": 0.5597, "step": 11739 }, { "epoch": 0.3458892506223944, "grad_norm": 1.6085464041959283, "learning_rate": 8.268753243085186e-06, "loss": 0.4038, "step": 11740 }, { "epoch": 0.345918713079858, "grad_norm": 1.6115167303749875, "learning_rate": 8.268364095516961e-06, "loss": 0.5207, "step": 11741 }, { "epoch": 0.34594817553732155, "grad_norm": 1.657598907030858, "learning_rate": 8.267974913376979e-06, "loss": 0.5688, "step": 11742 }, { "epoch": 0.34597763799478515, "grad_norm": 1.4389311185235083, "learning_rate": 8.267585696669352e-06, "loss": 0.3313, "step": 11743 }, { "epoch": 0.3460071004522487, "grad_norm": 1.3507185883427735, "learning_rate": 8.2671964453982e-06, "loss": 0.3798, "step": 11744 }, { "epoch": 0.3460365629097123, "grad_norm": 1.460606717308112, "learning_rate": 8.26680715956764e-06, "loss": 0.4764, "step": 11745 }, { "epoch": 0.3460660253671759, "grad_norm": 1.5762639818058268, "learning_rate": 8.266417839181788e-06, "loss": 0.5329, "step": 11746 }, { "epoch": 0.34609548782463945, "grad_norm": 1.3702550306892383, "learning_rate": 8.266028484244765e-06, "loss": 0.3693, "step": 11747 }, { "epoch": 0.34612495028210305, "grad_norm": 1.6940899530953082, "learning_rate": 8.265639094760687e-06, "loss": 0.574, "step": 11748 }, { "epoch": 0.3461544127395666, "grad_norm": 1.5155810165339891, "learning_rate": 8.265249670733675e-06, "loss": 0.462, "step": 11749 }, { "epoch": 0.3461838751970302, "grad_norm": 1.5846233775101488, "learning_rate": 8.264860212167847e-06, "loss": 0.3798, "step": 11750 }, { "epoch": 0.34621333765449375, "grad_norm": 1.4676211192222177, "learning_rate": 8.264470719067323e-06, "loss": 0.4978, "step": 11751 }, { "epoch": 0.34624280011195735, "grad_norm": 1.4775242930766068, "learning_rate": 8.264081191436221e-06, "loss": 0.4073, "step": 11752 }, { "epoch": 0.3462722625694209, "grad_norm": 1.4092274864092549, "learning_rate": 8.263691629278665e-06, "loss": 0.5393, "step": 11753 }, { "epoch": 0.3463017250268845, "grad_norm": 1.4329065169964164, "learning_rate": 8.263302032598773e-06, "loss": 0.3761, "step": 11754 }, { "epoch": 0.34633118748434805, "grad_norm": 1.6314161826295135, "learning_rate": 8.262912401400665e-06, "loss": 0.5734, "step": 11755 }, { "epoch": 0.34636064994181165, "grad_norm": 1.612416978619146, "learning_rate": 8.262522735688465e-06, "loss": 0.4908, "step": 11756 }, { "epoch": 0.3463901123992752, "grad_norm": 1.59717251643083, "learning_rate": 8.262133035466295e-06, "loss": 0.4352, "step": 11757 }, { "epoch": 0.3464195748567388, "grad_norm": 1.5165420865203383, "learning_rate": 8.261743300738276e-06, "loss": 0.5748, "step": 11758 }, { "epoch": 0.3464490373142024, "grad_norm": 1.6123366996532216, "learning_rate": 8.26135353150853e-06, "loss": 0.5959, "step": 11759 }, { "epoch": 0.34647849977166595, "grad_norm": 1.6371480271244447, "learning_rate": 8.260963727781177e-06, "loss": 0.5548, "step": 11760 }, { "epoch": 0.34650796222912955, "grad_norm": 1.7675881116445928, "learning_rate": 8.260573889560347e-06, "loss": 0.5509, "step": 11761 }, { "epoch": 0.3465374246865931, "grad_norm": 1.622526295645151, "learning_rate": 8.260184016850159e-06, "loss": 0.5475, "step": 11762 }, { "epoch": 0.3465668871440567, "grad_norm": 1.5520683072385344, "learning_rate": 8.25979410965474e-06, "loss": 0.4269, "step": 11763 }, { "epoch": 0.34659634960152025, "grad_norm": 1.496460788177083, "learning_rate": 8.25940416797821e-06, "loss": 0.524, "step": 11764 }, { "epoch": 0.34662581205898385, "grad_norm": 1.4272340264298857, "learning_rate": 8.259014191824695e-06, "loss": 0.5463, "step": 11765 }, { "epoch": 0.3466552745164474, "grad_norm": 1.417209355732419, "learning_rate": 8.258624181198323e-06, "loss": 0.3393, "step": 11766 }, { "epoch": 0.346684736973911, "grad_norm": 1.516260948435025, "learning_rate": 8.258234136103219e-06, "loss": 0.4652, "step": 11767 }, { "epoch": 0.34671419943137455, "grad_norm": 1.5476454066308494, "learning_rate": 8.257844056543504e-06, "loss": 0.6265, "step": 11768 }, { "epoch": 0.34674366188883815, "grad_norm": 1.5526630517946372, "learning_rate": 8.25745394252331e-06, "loss": 0.5267, "step": 11769 }, { "epoch": 0.3467731243463017, "grad_norm": 1.684868941227329, "learning_rate": 8.257063794046759e-06, "loss": 0.5775, "step": 11770 }, { "epoch": 0.3468025868037653, "grad_norm": 1.5803579642235093, "learning_rate": 8.256673611117981e-06, "loss": 0.452, "step": 11771 }, { "epoch": 0.3468320492612289, "grad_norm": 1.483631916418928, "learning_rate": 8.256283393741103e-06, "loss": 0.5973, "step": 11772 }, { "epoch": 0.34686151171869245, "grad_norm": 1.4243449526796148, "learning_rate": 8.25589314192025e-06, "loss": 0.5007, "step": 11773 }, { "epoch": 0.34689097417615605, "grad_norm": 1.778171084804694, "learning_rate": 8.255502855659552e-06, "loss": 0.5333, "step": 11774 }, { "epoch": 0.3469204366336196, "grad_norm": 1.5169740629164128, "learning_rate": 8.255112534963136e-06, "loss": 0.4397, "step": 11775 }, { "epoch": 0.3469498990910832, "grad_norm": 1.7652037322945493, "learning_rate": 8.254722179835133e-06, "loss": 0.4798, "step": 11776 }, { "epoch": 0.34697936154854675, "grad_norm": 1.570048369610597, "learning_rate": 8.254331790279671e-06, "loss": 0.5261, "step": 11777 }, { "epoch": 0.34700882400601035, "grad_norm": 1.5869658369604582, "learning_rate": 8.253941366300876e-06, "loss": 0.487, "step": 11778 }, { "epoch": 0.3470382864634739, "grad_norm": 1.6898207557936582, "learning_rate": 8.253550907902885e-06, "loss": 0.5831, "step": 11779 }, { "epoch": 0.3470677489209375, "grad_norm": 1.4521008577495893, "learning_rate": 8.253160415089822e-06, "loss": 0.5036, "step": 11780 }, { "epoch": 0.34709721137840105, "grad_norm": 1.3753464561175324, "learning_rate": 8.252769887865819e-06, "loss": 0.4787, "step": 11781 }, { "epoch": 0.34712667383586465, "grad_norm": 1.6467868545953073, "learning_rate": 8.252379326235009e-06, "loss": 0.5413, "step": 11782 }, { "epoch": 0.3471561362933282, "grad_norm": 1.4235198089956151, "learning_rate": 8.25198873020152e-06, "loss": 0.4443, "step": 11783 }, { "epoch": 0.3471855987507918, "grad_norm": 1.5177406636787385, "learning_rate": 8.251598099769487e-06, "loss": 0.3911, "step": 11784 }, { "epoch": 0.3472150612082554, "grad_norm": 1.5819866394404565, "learning_rate": 8.25120743494304e-06, "loss": 0.4839, "step": 11785 }, { "epoch": 0.34724452366571895, "grad_norm": 1.519559576589525, "learning_rate": 8.25081673572631e-06, "loss": 0.4318, "step": 11786 }, { "epoch": 0.34727398612318255, "grad_norm": 1.5205103537205642, "learning_rate": 8.250426002123435e-06, "loss": 0.4955, "step": 11787 }, { "epoch": 0.3473034485806461, "grad_norm": 1.7269938624908474, "learning_rate": 8.250035234138541e-06, "loss": 0.4047, "step": 11788 }, { "epoch": 0.3473329110381097, "grad_norm": 1.6556112752295933, "learning_rate": 8.249644431775765e-06, "loss": 0.4649, "step": 11789 }, { "epoch": 0.34736237349557325, "grad_norm": 1.629127331831269, "learning_rate": 8.24925359503924e-06, "loss": 0.5139, "step": 11790 }, { "epoch": 0.34739183595303685, "grad_norm": 1.6549138550455809, "learning_rate": 8.248862723933103e-06, "loss": 0.5496, "step": 11791 }, { "epoch": 0.3474212984105004, "grad_norm": 1.4517162152197982, "learning_rate": 8.248471818461486e-06, "loss": 0.3377, "step": 11792 }, { "epoch": 0.347450760867964, "grad_norm": 1.4717456307432975, "learning_rate": 8.248080878628521e-06, "loss": 0.5617, "step": 11793 }, { "epoch": 0.34748022332542755, "grad_norm": 1.4464367920048997, "learning_rate": 8.24768990443835e-06, "loss": 0.4035, "step": 11794 }, { "epoch": 0.34750968578289115, "grad_norm": 1.3332877938924168, "learning_rate": 8.247298895895104e-06, "loss": 0.3901, "step": 11795 }, { "epoch": 0.3475391482403547, "grad_norm": 1.4554308481896823, "learning_rate": 8.246907853002918e-06, "loss": 0.5536, "step": 11796 }, { "epoch": 0.3475686106978183, "grad_norm": 1.499043740318533, "learning_rate": 8.246516775765933e-06, "loss": 0.5522, "step": 11797 }, { "epoch": 0.3475980731552819, "grad_norm": 1.6728737919221854, "learning_rate": 8.24612566418828e-06, "loss": 0.607, "step": 11798 }, { "epoch": 0.34762753561274545, "grad_norm": 1.324613285957601, "learning_rate": 8.245734518274101e-06, "loss": 0.4014, "step": 11799 }, { "epoch": 0.34765699807020906, "grad_norm": 1.3886338641583014, "learning_rate": 8.245343338027532e-06, "loss": 0.4257, "step": 11800 }, { "epoch": 0.3476864605276726, "grad_norm": 1.5624857011642455, "learning_rate": 8.24495212345271e-06, "loss": 0.4346, "step": 11801 }, { "epoch": 0.3477159229851362, "grad_norm": 1.5328255234590884, "learning_rate": 8.244560874553772e-06, "loss": 0.4559, "step": 11802 }, { "epoch": 0.34774538544259975, "grad_norm": 1.5853339928722394, "learning_rate": 8.24416959133486e-06, "loss": 0.4621, "step": 11803 }, { "epoch": 0.34777484790006336, "grad_norm": 1.4942815399618503, "learning_rate": 8.243778273800107e-06, "loss": 0.6583, "step": 11804 }, { "epoch": 0.3478043103575269, "grad_norm": 1.602381213229368, "learning_rate": 8.243386921953658e-06, "loss": 0.5691, "step": 11805 }, { "epoch": 0.3478337728149905, "grad_norm": 1.4915048186453093, "learning_rate": 8.242995535799651e-06, "loss": 0.4003, "step": 11806 }, { "epoch": 0.34786323527245405, "grad_norm": 1.6071743838175117, "learning_rate": 8.242604115342225e-06, "loss": 0.3734, "step": 11807 }, { "epoch": 0.34789269772991765, "grad_norm": 1.649292770118409, "learning_rate": 8.242212660585522e-06, "loss": 0.5023, "step": 11808 }, { "epoch": 0.3479221601873812, "grad_norm": 1.4820417300947055, "learning_rate": 8.241821171533681e-06, "loss": 0.586, "step": 11809 }, { "epoch": 0.3479516226448448, "grad_norm": 1.695291266031644, "learning_rate": 8.241429648190845e-06, "loss": 0.5222, "step": 11810 }, { "epoch": 0.3479810851023084, "grad_norm": 1.5084852078750528, "learning_rate": 8.241038090561151e-06, "loss": 0.4422, "step": 11811 }, { "epoch": 0.34801054755977195, "grad_norm": 1.605968801425173, "learning_rate": 8.240646498648746e-06, "loss": 0.5417, "step": 11812 }, { "epoch": 0.34804001001723556, "grad_norm": 1.6119673921463449, "learning_rate": 8.24025487245777e-06, "loss": 0.5063, "step": 11813 }, { "epoch": 0.3480694724746991, "grad_norm": 1.602028269369739, "learning_rate": 8.239863211992364e-06, "loss": 0.4163, "step": 11814 }, { "epoch": 0.3480989349321627, "grad_norm": 1.6994648798269738, "learning_rate": 8.239471517256672e-06, "loss": 0.4136, "step": 11815 }, { "epoch": 0.34812839738962625, "grad_norm": 1.63415853430124, "learning_rate": 8.23907978825484e-06, "loss": 0.5998, "step": 11816 }, { "epoch": 0.34815785984708986, "grad_norm": 1.5566994502473153, "learning_rate": 8.238688024991008e-06, "loss": 0.5112, "step": 11817 }, { "epoch": 0.3481873223045534, "grad_norm": 1.592376830353313, "learning_rate": 8.23829622746932e-06, "loss": 0.4571, "step": 11818 }, { "epoch": 0.348216784762017, "grad_norm": 1.4924949796644542, "learning_rate": 8.237904395693924e-06, "loss": 0.4721, "step": 11819 }, { "epoch": 0.34824624721948055, "grad_norm": 1.8407457148440476, "learning_rate": 8.237512529668958e-06, "loss": 0.6565, "step": 11820 }, { "epoch": 0.34827570967694416, "grad_norm": 1.4646949566160712, "learning_rate": 8.237120629398574e-06, "loss": 0.4895, "step": 11821 }, { "epoch": 0.3483051721344077, "grad_norm": 1.6412486307445637, "learning_rate": 8.236728694886916e-06, "loss": 0.5327, "step": 11822 }, { "epoch": 0.3483346345918713, "grad_norm": 1.5367360313052707, "learning_rate": 8.236336726138126e-06, "loss": 0.4512, "step": 11823 }, { "epoch": 0.3483640970493349, "grad_norm": 1.565363181426835, "learning_rate": 8.235944723156351e-06, "loss": 0.4596, "step": 11824 }, { "epoch": 0.34839355950679846, "grad_norm": 1.471432891578481, "learning_rate": 8.23555268594574e-06, "loss": 0.4055, "step": 11825 }, { "epoch": 0.34842302196426206, "grad_norm": 1.606981478072609, "learning_rate": 8.23516061451044e-06, "loss": 0.4808, "step": 11826 }, { "epoch": 0.3484524844217256, "grad_norm": 1.4521496798445228, "learning_rate": 8.234768508854598e-06, "loss": 0.528, "step": 11827 }, { "epoch": 0.3484819468791892, "grad_norm": 1.5034917594782164, "learning_rate": 8.234376368982358e-06, "loss": 0.4305, "step": 11828 }, { "epoch": 0.34851140933665276, "grad_norm": 1.6147760442141872, "learning_rate": 8.233984194897872e-06, "loss": 0.4667, "step": 11829 }, { "epoch": 0.34854087179411636, "grad_norm": 1.5963521936300797, "learning_rate": 8.233591986605285e-06, "loss": 0.5965, "step": 11830 }, { "epoch": 0.3485703342515799, "grad_norm": 1.4288338773144482, "learning_rate": 8.233199744108749e-06, "loss": 0.4174, "step": 11831 }, { "epoch": 0.3485997967090435, "grad_norm": 1.3823633605950993, "learning_rate": 8.232807467412412e-06, "loss": 0.3611, "step": 11832 }, { "epoch": 0.34862925916650706, "grad_norm": 1.526658545623855, "learning_rate": 8.232415156520422e-06, "loss": 0.5085, "step": 11833 }, { "epoch": 0.34865872162397066, "grad_norm": 1.484882952966506, "learning_rate": 8.232022811436928e-06, "loss": 0.454, "step": 11834 }, { "epoch": 0.3486881840814342, "grad_norm": 1.5859399096248643, "learning_rate": 8.231630432166084e-06, "loss": 0.5478, "step": 11835 }, { "epoch": 0.3487176465388978, "grad_norm": 1.3370614039608293, "learning_rate": 8.231238018712036e-06, "loss": 0.3446, "step": 11836 }, { "epoch": 0.3487471089963614, "grad_norm": 1.538027818118012, "learning_rate": 8.23084557107894e-06, "loss": 0.5223, "step": 11837 }, { "epoch": 0.34877657145382496, "grad_norm": 1.51864132601254, "learning_rate": 8.230453089270942e-06, "loss": 0.6138, "step": 11838 }, { "epoch": 0.34880603391128856, "grad_norm": 1.4711668924413361, "learning_rate": 8.230060573292195e-06, "loss": 0.4368, "step": 11839 }, { "epoch": 0.3488354963687521, "grad_norm": 1.5458131568142066, "learning_rate": 8.229668023146853e-06, "loss": 0.5813, "step": 11840 }, { "epoch": 0.3488649588262157, "grad_norm": 1.594977406035926, "learning_rate": 8.229275438839068e-06, "loss": 0.4745, "step": 11841 }, { "epoch": 0.34889442128367926, "grad_norm": 1.5208004019437302, "learning_rate": 8.22888282037299e-06, "loss": 0.3969, "step": 11842 }, { "epoch": 0.34892388374114286, "grad_norm": 1.664090859102463, "learning_rate": 8.228490167752775e-06, "loss": 0.6031, "step": 11843 }, { "epoch": 0.3489533461986064, "grad_norm": 1.368334263878681, "learning_rate": 8.228097480982574e-06, "loss": 0.3785, "step": 11844 }, { "epoch": 0.34898280865607, "grad_norm": 1.5565909537513885, "learning_rate": 8.22770476006654e-06, "loss": 0.5878, "step": 11845 }, { "epoch": 0.34901227111353356, "grad_norm": 1.5413410075513116, "learning_rate": 8.227312005008829e-06, "loss": 0.4745, "step": 11846 }, { "epoch": 0.34904173357099716, "grad_norm": 1.6068311096162489, "learning_rate": 8.226919215813598e-06, "loss": 0.4671, "step": 11847 }, { "epoch": 0.3490711960284607, "grad_norm": 1.419238170134916, "learning_rate": 8.226526392484997e-06, "loss": 0.4218, "step": 11848 }, { "epoch": 0.3491006584859243, "grad_norm": 1.556886148287625, "learning_rate": 8.226133535027183e-06, "loss": 0.5049, "step": 11849 }, { "epoch": 0.3491301209433879, "grad_norm": 1.3768079442382175, "learning_rate": 8.225740643444314e-06, "loss": 0.4316, "step": 11850 }, { "epoch": 0.34915958340085146, "grad_norm": 1.5534668423745266, "learning_rate": 8.22534771774054e-06, "loss": 0.588, "step": 11851 }, { "epoch": 0.34918904585831506, "grad_norm": 1.6123560017250982, "learning_rate": 8.224954757920024e-06, "loss": 0.4954, "step": 11852 }, { "epoch": 0.3492185083157786, "grad_norm": 1.8414652206127893, "learning_rate": 8.224561763986917e-06, "loss": 0.5803, "step": 11853 }, { "epoch": 0.3492479707732422, "grad_norm": 1.6744088972677516, "learning_rate": 8.22416873594538e-06, "loss": 0.5744, "step": 11854 }, { "epoch": 0.34927743323070576, "grad_norm": 1.429030962501197, "learning_rate": 8.223775673799567e-06, "loss": 0.2894, "step": 11855 }, { "epoch": 0.34930689568816936, "grad_norm": 1.4536151724045554, "learning_rate": 8.223382577553637e-06, "loss": 0.4221, "step": 11856 }, { "epoch": 0.3493363581456329, "grad_norm": 1.6158049369112075, "learning_rate": 8.222989447211752e-06, "loss": 0.4543, "step": 11857 }, { "epoch": 0.3493658206030965, "grad_norm": 1.5761141234784133, "learning_rate": 8.222596282778066e-06, "loss": 0.3633, "step": 11858 }, { "epoch": 0.34939528306056006, "grad_norm": 1.6135797774666456, "learning_rate": 8.222203084256736e-06, "loss": 0.3944, "step": 11859 }, { "epoch": 0.34942474551802366, "grad_norm": 1.3797307110804906, "learning_rate": 8.221809851651926e-06, "loss": 0.4701, "step": 11860 }, { "epoch": 0.3494542079754872, "grad_norm": 1.7624701197885466, "learning_rate": 8.221416584967792e-06, "loss": 0.4727, "step": 11861 }, { "epoch": 0.3494836704329508, "grad_norm": 1.6189993582453497, "learning_rate": 8.221023284208496e-06, "loss": 0.4717, "step": 11862 }, { "epoch": 0.3495131328904144, "grad_norm": 1.7767742221098979, "learning_rate": 8.220629949378197e-06, "loss": 0.5369, "step": 11863 }, { "epoch": 0.34954259534787796, "grad_norm": 1.7051555166219399, "learning_rate": 8.220236580481055e-06, "loss": 0.4278, "step": 11864 }, { "epoch": 0.34957205780534156, "grad_norm": 1.5340399583350537, "learning_rate": 8.219843177521231e-06, "loss": 0.5686, "step": 11865 }, { "epoch": 0.3496015202628051, "grad_norm": 1.5378474131429551, "learning_rate": 8.21944974050289e-06, "loss": 0.44, "step": 11866 }, { "epoch": 0.3496309827202687, "grad_norm": 1.365309776449274, "learning_rate": 8.21905626943019e-06, "loss": 0.3815, "step": 11867 }, { "epoch": 0.34966044517773226, "grad_norm": 1.5622253097212122, "learning_rate": 8.218662764307292e-06, "loss": 0.4698, "step": 11868 }, { "epoch": 0.34968990763519586, "grad_norm": 1.5702474363809118, "learning_rate": 8.218269225138363e-06, "loss": 0.5, "step": 11869 }, { "epoch": 0.3497193700926594, "grad_norm": 1.6282127345021093, "learning_rate": 8.21787565192756e-06, "loss": 0.5291, "step": 11870 }, { "epoch": 0.349748832550123, "grad_norm": 1.4934216144488477, "learning_rate": 8.217482044679051e-06, "loss": 0.4273, "step": 11871 }, { "epoch": 0.34977829500758656, "grad_norm": 1.4541367123935007, "learning_rate": 8.217088403396998e-06, "loss": 0.4989, "step": 11872 }, { "epoch": 0.34980775746505016, "grad_norm": 1.653592978063116, "learning_rate": 8.216694728085563e-06, "loss": 0.3972, "step": 11873 }, { "epoch": 0.3498372199225137, "grad_norm": 1.568943595627908, "learning_rate": 8.21630101874891e-06, "loss": 0.5788, "step": 11874 }, { "epoch": 0.3498666823799773, "grad_norm": 1.5884265176743717, "learning_rate": 8.215907275391209e-06, "loss": 0.5946, "step": 11875 }, { "epoch": 0.3498961448374409, "grad_norm": 1.6725070104451543, "learning_rate": 8.215513498016618e-06, "loss": 0.4752, "step": 11876 }, { "epoch": 0.34992560729490446, "grad_norm": 1.4134809065043514, "learning_rate": 8.215119686629307e-06, "loss": 0.4225, "step": 11877 }, { "epoch": 0.34995506975236806, "grad_norm": 1.5921998637861123, "learning_rate": 8.21472584123344e-06, "loss": 0.5202, "step": 11878 }, { "epoch": 0.3499845322098316, "grad_norm": 1.64156211260154, "learning_rate": 8.21433196183318e-06, "loss": 0.5203, "step": 11879 }, { "epoch": 0.3500139946672952, "grad_norm": 1.5510303098916016, "learning_rate": 8.213938048432697e-06, "loss": 0.5561, "step": 11880 }, { "epoch": 0.35004345712475876, "grad_norm": 1.7811780716917567, "learning_rate": 8.213544101036157e-06, "loss": 0.6647, "step": 11881 }, { "epoch": 0.35007291958222236, "grad_norm": 1.6113565576386426, "learning_rate": 8.213150119647728e-06, "loss": 0.5697, "step": 11882 }, { "epoch": 0.3501023820396859, "grad_norm": 1.5866401138288115, "learning_rate": 8.212756104271575e-06, "loss": 0.4898, "step": 11883 }, { "epoch": 0.3501318444971495, "grad_norm": 1.5603159680653962, "learning_rate": 8.21236205491187e-06, "loss": 0.4976, "step": 11884 }, { "epoch": 0.35016130695461306, "grad_norm": 1.5032772477190355, "learning_rate": 8.211967971572774e-06, "loss": 0.4133, "step": 11885 }, { "epoch": 0.35019076941207666, "grad_norm": 1.4701681312910224, "learning_rate": 8.21157385425846e-06, "loss": 0.4594, "step": 11886 }, { "epoch": 0.35022023186954027, "grad_norm": 1.5747907941248318, "learning_rate": 8.2111797029731e-06, "loss": 0.3267, "step": 11887 }, { "epoch": 0.3502496943270038, "grad_norm": 1.3956258614707162, "learning_rate": 8.210785517720859e-06, "loss": 0.392, "step": 11888 }, { "epoch": 0.3502791567844674, "grad_norm": 1.48415597088779, "learning_rate": 8.210391298505904e-06, "loss": 0.4039, "step": 11889 }, { "epoch": 0.35030861924193096, "grad_norm": 1.5238856479505198, "learning_rate": 8.20999704533241e-06, "loss": 0.2874, "step": 11890 }, { "epoch": 0.35033808169939457, "grad_norm": 1.6496223322436614, "learning_rate": 8.209602758204547e-06, "loss": 0.4116, "step": 11891 }, { "epoch": 0.3503675441568581, "grad_norm": 1.589724602929676, "learning_rate": 8.209208437126483e-06, "loss": 0.5343, "step": 11892 }, { "epoch": 0.3503970066143217, "grad_norm": 1.4671634547835437, "learning_rate": 8.20881408210239e-06, "loss": 0.5207, "step": 11893 }, { "epoch": 0.35042646907178526, "grad_norm": 1.4620909369513424, "learning_rate": 8.20841969313644e-06, "loss": 0.4045, "step": 11894 }, { "epoch": 0.35045593152924887, "grad_norm": 1.5165227622547182, "learning_rate": 8.208025270232804e-06, "loss": 0.4628, "step": 11895 }, { "epoch": 0.3504853939867124, "grad_norm": 1.5542895118032478, "learning_rate": 8.207630813395652e-06, "loss": 0.4107, "step": 11896 }, { "epoch": 0.350514856444176, "grad_norm": 1.4943657131677892, "learning_rate": 8.207236322629164e-06, "loss": 0.5483, "step": 11897 }, { "epoch": 0.35054431890163956, "grad_norm": 1.6845960029129057, "learning_rate": 8.206841797937504e-06, "loss": 0.5068, "step": 11898 }, { "epoch": 0.35057378135910316, "grad_norm": 1.686185746968822, "learning_rate": 8.20644723932485e-06, "loss": 0.6159, "step": 11899 }, { "epoch": 0.35060324381656677, "grad_norm": 1.3227657032343454, "learning_rate": 8.206052646795373e-06, "loss": 0.3947, "step": 11900 }, { "epoch": 0.3506327062740303, "grad_norm": 1.4718907636408185, "learning_rate": 8.20565802035325e-06, "loss": 0.5082, "step": 11901 }, { "epoch": 0.3506621687314939, "grad_norm": 1.3226085489501684, "learning_rate": 8.205263360002653e-06, "loss": 0.3703, "step": 11902 }, { "epoch": 0.35069163118895746, "grad_norm": 1.5370845332568577, "learning_rate": 8.204868665747757e-06, "loss": 0.5599, "step": 11903 }, { "epoch": 0.35072109364642107, "grad_norm": 1.580033064798265, "learning_rate": 8.204473937592736e-06, "loss": 0.678, "step": 11904 }, { "epoch": 0.3507505561038846, "grad_norm": 1.3946959117193811, "learning_rate": 8.204079175541765e-06, "loss": 0.3964, "step": 11905 }, { "epoch": 0.3507800185613482, "grad_norm": 1.498209308737662, "learning_rate": 8.203684379599023e-06, "loss": 0.4116, "step": 11906 }, { "epoch": 0.35080948101881176, "grad_norm": 1.4516330502285026, "learning_rate": 8.203289549768684e-06, "loss": 0.3991, "step": 11907 }, { "epoch": 0.35083894347627537, "grad_norm": 1.4174204839451474, "learning_rate": 8.202894686054924e-06, "loss": 0.495, "step": 11908 }, { "epoch": 0.3508684059337389, "grad_norm": 1.5930444060036746, "learning_rate": 8.202499788461922e-06, "loss": 0.3314, "step": 11909 }, { "epoch": 0.3508978683912025, "grad_norm": 1.402170774276973, "learning_rate": 8.202104856993851e-06, "loss": 0.3208, "step": 11910 }, { "epoch": 0.35092733084866606, "grad_norm": 1.5315792640447028, "learning_rate": 8.20170989165489e-06, "loss": 0.4712, "step": 11911 }, { "epoch": 0.35095679330612967, "grad_norm": 1.6818651418705937, "learning_rate": 8.20131489244922e-06, "loss": 0.5292, "step": 11912 }, { "epoch": 0.35098625576359327, "grad_norm": 1.551825196712747, "learning_rate": 8.200919859381014e-06, "loss": 0.4956, "step": 11913 }, { "epoch": 0.3510157182210568, "grad_norm": 1.4172633600776707, "learning_rate": 8.200524792454455e-06, "loss": 0.4505, "step": 11914 }, { "epoch": 0.3510451806785204, "grad_norm": 1.5509680403442914, "learning_rate": 8.200129691673719e-06, "loss": 0.5501, "step": 11915 }, { "epoch": 0.35107464313598397, "grad_norm": 1.517523156408311, "learning_rate": 8.199734557042988e-06, "loss": 0.4781, "step": 11916 }, { "epoch": 0.35110410559344757, "grad_norm": 1.7464729187959236, "learning_rate": 8.19933938856644e-06, "loss": 0.6097, "step": 11917 }, { "epoch": 0.3511335680509111, "grad_norm": 1.8566707045249462, "learning_rate": 8.198944186248255e-06, "loss": 0.4292, "step": 11918 }, { "epoch": 0.3511630305083747, "grad_norm": 1.6730032909464483, "learning_rate": 8.19854895009261e-06, "loss": 0.5099, "step": 11919 }, { "epoch": 0.35119249296583827, "grad_norm": 1.411198341322674, "learning_rate": 8.198153680103692e-06, "loss": 0.4121, "step": 11920 }, { "epoch": 0.35122195542330187, "grad_norm": 1.5502617875837787, "learning_rate": 8.197758376285679e-06, "loss": 0.5628, "step": 11921 }, { "epoch": 0.3512514178807654, "grad_norm": 1.5473873361004489, "learning_rate": 8.197363038642752e-06, "loss": 0.4152, "step": 11922 }, { "epoch": 0.351280880338229, "grad_norm": 1.4236778568632151, "learning_rate": 8.196967667179092e-06, "loss": 0.4076, "step": 11923 }, { "epoch": 0.35131034279569257, "grad_norm": 1.639409031233977, "learning_rate": 8.196572261898885e-06, "loss": 0.4847, "step": 11924 }, { "epoch": 0.35133980525315617, "grad_norm": 1.735106122245211, "learning_rate": 8.196176822806308e-06, "loss": 0.5041, "step": 11925 }, { "epoch": 0.35136926771061977, "grad_norm": 1.5165966576104273, "learning_rate": 8.195781349905546e-06, "loss": 0.5447, "step": 11926 }, { "epoch": 0.3513987301680833, "grad_norm": 1.5204712370867826, "learning_rate": 8.195385843200787e-06, "loss": 0.4287, "step": 11927 }, { "epoch": 0.3514281926255469, "grad_norm": 1.4684486716187144, "learning_rate": 8.194990302696209e-06, "loss": 0.4934, "step": 11928 }, { "epoch": 0.35145765508301047, "grad_norm": 1.5543038431639784, "learning_rate": 8.194594728395994e-06, "loss": 0.5802, "step": 11929 }, { "epoch": 0.35148711754047407, "grad_norm": 1.7877552842302777, "learning_rate": 8.194199120304334e-06, "loss": 0.61, "step": 11930 }, { "epoch": 0.3515165799979376, "grad_norm": 1.723816268742352, "learning_rate": 8.193803478425406e-06, "loss": 0.5081, "step": 11931 }, { "epoch": 0.3515460424554012, "grad_norm": 1.5546956803157828, "learning_rate": 8.1934078027634e-06, "loss": 0.5046, "step": 11932 }, { "epoch": 0.35157550491286477, "grad_norm": 1.6803146580439678, "learning_rate": 8.1930120933225e-06, "loss": 0.5852, "step": 11933 }, { "epoch": 0.35160496737032837, "grad_norm": 1.7808717516241903, "learning_rate": 8.192616350106888e-06, "loss": 0.6126, "step": 11934 }, { "epoch": 0.3516344298277919, "grad_norm": 1.3641449623381638, "learning_rate": 8.192220573120755e-06, "loss": 0.3387, "step": 11935 }, { "epoch": 0.3516638922852555, "grad_norm": 1.4381372705574365, "learning_rate": 8.191824762368286e-06, "loss": 0.4537, "step": 11936 }, { "epoch": 0.35169335474271907, "grad_norm": 1.39867263479295, "learning_rate": 8.191428917853668e-06, "loss": 0.4397, "step": 11937 }, { "epoch": 0.35172281720018267, "grad_norm": 1.4985217378913038, "learning_rate": 8.191033039581086e-06, "loss": 0.4867, "step": 11938 }, { "epoch": 0.35175227965764627, "grad_norm": 1.5026149128111213, "learning_rate": 8.190637127554731e-06, "loss": 0.4918, "step": 11939 }, { "epoch": 0.3517817421151098, "grad_norm": 1.8726903105187787, "learning_rate": 8.190241181778787e-06, "loss": 0.4988, "step": 11940 }, { "epoch": 0.3518112045725734, "grad_norm": 1.4412415865420902, "learning_rate": 8.189845202257443e-06, "loss": 0.5086, "step": 11941 }, { "epoch": 0.35184066703003697, "grad_norm": 1.6112899740928843, "learning_rate": 8.189449188994891e-06, "loss": 0.4196, "step": 11942 }, { "epoch": 0.35187012948750057, "grad_norm": 1.6612707117699135, "learning_rate": 8.189053141995316e-06, "loss": 0.6265, "step": 11943 }, { "epoch": 0.3518995919449641, "grad_norm": 1.6518348941565717, "learning_rate": 8.18865706126291e-06, "loss": 0.4712, "step": 11944 }, { "epoch": 0.3519290544024277, "grad_norm": 1.5066852170427787, "learning_rate": 8.188260946801861e-06, "loss": 0.4024, "step": 11945 }, { "epoch": 0.35195851685989127, "grad_norm": 1.7487832226226465, "learning_rate": 8.18786479861636e-06, "loss": 0.4298, "step": 11946 }, { "epoch": 0.35198797931735487, "grad_norm": 1.491149933983025, "learning_rate": 8.187468616710596e-06, "loss": 0.4514, "step": 11947 }, { "epoch": 0.3520174417748184, "grad_norm": 1.6360080028025439, "learning_rate": 8.187072401088759e-06, "loss": 0.5373, "step": 11948 }, { "epoch": 0.352046904232282, "grad_norm": 1.5735632224526417, "learning_rate": 8.186676151755041e-06, "loss": 0.6028, "step": 11949 }, { "epoch": 0.35207636668974557, "grad_norm": 1.3795945329994914, "learning_rate": 8.186279868713637e-06, "loss": 0.4424, "step": 11950 }, { "epoch": 0.35210582914720917, "grad_norm": 1.5377116753482525, "learning_rate": 8.185883551968735e-06, "loss": 0.5228, "step": 11951 }, { "epoch": 0.3521352916046728, "grad_norm": 1.5304453221458345, "learning_rate": 8.185487201524526e-06, "loss": 0.5938, "step": 11952 }, { "epoch": 0.3521647540621363, "grad_norm": 1.3965799204436586, "learning_rate": 8.185090817385207e-06, "loss": 0.49, "step": 11953 }, { "epoch": 0.3521942165195999, "grad_norm": 1.766169497031729, "learning_rate": 8.184694399554964e-06, "loss": 0.5851, "step": 11954 }, { "epoch": 0.35222367897706347, "grad_norm": 1.5158868000505832, "learning_rate": 8.184297948037997e-06, "loss": 0.4675, "step": 11955 }, { "epoch": 0.3522531414345271, "grad_norm": 1.5174669242838057, "learning_rate": 8.183901462838495e-06, "loss": 0.3761, "step": 11956 }, { "epoch": 0.3522826038919906, "grad_norm": 1.3527964204945955, "learning_rate": 8.183504943960655e-06, "loss": 0.4169, "step": 11957 }, { "epoch": 0.3523120663494542, "grad_norm": 1.5116653210739146, "learning_rate": 8.18310839140867e-06, "loss": 0.4482, "step": 11958 }, { "epoch": 0.35234152880691777, "grad_norm": 1.8547606664857408, "learning_rate": 8.182711805186733e-06, "loss": 0.6171, "step": 11959 }, { "epoch": 0.3523709912643814, "grad_norm": 1.5975393772858508, "learning_rate": 8.182315185299042e-06, "loss": 0.4965, "step": 11960 }, { "epoch": 0.3524004537218449, "grad_norm": 1.5736315480430754, "learning_rate": 8.181918531749791e-06, "loss": 0.412, "step": 11961 }, { "epoch": 0.3524299161793085, "grad_norm": 1.4760564825080118, "learning_rate": 8.181521844543174e-06, "loss": 0.404, "step": 11962 }, { "epoch": 0.35245937863677207, "grad_norm": 1.3804934939163989, "learning_rate": 8.18112512368339e-06, "loss": 0.4039, "step": 11963 }, { "epoch": 0.35248884109423567, "grad_norm": 1.4752412054462216, "learning_rate": 8.180728369174634e-06, "loss": 0.4166, "step": 11964 }, { "epoch": 0.3525183035516993, "grad_norm": 1.5020728394512535, "learning_rate": 8.180331581021102e-06, "loss": 0.455, "step": 11965 }, { "epoch": 0.3525477660091628, "grad_norm": 1.7784904650925373, "learning_rate": 8.17993475922699e-06, "loss": 0.6302, "step": 11966 }, { "epoch": 0.3525772284666264, "grad_norm": 1.4270502376489318, "learning_rate": 8.179537903796499e-06, "loss": 0.3383, "step": 11967 }, { "epoch": 0.35260669092408997, "grad_norm": 2.076185698848112, "learning_rate": 8.179141014733826e-06, "loss": 0.4416, "step": 11968 }, { "epoch": 0.3526361533815536, "grad_norm": 1.5975107724423332, "learning_rate": 8.178744092043168e-06, "loss": 0.5929, "step": 11969 }, { "epoch": 0.3526656158390171, "grad_norm": 1.4487971806849762, "learning_rate": 8.178347135728722e-06, "loss": 0.4287, "step": 11970 }, { "epoch": 0.3526950782964807, "grad_norm": 1.6124127692508263, "learning_rate": 8.177950145794689e-06, "loss": 0.5698, "step": 11971 }, { "epoch": 0.35272454075394427, "grad_norm": 1.5340010756677849, "learning_rate": 8.17755312224527e-06, "loss": 0.4013, "step": 11972 }, { "epoch": 0.3527540032114079, "grad_norm": 1.4288122048634888, "learning_rate": 8.17715606508466e-06, "loss": 0.398, "step": 11973 }, { "epoch": 0.3527834656688714, "grad_norm": 1.4318293409653284, "learning_rate": 8.176758974317062e-06, "loss": 0.4693, "step": 11974 }, { "epoch": 0.352812928126335, "grad_norm": 1.6392149946798118, "learning_rate": 8.176361849946676e-06, "loss": 0.4301, "step": 11975 }, { "epoch": 0.35284239058379857, "grad_norm": 1.3849642718236554, "learning_rate": 8.175964691977702e-06, "loss": 0.4652, "step": 11976 }, { "epoch": 0.3528718530412622, "grad_norm": 1.6292479607964927, "learning_rate": 8.175567500414343e-06, "loss": 0.6183, "step": 11977 }, { "epoch": 0.3529013154987258, "grad_norm": 1.6533982455954963, "learning_rate": 8.175170275260797e-06, "loss": 0.4769, "step": 11978 }, { "epoch": 0.3529307779561893, "grad_norm": 1.6417267009247989, "learning_rate": 8.174773016521269e-06, "loss": 0.5043, "step": 11979 }, { "epoch": 0.3529602404136529, "grad_norm": 1.5315905641056802, "learning_rate": 8.174375724199959e-06, "loss": 0.4225, "step": 11980 }, { "epoch": 0.3529897028711165, "grad_norm": 1.6430209163136231, "learning_rate": 8.17397839830107e-06, "loss": 0.5696, "step": 11981 }, { "epoch": 0.3530191653285801, "grad_norm": 1.6562554378154217, "learning_rate": 8.173581038828804e-06, "loss": 0.4454, "step": 11982 }, { "epoch": 0.3530486277860436, "grad_norm": 1.5478084473655414, "learning_rate": 8.173183645787365e-06, "loss": 0.5515, "step": 11983 }, { "epoch": 0.3530780902435072, "grad_norm": 1.6768113398490498, "learning_rate": 8.172786219180957e-06, "loss": 0.5214, "step": 11984 }, { "epoch": 0.3531075527009708, "grad_norm": 1.434147789757127, "learning_rate": 8.172388759013783e-06, "loss": 0.4567, "step": 11985 }, { "epoch": 0.3531370151584344, "grad_norm": 1.6302495170024072, "learning_rate": 8.171991265290048e-06, "loss": 0.4669, "step": 11986 }, { "epoch": 0.3531664776158979, "grad_norm": 1.6734950434511964, "learning_rate": 8.171593738013956e-06, "loss": 0.451, "step": 11987 }, { "epoch": 0.3531959400733615, "grad_norm": 1.4533192326726154, "learning_rate": 8.171196177189712e-06, "loss": 0.4979, "step": 11988 }, { "epoch": 0.35322540253082507, "grad_norm": 1.39468281044274, "learning_rate": 8.170798582821522e-06, "loss": 0.4257, "step": 11989 }, { "epoch": 0.3532548649882887, "grad_norm": 1.5183530159314436, "learning_rate": 8.170400954913588e-06, "loss": 0.5158, "step": 11990 }, { "epoch": 0.3532843274457523, "grad_norm": 1.2748563721731079, "learning_rate": 8.17000329347012e-06, "loss": 0.4031, "step": 11991 }, { "epoch": 0.3533137899032158, "grad_norm": 1.545538476559438, "learning_rate": 8.169605598495328e-06, "loss": 0.5461, "step": 11992 }, { "epoch": 0.3533432523606794, "grad_norm": 1.526492752695001, "learning_rate": 8.169207869993409e-06, "loss": 0.3282, "step": 11993 }, { "epoch": 0.353372714818143, "grad_norm": 1.522355968347903, "learning_rate": 8.168810107968577e-06, "loss": 0.4903, "step": 11994 }, { "epoch": 0.3534021772756066, "grad_norm": 1.7628753983176553, "learning_rate": 8.168412312425035e-06, "loss": 0.6419, "step": 11995 }, { "epoch": 0.3534316397330701, "grad_norm": 1.6637734603337604, "learning_rate": 8.168014483366995e-06, "loss": 0.6653, "step": 11996 }, { "epoch": 0.3534611021905337, "grad_norm": 1.504066577347042, "learning_rate": 8.167616620798663e-06, "loss": 0.3842, "step": 11997 }, { "epoch": 0.3534905646479973, "grad_norm": 1.4170719259905773, "learning_rate": 8.167218724724249e-06, "loss": 0.3481, "step": 11998 }, { "epoch": 0.3535200271054609, "grad_norm": 1.5661705428552073, "learning_rate": 8.166820795147958e-06, "loss": 0.4972, "step": 11999 }, { "epoch": 0.3535494895629244, "grad_norm": 1.6111158202518647, "learning_rate": 8.166422832074004e-06, "loss": 0.4846, "step": 12000 }, { "epoch": 0.353578952020388, "grad_norm": 1.8161673572548902, "learning_rate": 8.166024835506592e-06, "loss": 0.6448, "step": 12001 }, { "epoch": 0.3536084144778516, "grad_norm": 1.5337566297851573, "learning_rate": 8.165626805449936e-06, "loss": 0.3831, "step": 12002 }, { "epoch": 0.3536378769353152, "grad_norm": 1.5891012239702895, "learning_rate": 8.165228741908245e-06, "loss": 0.5693, "step": 12003 }, { "epoch": 0.3536673393927788, "grad_norm": 1.4181662899289007, "learning_rate": 8.164830644885728e-06, "loss": 0.5085, "step": 12004 }, { "epoch": 0.3536968018502423, "grad_norm": 1.5475308687187888, "learning_rate": 8.164432514386595e-06, "loss": 0.4914, "step": 12005 }, { "epoch": 0.35372626430770593, "grad_norm": 1.5959081549803622, "learning_rate": 8.164034350415062e-06, "loss": 0.5564, "step": 12006 }, { "epoch": 0.3537557267651695, "grad_norm": 1.5632091652969022, "learning_rate": 8.163636152975339e-06, "loss": 0.333, "step": 12007 }, { "epoch": 0.3537851892226331, "grad_norm": 1.4109077517748747, "learning_rate": 8.163237922071635e-06, "loss": 0.5119, "step": 12008 }, { "epoch": 0.3538146516800966, "grad_norm": 1.594694243527755, "learning_rate": 8.162839657708164e-06, "loss": 0.5456, "step": 12009 }, { "epoch": 0.35384411413756023, "grad_norm": 1.4762475569108433, "learning_rate": 8.162441359889138e-06, "loss": 0.4463, "step": 12010 }, { "epoch": 0.3538735765950238, "grad_norm": 1.6541125572840956, "learning_rate": 8.162043028618773e-06, "loss": 0.5632, "step": 12011 }, { "epoch": 0.3539030390524874, "grad_norm": 1.798590314228321, "learning_rate": 8.16164466390128e-06, "loss": 0.5285, "step": 12012 }, { "epoch": 0.3539325015099509, "grad_norm": 1.647274290753267, "learning_rate": 8.161246265740873e-06, "loss": 0.4564, "step": 12013 }, { "epoch": 0.35396196396741453, "grad_norm": 1.476764664103738, "learning_rate": 8.160847834141767e-06, "loss": 0.3056, "step": 12014 }, { "epoch": 0.3539914264248781, "grad_norm": 1.7037875806258336, "learning_rate": 8.160449369108174e-06, "loss": 0.3836, "step": 12015 }, { "epoch": 0.3540208888823417, "grad_norm": 1.4298377772476027, "learning_rate": 8.160050870644314e-06, "loss": 0.5223, "step": 12016 }, { "epoch": 0.3540503513398053, "grad_norm": 1.785482966287881, "learning_rate": 8.159652338754396e-06, "loss": 0.5986, "step": 12017 }, { "epoch": 0.3540798137972688, "grad_norm": 1.5317430359878845, "learning_rate": 8.15925377344264e-06, "loss": 0.4434, "step": 12018 }, { "epoch": 0.35410927625473243, "grad_norm": 1.3865544889377117, "learning_rate": 8.15885517471326e-06, "loss": 0.4719, "step": 12019 }, { "epoch": 0.354138738712196, "grad_norm": 1.6636165385772885, "learning_rate": 8.158456542570472e-06, "loss": 0.6026, "step": 12020 }, { "epoch": 0.3541682011696596, "grad_norm": 1.8438794331839965, "learning_rate": 8.158057877018494e-06, "loss": 0.6898, "step": 12021 }, { "epoch": 0.3541976636271231, "grad_norm": 1.6068968802819816, "learning_rate": 8.157659178061543e-06, "loss": 0.6909, "step": 12022 }, { "epoch": 0.35422712608458673, "grad_norm": 1.6462783065913908, "learning_rate": 8.157260445703835e-06, "loss": 0.4811, "step": 12023 }, { "epoch": 0.3542565885420503, "grad_norm": 1.6425752355839465, "learning_rate": 8.156861679949588e-06, "loss": 0.4852, "step": 12024 }, { "epoch": 0.3542860509995139, "grad_norm": 1.4173409490045201, "learning_rate": 8.15646288080302e-06, "loss": 0.4092, "step": 12025 }, { "epoch": 0.3543155134569774, "grad_norm": 1.3212696850478418, "learning_rate": 8.15606404826835e-06, "loss": 0.3793, "step": 12026 }, { "epoch": 0.35434497591444103, "grad_norm": 1.4546583890928706, "learning_rate": 8.155665182349796e-06, "loss": 0.437, "step": 12027 }, { "epoch": 0.3543744383719046, "grad_norm": 1.4802945223859776, "learning_rate": 8.155266283051577e-06, "loss": 0.4355, "step": 12028 }, { "epoch": 0.3544039008293682, "grad_norm": 1.4566386269709788, "learning_rate": 8.154867350377915e-06, "loss": 0.3655, "step": 12029 }, { "epoch": 0.3544333632868318, "grad_norm": 1.6439830457714506, "learning_rate": 8.154468384333025e-06, "loss": 0.6754, "step": 12030 }, { "epoch": 0.35446282574429533, "grad_norm": 1.5581057474836726, "learning_rate": 8.154069384921132e-06, "loss": 0.5144, "step": 12031 }, { "epoch": 0.35449228820175893, "grad_norm": 1.504966867538981, "learning_rate": 8.153670352146452e-06, "loss": 0.4763, "step": 12032 }, { "epoch": 0.3545217506592225, "grad_norm": 1.5573313062121013, "learning_rate": 8.15327128601321e-06, "loss": 0.4811, "step": 12033 }, { "epoch": 0.3545512131166861, "grad_norm": 1.7412978062850146, "learning_rate": 8.152872186525625e-06, "loss": 0.5224, "step": 12034 }, { "epoch": 0.35458067557414963, "grad_norm": 1.4318761457952083, "learning_rate": 8.152473053687918e-06, "loss": 0.3406, "step": 12035 }, { "epoch": 0.35461013803161323, "grad_norm": 1.5534474681505999, "learning_rate": 8.152073887504312e-06, "loss": 0.5558, "step": 12036 }, { "epoch": 0.3546396004890768, "grad_norm": 1.4601781187341176, "learning_rate": 8.151674687979029e-06, "loss": 0.4268, "step": 12037 }, { "epoch": 0.3546690629465404, "grad_norm": 1.4570160204136626, "learning_rate": 8.151275455116292e-06, "loss": 0.4033, "step": 12038 }, { "epoch": 0.35469852540400393, "grad_norm": 1.648895134420861, "learning_rate": 8.150876188920324e-06, "loss": 0.5603, "step": 12039 }, { "epoch": 0.35472798786146753, "grad_norm": 1.6382827429117026, "learning_rate": 8.150476889395347e-06, "loss": 0.4472, "step": 12040 }, { "epoch": 0.3547574503189311, "grad_norm": 1.8849413174990035, "learning_rate": 8.150077556545589e-06, "loss": 0.6843, "step": 12041 }, { "epoch": 0.3547869127763947, "grad_norm": 1.422709490126246, "learning_rate": 8.149678190375265e-06, "loss": 0.3247, "step": 12042 }, { "epoch": 0.3548163752338583, "grad_norm": 1.5163883844682533, "learning_rate": 8.149278790888608e-06, "loss": 0.4798, "step": 12043 }, { "epoch": 0.35484583769132183, "grad_norm": 1.513512315182402, "learning_rate": 8.14887935808984e-06, "loss": 0.4485, "step": 12044 }, { "epoch": 0.35487530014878543, "grad_norm": 1.3806844239233313, "learning_rate": 8.148479891983186e-06, "loss": 0.3804, "step": 12045 }, { "epoch": 0.354904762606249, "grad_norm": 1.4919708615187242, "learning_rate": 8.14808039257287e-06, "loss": 0.5393, "step": 12046 }, { "epoch": 0.3549342250637126, "grad_norm": 1.5546397313151925, "learning_rate": 8.14768085986312e-06, "loss": 0.5159, "step": 12047 }, { "epoch": 0.35496368752117613, "grad_norm": 1.7709750184580897, "learning_rate": 8.147281293858161e-06, "loss": 0.3858, "step": 12048 }, { "epoch": 0.35499314997863973, "grad_norm": 1.6496201470941687, "learning_rate": 8.146881694562219e-06, "loss": 0.4658, "step": 12049 }, { "epoch": 0.3550226124361033, "grad_norm": 1.6587371454676996, "learning_rate": 8.146482061979521e-06, "loss": 0.5519, "step": 12050 }, { "epoch": 0.3550520748935669, "grad_norm": 1.6091487670212565, "learning_rate": 8.146082396114296e-06, "loss": 0.5414, "step": 12051 }, { "epoch": 0.35508153735103043, "grad_norm": 1.46238629653398, "learning_rate": 8.145682696970769e-06, "loss": 0.5354, "step": 12052 }, { "epoch": 0.35511099980849403, "grad_norm": 1.2239852177028399, "learning_rate": 8.145282964553169e-06, "loss": 0.3489, "step": 12053 }, { "epoch": 0.3551404622659576, "grad_norm": 1.6098318389612338, "learning_rate": 8.144883198865727e-06, "loss": 0.5464, "step": 12054 }, { "epoch": 0.3551699247234212, "grad_norm": 1.6206704141351995, "learning_rate": 8.144483399912665e-06, "loss": 0.5319, "step": 12055 }, { "epoch": 0.3551993871808848, "grad_norm": 1.5461007185098148, "learning_rate": 8.14408356769822e-06, "loss": 0.4738, "step": 12056 }, { "epoch": 0.35522884963834833, "grad_norm": 1.711565721730303, "learning_rate": 8.143683702226615e-06, "loss": 0.5383, "step": 12057 }, { "epoch": 0.35525831209581193, "grad_norm": 1.36354940520069, "learning_rate": 8.143283803502082e-06, "loss": 0.4957, "step": 12058 }, { "epoch": 0.3552877745532755, "grad_norm": 1.3209155066133997, "learning_rate": 8.14288387152885e-06, "loss": 0.3803, "step": 12059 }, { "epoch": 0.3553172370107391, "grad_norm": 1.4970636761761613, "learning_rate": 8.142483906311151e-06, "loss": 0.4899, "step": 12060 }, { "epoch": 0.35534669946820263, "grad_norm": 1.8121003538560263, "learning_rate": 8.142083907853216e-06, "loss": 0.4286, "step": 12061 }, { "epoch": 0.35537616192566623, "grad_norm": 1.678480163553365, "learning_rate": 8.141683876159275e-06, "loss": 0.5262, "step": 12062 }, { "epoch": 0.3554056243831298, "grad_norm": 1.4959579539428207, "learning_rate": 8.141283811233558e-06, "loss": 0.4554, "step": 12063 }, { "epoch": 0.3554350868405934, "grad_norm": 1.3987117880983393, "learning_rate": 8.1408837130803e-06, "loss": 0.4776, "step": 12064 }, { "epoch": 0.35546454929805693, "grad_norm": 1.5153432209352482, "learning_rate": 8.140483581703728e-06, "loss": 0.3351, "step": 12065 }, { "epoch": 0.35549401175552053, "grad_norm": 1.6896387619360056, "learning_rate": 8.14008341710808e-06, "loss": 0.3786, "step": 12066 }, { "epoch": 0.3555234742129841, "grad_norm": 1.4124583939820545, "learning_rate": 8.139683219297587e-06, "loss": 0.4336, "step": 12067 }, { "epoch": 0.3555529366704477, "grad_norm": 1.5581179510131264, "learning_rate": 8.139282988276482e-06, "loss": 0.6031, "step": 12068 }, { "epoch": 0.3555823991279113, "grad_norm": 1.5344064885933488, "learning_rate": 8.138882724048996e-06, "loss": 0.5005, "step": 12069 }, { "epoch": 0.35561186158537483, "grad_norm": 1.4825776262510413, "learning_rate": 8.138482426619366e-06, "loss": 0.4791, "step": 12070 }, { "epoch": 0.35564132404283844, "grad_norm": 1.698159292321218, "learning_rate": 8.138082095991826e-06, "loss": 0.4677, "step": 12071 }, { "epoch": 0.355670786500302, "grad_norm": 1.4961183765847628, "learning_rate": 8.13768173217061e-06, "loss": 0.3938, "step": 12072 }, { "epoch": 0.3557002489577656, "grad_norm": 1.3816731248959506, "learning_rate": 8.137281335159953e-06, "loss": 0.4491, "step": 12073 }, { "epoch": 0.35572971141522913, "grad_norm": 1.7031739374445605, "learning_rate": 8.13688090496409e-06, "loss": 0.4604, "step": 12074 }, { "epoch": 0.35575917387269274, "grad_norm": 1.528158565085024, "learning_rate": 8.136480441587259e-06, "loss": 0.533, "step": 12075 }, { "epoch": 0.3557886363301563, "grad_norm": 1.8431674943674574, "learning_rate": 8.136079945033691e-06, "loss": 0.6037, "step": 12076 }, { "epoch": 0.3558180987876199, "grad_norm": 1.5242794174289938, "learning_rate": 8.135679415307625e-06, "loss": 0.5009, "step": 12077 }, { "epoch": 0.35584756124508343, "grad_norm": 1.727182164658452, "learning_rate": 8.135278852413297e-06, "loss": 0.5412, "step": 12078 }, { "epoch": 0.35587702370254704, "grad_norm": 1.8655962912284576, "learning_rate": 8.134878256354947e-06, "loss": 0.4561, "step": 12079 }, { "epoch": 0.3559064861600106, "grad_norm": 1.5780400823895409, "learning_rate": 8.13447762713681e-06, "loss": 0.5143, "step": 12080 }, { "epoch": 0.3559359486174742, "grad_norm": 1.4565140153523102, "learning_rate": 8.134076964763122e-06, "loss": 0.3955, "step": 12081 }, { "epoch": 0.3559654110749378, "grad_norm": 1.6733041232562471, "learning_rate": 8.133676269238124e-06, "loss": 0.5875, "step": 12082 }, { "epoch": 0.35599487353240133, "grad_norm": 1.5620374990321004, "learning_rate": 8.133275540566055e-06, "loss": 0.5042, "step": 12083 }, { "epoch": 0.35602433598986494, "grad_norm": 1.4526367950049484, "learning_rate": 8.13287477875115e-06, "loss": 0.4185, "step": 12084 }, { "epoch": 0.3560537984473285, "grad_norm": 1.7045756166982031, "learning_rate": 8.13247398379765e-06, "loss": 0.4534, "step": 12085 }, { "epoch": 0.3560832609047921, "grad_norm": 1.6530433214296798, "learning_rate": 8.132073155709798e-06, "loss": 0.5189, "step": 12086 }, { "epoch": 0.35611272336225563, "grad_norm": 1.530739308262788, "learning_rate": 8.131672294491828e-06, "loss": 0.4896, "step": 12087 }, { "epoch": 0.35614218581971924, "grad_norm": 1.4502911828332212, "learning_rate": 8.131271400147983e-06, "loss": 0.4475, "step": 12088 }, { "epoch": 0.3561716482771828, "grad_norm": 1.5606320158347453, "learning_rate": 8.130870472682506e-06, "loss": 0.5534, "step": 12089 }, { "epoch": 0.3562011107346464, "grad_norm": 1.7044178803660135, "learning_rate": 8.130469512099633e-06, "loss": 0.4765, "step": 12090 }, { "epoch": 0.35623057319210993, "grad_norm": 1.4618202075063131, "learning_rate": 8.13006851840361e-06, "loss": 0.4563, "step": 12091 }, { "epoch": 0.35626003564957354, "grad_norm": 1.4621191651584826, "learning_rate": 8.129667491598672e-06, "loss": 0.4221, "step": 12092 }, { "epoch": 0.3562894981070371, "grad_norm": 1.6498449892917209, "learning_rate": 8.129266431689067e-06, "loss": 0.306, "step": 12093 }, { "epoch": 0.3563189605645007, "grad_norm": 1.4403293482270039, "learning_rate": 8.128865338679035e-06, "loss": 0.4805, "step": 12094 }, { "epoch": 0.3563484230219643, "grad_norm": 1.4943771618187736, "learning_rate": 8.12846421257282e-06, "loss": 0.4522, "step": 12095 }, { "epoch": 0.35637788547942784, "grad_norm": 1.8722703396840992, "learning_rate": 8.128063053374665e-06, "loss": 0.7313, "step": 12096 }, { "epoch": 0.35640734793689144, "grad_norm": 1.4813869617141955, "learning_rate": 8.12766186108881e-06, "loss": 0.5457, "step": 12097 }, { "epoch": 0.356436810394355, "grad_norm": 1.7400009437417763, "learning_rate": 8.1272606357195e-06, "loss": 0.6131, "step": 12098 }, { "epoch": 0.3564662728518186, "grad_norm": 1.7806344452419218, "learning_rate": 8.126859377270983e-06, "loss": 0.6194, "step": 12099 }, { "epoch": 0.35649573530928214, "grad_norm": 1.5930169542234554, "learning_rate": 8.1264580857475e-06, "loss": 0.5446, "step": 12100 }, { "epoch": 0.35652519776674574, "grad_norm": 1.5825115429634566, "learning_rate": 8.126056761153296e-06, "loss": 0.4138, "step": 12101 }, { "epoch": 0.3565546602242093, "grad_norm": 1.602269867162237, "learning_rate": 8.125655403492616e-06, "loss": 0.496, "step": 12102 }, { "epoch": 0.3565841226816729, "grad_norm": 1.6300712615607156, "learning_rate": 8.125254012769705e-06, "loss": 0.4936, "step": 12103 }, { "epoch": 0.35661358513913644, "grad_norm": 1.5351965586891556, "learning_rate": 8.12485258898881e-06, "loss": 0.4549, "step": 12104 }, { "epoch": 0.35664304759660004, "grad_norm": 1.6948779036442008, "learning_rate": 8.124451132154176e-06, "loss": 0.5809, "step": 12105 }, { "epoch": 0.3566725100540636, "grad_norm": 1.7076907996045874, "learning_rate": 8.124049642270052e-06, "loss": 0.3637, "step": 12106 }, { "epoch": 0.3567019725115272, "grad_norm": 1.4446878438161197, "learning_rate": 8.123648119340682e-06, "loss": 0.5127, "step": 12107 }, { "epoch": 0.3567314349689908, "grad_norm": 1.5525817250530873, "learning_rate": 8.123246563370314e-06, "loss": 0.4851, "step": 12108 }, { "epoch": 0.35676089742645434, "grad_norm": 1.5975150258326216, "learning_rate": 8.122844974363195e-06, "loss": 0.5685, "step": 12109 }, { "epoch": 0.35679035988391794, "grad_norm": 1.4828246275158696, "learning_rate": 8.122443352323574e-06, "loss": 0.4692, "step": 12110 }, { "epoch": 0.3568198223413815, "grad_norm": 1.352386810553229, "learning_rate": 8.122041697255701e-06, "loss": 0.4173, "step": 12111 }, { "epoch": 0.3568492847988451, "grad_norm": 1.7697995368385477, "learning_rate": 8.121640009163819e-06, "loss": 0.4882, "step": 12112 }, { "epoch": 0.35687874725630864, "grad_norm": 1.3962526978379128, "learning_rate": 8.121238288052182e-06, "loss": 0.4305, "step": 12113 }, { "epoch": 0.35690820971377224, "grad_norm": 1.5808419431971317, "learning_rate": 8.120836533925037e-06, "loss": 0.4382, "step": 12114 }, { "epoch": 0.3569376721712358, "grad_norm": 1.4643863948148317, "learning_rate": 8.120434746786634e-06, "loss": 0.3513, "step": 12115 }, { "epoch": 0.3569671346286994, "grad_norm": 1.3963164516955042, "learning_rate": 8.120032926641225e-06, "loss": 0.3967, "step": 12116 }, { "epoch": 0.35699659708616294, "grad_norm": 1.9050050509807224, "learning_rate": 8.119631073493057e-06, "loss": 0.6038, "step": 12117 }, { "epoch": 0.35702605954362654, "grad_norm": 1.5749770006725168, "learning_rate": 8.119229187346382e-06, "loss": 0.4331, "step": 12118 }, { "epoch": 0.3570555220010901, "grad_norm": 1.6767331511666839, "learning_rate": 8.118827268205454e-06, "loss": 0.4688, "step": 12119 }, { "epoch": 0.3570849844585537, "grad_norm": 1.6721635671336166, "learning_rate": 8.118425316074519e-06, "loss": 0.3896, "step": 12120 }, { "epoch": 0.3571144469160173, "grad_norm": 1.522648399320631, "learning_rate": 8.118023330957831e-06, "loss": 0.3987, "step": 12121 }, { "epoch": 0.35714390937348084, "grad_norm": 1.4079992667950372, "learning_rate": 8.117621312859645e-06, "loss": 0.4517, "step": 12122 }, { "epoch": 0.35717337183094444, "grad_norm": 1.4553730140689605, "learning_rate": 8.117219261784209e-06, "loss": 0.4776, "step": 12123 }, { "epoch": 0.357202834288408, "grad_norm": 1.4639858913957478, "learning_rate": 8.116817177735778e-06, "loss": 0.4642, "step": 12124 }, { "epoch": 0.3572322967458716, "grad_norm": 1.485186098313005, "learning_rate": 8.116415060718605e-06, "loss": 0.4149, "step": 12125 }, { "epoch": 0.35726175920333514, "grad_norm": 1.7754134907976489, "learning_rate": 8.116012910736943e-06, "loss": 0.4555, "step": 12126 }, { "epoch": 0.35729122166079874, "grad_norm": 1.5014859643988263, "learning_rate": 8.115610727795045e-06, "loss": 0.4311, "step": 12127 }, { "epoch": 0.3573206841182623, "grad_norm": 2.103146328364695, "learning_rate": 8.115208511897169e-06, "loss": 0.5712, "step": 12128 }, { "epoch": 0.3573501465757259, "grad_norm": 1.7078987863072053, "learning_rate": 8.114806263047563e-06, "loss": 0.4954, "step": 12129 }, { "epoch": 0.35737960903318944, "grad_norm": 1.363308524423608, "learning_rate": 8.114403981250488e-06, "loss": 0.3837, "step": 12130 }, { "epoch": 0.35740907149065304, "grad_norm": 1.4522518847769101, "learning_rate": 8.114001666510197e-06, "loss": 0.4383, "step": 12131 }, { "epoch": 0.3574385339481166, "grad_norm": 1.7661742766478457, "learning_rate": 8.113599318830945e-06, "loss": 0.4207, "step": 12132 }, { "epoch": 0.3574679964055802, "grad_norm": 1.5129514173956677, "learning_rate": 8.113196938216989e-06, "loss": 0.4662, "step": 12133 }, { "epoch": 0.3574974588630438, "grad_norm": 1.6723795264960861, "learning_rate": 8.112794524672582e-06, "loss": 0.4442, "step": 12134 }, { "epoch": 0.35752692132050734, "grad_norm": 1.6604121232339335, "learning_rate": 8.112392078201985e-06, "loss": 0.4136, "step": 12135 }, { "epoch": 0.35755638377797094, "grad_norm": 1.74081571795335, "learning_rate": 8.111989598809454e-06, "loss": 0.5928, "step": 12136 }, { "epoch": 0.3575858462354345, "grad_norm": 1.6114057829282311, "learning_rate": 8.111587086499243e-06, "loss": 0.5317, "step": 12137 }, { "epoch": 0.3576153086928981, "grad_norm": 1.4813665461440855, "learning_rate": 8.111184541275614e-06, "loss": 0.4009, "step": 12138 }, { "epoch": 0.35764477115036164, "grad_norm": 1.9365555684951845, "learning_rate": 8.110781963142824e-06, "loss": 0.504, "step": 12139 }, { "epoch": 0.35767423360782524, "grad_norm": 1.5129999544358295, "learning_rate": 8.110379352105129e-06, "loss": 0.4726, "step": 12140 }, { "epoch": 0.3577036960652888, "grad_norm": 1.676047623493858, "learning_rate": 8.10997670816679e-06, "loss": 0.5919, "step": 12141 }, { "epoch": 0.3577331585227524, "grad_norm": 1.466891143589177, "learning_rate": 8.109574031332064e-06, "loss": 0.4863, "step": 12142 }, { "epoch": 0.35776262098021594, "grad_norm": 1.4950575293085449, "learning_rate": 8.10917132160521e-06, "loss": 0.3979, "step": 12143 }, { "epoch": 0.35779208343767954, "grad_norm": 1.3618848403738784, "learning_rate": 8.108768578990493e-06, "loss": 0.3619, "step": 12144 }, { "epoch": 0.3578215458951431, "grad_norm": 1.3922292881646916, "learning_rate": 8.108365803492167e-06, "loss": 0.3663, "step": 12145 }, { "epoch": 0.3578510083526067, "grad_norm": 1.4288036568264806, "learning_rate": 8.107962995114495e-06, "loss": 0.4659, "step": 12146 }, { "epoch": 0.3578804708100703, "grad_norm": 1.5699248412700022, "learning_rate": 8.10756015386174e-06, "loss": 0.389, "step": 12147 }, { "epoch": 0.35790993326753384, "grad_norm": 1.500406106286767, "learning_rate": 8.107157279738159e-06, "loss": 0.5836, "step": 12148 }, { "epoch": 0.35793939572499744, "grad_norm": 1.5652455176154352, "learning_rate": 8.106754372748013e-06, "loss": 0.5106, "step": 12149 }, { "epoch": 0.357968858182461, "grad_norm": 1.520230655923739, "learning_rate": 8.10635143289557e-06, "loss": 0.3786, "step": 12150 }, { "epoch": 0.3579983206399246, "grad_norm": 1.5539526314669991, "learning_rate": 8.105948460185087e-06, "loss": 0.538, "step": 12151 }, { "epoch": 0.35802778309738814, "grad_norm": 1.6272198463942107, "learning_rate": 8.105545454620827e-06, "loss": 0.4288, "step": 12152 }, { "epoch": 0.35805724555485174, "grad_norm": 1.564633501661541, "learning_rate": 8.105142416207053e-06, "loss": 0.4281, "step": 12153 }, { "epoch": 0.3580867080123153, "grad_norm": 1.6519841920557394, "learning_rate": 8.104739344948029e-06, "loss": 0.471, "step": 12154 }, { "epoch": 0.3581161704697789, "grad_norm": 1.569415620079644, "learning_rate": 8.10433624084802e-06, "loss": 0.4122, "step": 12155 }, { "epoch": 0.35814563292724244, "grad_norm": 1.4010177626540146, "learning_rate": 8.103933103911287e-06, "loss": 0.4311, "step": 12156 }, { "epoch": 0.35817509538470604, "grad_norm": 1.535113517012287, "learning_rate": 8.103529934142096e-06, "loss": 0.4552, "step": 12157 }, { "epoch": 0.3582045578421696, "grad_norm": 1.4272515957794425, "learning_rate": 8.103126731544711e-06, "loss": 0.3685, "step": 12158 }, { "epoch": 0.3582340202996332, "grad_norm": 1.3767613108468197, "learning_rate": 8.102723496123397e-06, "loss": 0.4343, "step": 12159 }, { "epoch": 0.3582634827570968, "grad_norm": 1.5256122565187895, "learning_rate": 8.10232022788242e-06, "loss": 0.4814, "step": 12160 }, { "epoch": 0.35829294521456034, "grad_norm": 1.5412870166259425, "learning_rate": 8.101916926826046e-06, "loss": 0.5342, "step": 12161 }, { "epoch": 0.35832240767202395, "grad_norm": 1.3873819470438045, "learning_rate": 8.10151359295854e-06, "loss": 0.3566, "step": 12162 }, { "epoch": 0.3583518701294875, "grad_norm": 1.5887783851778645, "learning_rate": 8.101110226284166e-06, "loss": 0.5439, "step": 12163 }, { "epoch": 0.3583813325869511, "grad_norm": 1.611449446243306, "learning_rate": 8.100706826807195e-06, "loss": 0.4654, "step": 12164 }, { "epoch": 0.35841079504441464, "grad_norm": 1.5792357936075143, "learning_rate": 8.10030339453189e-06, "loss": 0.4436, "step": 12165 }, { "epoch": 0.35844025750187825, "grad_norm": 1.4498395229853123, "learning_rate": 8.099899929462522e-06, "loss": 0.4652, "step": 12166 }, { "epoch": 0.3584697199593418, "grad_norm": 1.4894605879738192, "learning_rate": 8.099496431603358e-06, "loss": 0.5008, "step": 12167 }, { "epoch": 0.3584991824168054, "grad_norm": 1.4917248403120362, "learning_rate": 8.099092900958664e-06, "loss": 0.4803, "step": 12168 }, { "epoch": 0.35852864487426894, "grad_norm": 1.9169155847175232, "learning_rate": 8.098689337532713e-06, "loss": 0.6866, "step": 12169 }, { "epoch": 0.35855810733173255, "grad_norm": 1.736973385059008, "learning_rate": 8.098285741329765e-06, "loss": 0.571, "step": 12170 }, { "epoch": 0.3585875697891961, "grad_norm": 1.5313763493559105, "learning_rate": 8.0978821123541e-06, "loss": 0.4951, "step": 12171 }, { "epoch": 0.3586170322466597, "grad_norm": 1.4476871771105753, "learning_rate": 8.09747845060998e-06, "loss": 0.3299, "step": 12172 }, { "epoch": 0.3586464947041233, "grad_norm": 1.5956046743763752, "learning_rate": 8.097074756101676e-06, "loss": 0.4593, "step": 12173 }, { "epoch": 0.35867595716158684, "grad_norm": 1.4649924088089996, "learning_rate": 8.096671028833461e-06, "loss": 0.4672, "step": 12174 }, { "epoch": 0.35870541961905045, "grad_norm": 1.7387474078883853, "learning_rate": 8.096267268809601e-06, "loss": 0.4426, "step": 12175 }, { "epoch": 0.358734882076514, "grad_norm": 1.4065918096888226, "learning_rate": 8.095863476034372e-06, "loss": 0.4905, "step": 12176 }, { "epoch": 0.3587643445339776, "grad_norm": 1.6108330139801175, "learning_rate": 8.09545965051204e-06, "loss": 0.4226, "step": 12177 }, { "epoch": 0.35879380699144114, "grad_norm": 1.8251207094414934, "learning_rate": 8.095055792246882e-06, "loss": 0.6232, "step": 12178 }, { "epoch": 0.35882326944890475, "grad_norm": 1.710139548148933, "learning_rate": 8.094651901243166e-06, "loss": 0.6454, "step": 12179 }, { "epoch": 0.3588527319063683, "grad_norm": 1.432637911938058, "learning_rate": 8.094247977505166e-06, "loss": 0.4468, "step": 12180 }, { "epoch": 0.3588821943638319, "grad_norm": 1.6264084835543335, "learning_rate": 8.093844021037153e-06, "loss": 0.5319, "step": 12181 }, { "epoch": 0.35891165682129544, "grad_norm": 1.730230579604579, "learning_rate": 8.0934400318434e-06, "loss": 0.4727, "step": 12182 }, { "epoch": 0.35894111927875905, "grad_norm": 1.5906854863099753, "learning_rate": 8.093036009928182e-06, "loss": 0.4393, "step": 12183 }, { "epoch": 0.3589705817362226, "grad_norm": 1.493847184018068, "learning_rate": 8.092631955295771e-06, "loss": 0.4631, "step": 12184 }, { "epoch": 0.3590000441936862, "grad_norm": 1.5210213378971784, "learning_rate": 8.092227867950444e-06, "loss": 0.2438, "step": 12185 }, { "epoch": 0.3590295066511498, "grad_norm": 1.53860009005297, "learning_rate": 8.091823747896473e-06, "loss": 0.4915, "step": 12186 }, { "epoch": 0.35905896910861335, "grad_norm": 1.6490891323434191, "learning_rate": 8.09141959513813e-06, "loss": 0.5168, "step": 12187 }, { "epoch": 0.35908843156607695, "grad_norm": 1.7446265368339033, "learning_rate": 8.091015409679693e-06, "loss": 0.6106, "step": 12188 }, { "epoch": 0.3591178940235405, "grad_norm": 1.4334713580703, "learning_rate": 8.090611191525437e-06, "loss": 0.451, "step": 12189 }, { "epoch": 0.3591473564810041, "grad_norm": 1.4946049292490131, "learning_rate": 8.090206940679638e-06, "loss": 0.3701, "step": 12190 }, { "epoch": 0.35917681893846765, "grad_norm": 1.5003813434120004, "learning_rate": 8.089802657146574e-06, "loss": 0.5157, "step": 12191 }, { "epoch": 0.35920628139593125, "grad_norm": 1.488333456566574, "learning_rate": 8.089398340930516e-06, "loss": 0.4214, "step": 12192 }, { "epoch": 0.3592357438533948, "grad_norm": 1.5108468156153214, "learning_rate": 8.088993992035746e-06, "loss": 0.4773, "step": 12193 }, { "epoch": 0.3592652063108584, "grad_norm": 1.4098503048060222, "learning_rate": 8.088589610466537e-06, "loss": 0.4473, "step": 12194 }, { "epoch": 0.35929466876832195, "grad_norm": 1.4253704862985366, "learning_rate": 8.08818519622717e-06, "loss": 0.394, "step": 12195 }, { "epoch": 0.35932413122578555, "grad_norm": 1.6065753806972163, "learning_rate": 8.087780749321921e-06, "loss": 0.6187, "step": 12196 }, { "epoch": 0.3593535936832491, "grad_norm": 1.6871929619517487, "learning_rate": 8.08737626975507e-06, "loss": 0.4138, "step": 12197 }, { "epoch": 0.3593830561407127, "grad_norm": 1.5857070214831543, "learning_rate": 8.08697175753089e-06, "loss": 0.4673, "step": 12198 }, { "epoch": 0.3594125185981763, "grad_norm": 1.4915271993826336, "learning_rate": 8.086567212653665e-06, "loss": 0.4592, "step": 12199 }, { "epoch": 0.35944198105563985, "grad_norm": 1.3503403413125417, "learning_rate": 8.086162635127674e-06, "loss": 0.4178, "step": 12200 }, { "epoch": 0.35947144351310345, "grad_norm": 1.7033631911041833, "learning_rate": 8.085758024957194e-06, "loss": 0.4712, "step": 12201 }, { "epoch": 0.359500905970567, "grad_norm": 1.4478649811000242, "learning_rate": 8.085353382146506e-06, "loss": 0.4146, "step": 12202 }, { "epoch": 0.3595303684280306, "grad_norm": 1.4100227530860223, "learning_rate": 8.08494870669989e-06, "loss": 0.4434, "step": 12203 }, { "epoch": 0.35955983088549415, "grad_norm": 1.4023488660686554, "learning_rate": 8.084543998621627e-06, "loss": 0.5202, "step": 12204 }, { "epoch": 0.35958929334295775, "grad_norm": 1.5239564107862051, "learning_rate": 8.084139257915997e-06, "loss": 0.4749, "step": 12205 }, { "epoch": 0.3596187558004213, "grad_norm": 1.4213996108146845, "learning_rate": 8.083734484587285e-06, "loss": 0.4913, "step": 12206 }, { "epoch": 0.3596482182578849, "grad_norm": 1.70837421491526, "learning_rate": 8.083329678639767e-06, "loss": 0.7026, "step": 12207 }, { "epoch": 0.35967768071534845, "grad_norm": 1.4705456933024676, "learning_rate": 8.082924840077727e-06, "loss": 0.5043, "step": 12208 }, { "epoch": 0.35970714317281205, "grad_norm": 1.4442018843957047, "learning_rate": 8.082519968905447e-06, "loss": 0.4558, "step": 12209 }, { "epoch": 0.3597366056302756, "grad_norm": 1.7070325324354445, "learning_rate": 8.08211506512721e-06, "loss": 0.5215, "step": 12210 }, { "epoch": 0.3597660680877392, "grad_norm": 1.4897448585790667, "learning_rate": 8.081710128747302e-06, "loss": 0.3553, "step": 12211 }, { "epoch": 0.3597955305452028, "grad_norm": 1.336934891292748, "learning_rate": 8.081305159770001e-06, "loss": 0.4212, "step": 12212 }, { "epoch": 0.35982499300266635, "grad_norm": 1.4801100150147763, "learning_rate": 8.080900158199594e-06, "loss": 0.4846, "step": 12213 }, { "epoch": 0.35985445546012995, "grad_norm": 1.6489413995048925, "learning_rate": 8.080495124040363e-06, "loss": 0.4636, "step": 12214 }, { "epoch": 0.3598839179175935, "grad_norm": 1.4600436136768948, "learning_rate": 8.080090057296595e-06, "loss": 0.4226, "step": 12215 }, { "epoch": 0.3599133803750571, "grad_norm": 1.8185785966294996, "learning_rate": 8.079684957972574e-06, "loss": 0.4739, "step": 12216 }, { "epoch": 0.35994284283252065, "grad_norm": 1.6421769821926568, "learning_rate": 8.079279826072582e-06, "loss": 0.4266, "step": 12217 }, { "epoch": 0.35997230528998425, "grad_norm": 1.618339500719089, "learning_rate": 8.078874661600907e-06, "loss": 0.3831, "step": 12218 }, { "epoch": 0.3600017677474478, "grad_norm": 1.7837803303229338, "learning_rate": 8.078469464561833e-06, "loss": 0.4567, "step": 12219 }, { "epoch": 0.3600312302049114, "grad_norm": 1.4404021722434315, "learning_rate": 8.078064234959648e-06, "loss": 0.3346, "step": 12220 }, { "epoch": 0.36006069266237495, "grad_norm": 1.5576237197896445, "learning_rate": 8.077658972798638e-06, "loss": 0.5236, "step": 12221 }, { "epoch": 0.36009015511983855, "grad_norm": 1.588699639311173, "learning_rate": 8.07725367808309e-06, "loss": 0.4901, "step": 12222 }, { "epoch": 0.3601196175773021, "grad_norm": 1.7172125373601872, "learning_rate": 8.076848350817287e-06, "loss": 0.646, "step": 12223 }, { "epoch": 0.3601490800347657, "grad_norm": 1.5975411455572852, "learning_rate": 8.076442991005524e-06, "loss": 0.5418, "step": 12224 }, { "epoch": 0.3601785424922293, "grad_norm": 1.61121297179828, "learning_rate": 8.076037598652082e-06, "loss": 0.5727, "step": 12225 }, { "epoch": 0.36020800494969285, "grad_norm": 1.6320395826533616, "learning_rate": 8.075632173761254e-06, "loss": 0.454, "step": 12226 }, { "epoch": 0.36023746740715645, "grad_norm": 1.5155307106583256, "learning_rate": 8.075226716337325e-06, "loss": 0.44, "step": 12227 }, { "epoch": 0.36026692986462, "grad_norm": 1.5863144379689444, "learning_rate": 8.074821226384583e-06, "loss": 0.4419, "step": 12228 }, { "epoch": 0.3602963923220836, "grad_norm": 2.311243719717266, "learning_rate": 8.074415703907321e-06, "loss": 0.4523, "step": 12229 }, { "epoch": 0.36032585477954715, "grad_norm": 1.7474945784702165, "learning_rate": 8.074010148909827e-06, "loss": 0.5664, "step": 12230 }, { "epoch": 0.36035531723701075, "grad_norm": 1.5720872392126373, "learning_rate": 8.073604561396391e-06, "loss": 0.4687, "step": 12231 }, { "epoch": 0.3603847796944743, "grad_norm": 1.3923820139611893, "learning_rate": 8.073198941371301e-06, "loss": 0.3746, "step": 12232 }, { "epoch": 0.3604142421519379, "grad_norm": 1.653641248185327, "learning_rate": 8.072793288838851e-06, "loss": 0.5235, "step": 12233 }, { "epoch": 0.36044370460940145, "grad_norm": 1.5740760717803817, "learning_rate": 8.072387603803329e-06, "loss": 0.4263, "step": 12234 }, { "epoch": 0.36047316706686505, "grad_norm": 1.4739722222307543, "learning_rate": 8.071981886269028e-06, "loss": 0.4598, "step": 12235 }, { "epoch": 0.3605026295243286, "grad_norm": 1.6183847365256046, "learning_rate": 8.071576136240239e-06, "loss": 0.5539, "step": 12236 }, { "epoch": 0.3605320919817922, "grad_norm": 1.4544684409491047, "learning_rate": 8.071170353721251e-06, "loss": 0.5064, "step": 12237 }, { "epoch": 0.3605615544392558, "grad_norm": 1.3810746538841827, "learning_rate": 8.070764538716362e-06, "loss": 0.4119, "step": 12238 }, { "epoch": 0.36059101689671935, "grad_norm": 1.5489234670925762, "learning_rate": 8.070358691229861e-06, "loss": 0.472, "step": 12239 }, { "epoch": 0.36062047935418295, "grad_norm": 1.4825516611098035, "learning_rate": 8.06995281126604e-06, "loss": 0.3868, "step": 12240 }, { "epoch": 0.3606499418116465, "grad_norm": 1.721960437375298, "learning_rate": 8.069546898829195e-06, "loss": 0.4901, "step": 12241 }, { "epoch": 0.3606794042691101, "grad_norm": 1.4895021052533208, "learning_rate": 8.069140953923618e-06, "loss": 0.3496, "step": 12242 }, { "epoch": 0.36070886672657365, "grad_norm": 1.5352771345840879, "learning_rate": 8.068734976553603e-06, "loss": 0.4476, "step": 12243 }, { "epoch": 0.36073832918403725, "grad_norm": 1.4012965013598289, "learning_rate": 8.068328966723445e-06, "loss": 0.4535, "step": 12244 }, { "epoch": 0.3607677916415008, "grad_norm": 1.4474496628775295, "learning_rate": 8.067922924437438e-06, "loss": 0.5072, "step": 12245 }, { "epoch": 0.3607972540989644, "grad_norm": 1.242456586344535, "learning_rate": 8.067516849699879e-06, "loss": 0.3274, "step": 12246 }, { "epoch": 0.36082671655642795, "grad_norm": 1.9183469311054482, "learning_rate": 8.06711074251506e-06, "loss": 0.5857, "step": 12247 }, { "epoch": 0.36085617901389155, "grad_norm": 1.6138227344334817, "learning_rate": 8.066704602887279e-06, "loss": 0.5461, "step": 12248 }, { "epoch": 0.3608856414713551, "grad_norm": 1.385997856866938, "learning_rate": 8.066298430820829e-06, "loss": 0.4259, "step": 12249 }, { "epoch": 0.3609151039288187, "grad_norm": 1.407150868406306, "learning_rate": 8.06589222632001e-06, "loss": 0.4731, "step": 12250 }, { "epoch": 0.3609445663862823, "grad_norm": 1.4558447287074128, "learning_rate": 8.06548598938912e-06, "loss": 0.4091, "step": 12251 }, { "epoch": 0.36097402884374585, "grad_norm": 1.568608996947099, "learning_rate": 8.06507972003245e-06, "loss": 0.4428, "step": 12252 }, { "epoch": 0.36100349130120946, "grad_norm": 1.7421921979962145, "learning_rate": 8.064673418254302e-06, "loss": 0.5759, "step": 12253 }, { "epoch": 0.361032953758673, "grad_norm": 1.3833043220934556, "learning_rate": 8.064267084058973e-06, "loss": 0.4109, "step": 12254 }, { "epoch": 0.3610624162161366, "grad_norm": 1.4697705373171581, "learning_rate": 8.063860717450762e-06, "loss": 0.3011, "step": 12255 }, { "epoch": 0.36109187867360015, "grad_norm": 1.628731848628575, "learning_rate": 8.063454318433963e-06, "loss": 0.4152, "step": 12256 }, { "epoch": 0.36112134113106376, "grad_norm": 1.5105137005946028, "learning_rate": 8.063047887012881e-06, "loss": 0.5253, "step": 12257 }, { "epoch": 0.3611508035885273, "grad_norm": 1.4433904559184556, "learning_rate": 8.062641423191808e-06, "loss": 0.4454, "step": 12258 }, { "epoch": 0.3611802660459909, "grad_norm": 1.6328165684222407, "learning_rate": 8.06223492697505e-06, "loss": 0.4534, "step": 12259 }, { "epoch": 0.36120972850345445, "grad_norm": 1.4369393003497828, "learning_rate": 8.061828398366905e-06, "loss": 0.403, "step": 12260 }, { "epoch": 0.36123919096091806, "grad_norm": 1.4236638033960671, "learning_rate": 8.061421837371672e-06, "loss": 0.4937, "step": 12261 }, { "epoch": 0.3612686534183816, "grad_norm": 1.6660950450658318, "learning_rate": 8.06101524399365e-06, "loss": 0.6771, "step": 12262 }, { "epoch": 0.3612981158758452, "grad_norm": 1.5644595925927784, "learning_rate": 8.060608618237143e-06, "loss": 0.499, "step": 12263 }, { "epoch": 0.3613275783333088, "grad_norm": 1.4967283068067156, "learning_rate": 8.06020196010645e-06, "loss": 0.5092, "step": 12264 }, { "epoch": 0.36135704079077235, "grad_norm": 1.5176986062838613, "learning_rate": 8.059795269605874e-06, "loss": 0.4311, "step": 12265 }, { "epoch": 0.36138650324823596, "grad_norm": 1.5815944683578538, "learning_rate": 8.059388546739716e-06, "loss": 0.4336, "step": 12266 }, { "epoch": 0.3614159657056995, "grad_norm": 2.0008189645150143, "learning_rate": 8.058981791512277e-06, "loss": 0.4653, "step": 12267 }, { "epoch": 0.3614454281631631, "grad_norm": 1.5222572464388817, "learning_rate": 8.058575003927862e-06, "loss": 0.3793, "step": 12268 }, { "epoch": 0.36147489062062665, "grad_norm": 1.9195631737466574, "learning_rate": 8.058168183990773e-06, "loss": 0.6686, "step": 12269 }, { "epoch": 0.36150435307809026, "grad_norm": 1.6452362530177762, "learning_rate": 8.057761331705313e-06, "loss": 0.3646, "step": 12270 }, { "epoch": 0.3615338155355538, "grad_norm": 1.8744629634426755, "learning_rate": 8.057354447075786e-06, "loss": 0.4412, "step": 12271 }, { "epoch": 0.3615632779930174, "grad_norm": 1.667298681300807, "learning_rate": 8.056947530106494e-06, "loss": 0.528, "step": 12272 }, { "epoch": 0.36159274045048095, "grad_norm": 1.3217158993674998, "learning_rate": 8.056540580801743e-06, "loss": 0.4328, "step": 12273 }, { "epoch": 0.36162220290794456, "grad_norm": 1.5347262318809953, "learning_rate": 8.056133599165839e-06, "loss": 0.4805, "step": 12274 }, { "epoch": 0.3616516653654081, "grad_norm": 1.4492876653206004, "learning_rate": 8.055726585203084e-06, "loss": 0.405, "step": 12275 }, { "epoch": 0.3616811278228717, "grad_norm": 1.7229856465309072, "learning_rate": 8.055319538917783e-06, "loss": 0.6075, "step": 12276 }, { "epoch": 0.3617105902803353, "grad_norm": 1.5190626514783416, "learning_rate": 8.054912460314244e-06, "loss": 0.5927, "step": 12277 }, { "epoch": 0.36174005273779886, "grad_norm": 1.6446421585188344, "learning_rate": 8.054505349396773e-06, "loss": 0.592, "step": 12278 }, { "epoch": 0.36176951519526246, "grad_norm": 1.3938018879883418, "learning_rate": 8.054098206169674e-06, "loss": 0.4417, "step": 12279 }, { "epoch": 0.361798977652726, "grad_norm": 1.5593353446973748, "learning_rate": 8.053691030637255e-06, "loss": 0.5028, "step": 12280 }, { "epoch": 0.3618284401101896, "grad_norm": 1.540632877579364, "learning_rate": 8.053283822803824e-06, "loss": 0.5327, "step": 12281 }, { "epoch": 0.36185790256765316, "grad_norm": 1.4287210790040394, "learning_rate": 8.052876582673686e-06, "loss": 0.4406, "step": 12282 }, { "epoch": 0.36188736502511676, "grad_norm": 1.3840356799375615, "learning_rate": 8.052469310251151e-06, "loss": 0.3971, "step": 12283 }, { "epoch": 0.3619168274825803, "grad_norm": 1.5545424268474624, "learning_rate": 8.052062005540527e-06, "loss": 0.574, "step": 12284 }, { "epoch": 0.3619462899400439, "grad_norm": 1.5020076812419674, "learning_rate": 8.051654668546118e-06, "loss": 0.4184, "step": 12285 }, { "epoch": 0.36197575239750746, "grad_norm": 1.3666674199651994, "learning_rate": 8.051247299272238e-06, "loss": 0.3548, "step": 12286 }, { "epoch": 0.36200521485497106, "grad_norm": 1.6226239182764124, "learning_rate": 8.050839897723194e-06, "loss": 0.4862, "step": 12287 }, { "epoch": 0.3620346773124346, "grad_norm": 1.6792313563382948, "learning_rate": 8.050432463903295e-06, "loss": 0.4279, "step": 12288 }, { "epoch": 0.3620641397698982, "grad_norm": 1.3266390676849433, "learning_rate": 8.05002499781685e-06, "loss": 0.4468, "step": 12289 }, { "epoch": 0.3620936022273618, "grad_norm": 1.6202609355371957, "learning_rate": 8.049617499468171e-06, "loss": 0.4113, "step": 12290 }, { "epoch": 0.36212306468482536, "grad_norm": 1.396136125753145, "learning_rate": 8.04920996886157e-06, "loss": 0.3326, "step": 12291 }, { "epoch": 0.36215252714228896, "grad_norm": 1.6485560673288109, "learning_rate": 8.048802406001352e-06, "loss": 0.544, "step": 12292 }, { "epoch": 0.3621819895997525, "grad_norm": 1.7430537319948525, "learning_rate": 8.048394810891831e-06, "loss": 0.5345, "step": 12293 }, { "epoch": 0.3622114520572161, "grad_norm": 1.6555301885167901, "learning_rate": 8.047987183537321e-06, "loss": 0.4669, "step": 12294 }, { "epoch": 0.36224091451467966, "grad_norm": 1.321643229491904, "learning_rate": 8.04757952394213e-06, "loss": 0.3903, "step": 12295 }, { "epoch": 0.36227037697214326, "grad_norm": 1.4417661166530649, "learning_rate": 8.047171832110572e-06, "loss": 0.4993, "step": 12296 }, { "epoch": 0.3622998394296068, "grad_norm": 1.676510778136105, "learning_rate": 8.046764108046959e-06, "loss": 0.5959, "step": 12297 }, { "epoch": 0.3623293018870704, "grad_norm": 1.4229582976340927, "learning_rate": 8.046356351755603e-06, "loss": 0.4595, "step": 12298 }, { "epoch": 0.36235876434453396, "grad_norm": 1.3622306516831948, "learning_rate": 8.04594856324082e-06, "loss": 0.4194, "step": 12299 }, { "epoch": 0.36238822680199756, "grad_norm": 1.749230583136997, "learning_rate": 8.045540742506919e-06, "loss": 0.641, "step": 12300 }, { "epoch": 0.3624176892594611, "grad_norm": 1.4111745378381442, "learning_rate": 8.045132889558219e-06, "loss": 0.5142, "step": 12301 }, { "epoch": 0.3624471517169247, "grad_norm": 1.4227141408323278, "learning_rate": 8.044725004399028e-06, "loss": 0.4367, "step": 12302 }, { "epoch": 0.3624766141743883, "grad_norm": 1.4802131031650407, "learning_rate": 8.044317087033664e-06, "loss": 0.4378, "step": 12303 }, { "epoch": 0.36250607663185186, "grad_norm": 1.4501502487940174, "learning_rate": 8.043909137466443e-06, "loss": 0.4924, "step": 12304 }, { "epoch": 0.36253553908931546, "grad_norm": 1.4001012281753917, "learning_rate": 8.043501155701679e-06, "loss": 0.4378, "step": 12305 }, { "epoch": 0.362565001546779, "grad_norm": 1.4029333171986882, "learning_rate": 8.043093141743688e-06, "loss": 0.402, "step": 12306 }, { "epoch": 0.3625944640042426, "grad_norm": 1.5181206982160913, "learning_rate": 8.042685095596785e-06, "loss": 0.4235, "step": 12307 }, { "epoch": 0.36262392646170616, "grad_norm": 1.5792634231782292, "learning_rate": 8.042277017265286e-06, "loss": 0.4482, "step": 12308 }, { "epoch": 0.36265338891916976, "grad_norm": 1.384696237196591, "learning_rate": 8.041868906753506e-06, "loss": 0.4076, "step": 12309 }, { "epoch": 0.3626828513766333, "grad_norm": 1.5957862429966962, "learning_rate": 8.041460764065765e-06, "loss": 0.4928, "step": 12310 }, { "epoch": 0.3627123138340969, "grad_norm": 1.5084628900875778, "learning_rate": 8.04105258920638e-06, "loss": 0.418, "step": 12311 }, { "epoch": 0.36274177629156046, "grad_norm": 1.613846434407902, "learning_rate": 8.040644382179666e-06, "loss": 0.4957, "step": 12312 }, { "epoch": 0.36277123874902406, "grad_norm": 1.6099562057973358, "learning_rate": 8.040236142989944e-06, "loss": 0.4361, "step": 12313 }, { "epoch": 0.3628007012064876, "grad_norm": 1.5525307677317035, "learning_rate": 8.039827871641529e-06, "loss": 0.5211, "step": 12314 }, { "epoch": 0.3628301636639512, "grad_norm": 1.4159687965859542, "learning_rate": 8.039419568138742e-06, "loss": 0.4065, "step": 12315 }, { "epoch": 0.3628596261214148, "grad_norm": 1.3087492850812459, "learning_rate": 8.039011232485901e-06, "loss": 0.3568, "step": 12316 }, { "epoch": 0.36288908857887836, "grad_norm": 1.5888296145127405, "learning_rate": 8.038602864687325e-06, "loss": 0.4864, "step": 12317 }, { "epoch": 0.36291855103634196, "grad_norm": 1.6417843519096016, "learning_rate": 8.038194464747334e-06, "loss": 0.4567, "step": 12318 }, { "epoch": 0.3629480134938055, "grad_norm": 1.5083349047811918, "learning_rate": 8.037786032670248e-06, "loss": 0.5003, "step": 12319 }, { "epoch": 0.3629774759512691, "grad_norm": 1.4065311360641064, "learning_rate": 8.037377568460386e-06, "loss": 0.2999, "step": 12320 }, { "epoch": 0.36300693840873266, "grad_norm": 1.5523591032007114, "learning_rate": 8.036969072122073e-06, "loss": 0.5425, "step": 12321 }, { "epoch": 0.36303640086619626, "grad_norm": 1.5845831078403128, "learning_rate": 8.036560543659624e-06, "loss": 0.4863, "step": 12322 }, { "epoch": 0.3630658633236598, "grad_norm": 1.4330539226887211, "learning_rate": 8.036151983077366e-06, "loss": 0.3986, "step": 12323 }, { "epoch": 0.3630953257811234, "grad_norm": 1.6190147127959877, "learning_rate": 8.035743390379614e-06, "loss": 0.5357, "step": 12324 }, { "epoch": 0.36312478823858696, "grad_norm": 1.6708023352374066, "learning_rate": 8.035334765570694e-06, "loss": 0.4407, "step": 12325 }, { "epoch": 0.36315425069605056, "grad_norm": 1.4184621032152127, "learning_rate": 8.03492610865493e-06, "loss": 0.4551, "step": 12326 }, { "epoch": 0.3631837131535141, "grad_norm": 1.5147911953205195, "learning_rate": 8.03451741963664e-06, "loss": 0.5674, "step": 12327 }, { "epoch": 0.3632131756109777, "grad_norm": 1.639811167477934, "learning_rate": 8.034108698520152e-06, "loss": 0.5912, "step": 12328 }, { "epoch": 0.3632426380684413, "grad_norm": 1.6793116915867075, "learning_rate": 8.033699945309785e-06, "loss": 0.4883, "step": 12329 }, { "epoch": 0.36327210052590486, "grad_norm": 1.5902072774903695, "learning_rate": 8.033291160009867e-06, "loss": 0.5411, "step": 12330 }, { "epoch": 0.36330156298336846, "grad_norm": 1.5744700959886049, "learning_rate": 8.032882342624717e-06, "loss": 0.4566, "step": 12331 }, { "epoch": 0.363331025440832, "grad_norm": 1.4679196600336388, "learning_rate": 8.032473493158663e-06, "loss": 0.5146, "step": 12332 }, { "epoch": 0.3633604878982956, "grad_norm": 1.7011928455437624, "learning_rate": 8.032064611616029e-06, "loss": 0.4127, "step": 12333 }, { "epoch": 0.36338995035575916, "grad_norm": 1.4213319721467563, "learning_rate": 8.031655698001137e-06, "loss": 0.4611, "step": 12334 }, { "epoch": 0.36341941281322276, "grad_norm": 1.5153017504270474, "learning_rate": 8.031246752318317e-06, "loss": 0.547, "step": 12335 }, { "epoch": 0.3634488752706863, "grad_norm": 1.4249258454744513, "learning_rate": 8.030837774571893e-06, "loss": 0.2959, "step": 12336 }, { "epoch": 0.3634783377281499, "grad_norm": 1.459632177209248, "learning_rate": 8.03042876476619e-06, "loss": 0.4358, "step": 12337 }, { "epoch": 0.36350780018561346, "grad_norm": 1.610790705859084, "learning_rate": 8.030019722905535e-06, "loss": 0.5406, "step": 12338 }, { "epoch": 0.36353726264307706, "grad_norm": 1.4886371425259879, "learning_rate": 8.029610648994255e-06, "loss": 0.4944, "step": 12339 }, { "epoch": 0.3635667251005406, "grad_norm": 1.6535236069959218, "learning_rate": 8.029201543036677e-06, "loss": 0.4483, "step": 12340 }, { "epoch": 0.3635961875580042, "grad_norm": 1.6079734250108058, "learning_rate": 8.028792405037128e-06, "loss": 0.4043, "step": 12341 }, { "epoch": 0.3636256500154678, "grad_norm": 1.6662036648066194, "learning_rate": 8.028383234999938e-06, "loss": 0.4423, "step": 12342 }, { "epoch": 0.36365511247293136, "grad_norm": 1.3656480863595795, "learning_rate": 8.027974032929431e-06, "loss": 0.4341, "step": 12343 }, { "epoch": 0.36368457493039497, "grad_norm": 1.3230114528533736, "learning_rate": 8.027564798829936e-06, "loss": 0.4953, "step": 12344 }, { "epoch": 0.3637140373878585, "grad_norm": 1.6701869004928014, "learning_rate": 8.027155532705785e-06, "loss": 0.519, "step": 12345 }, { "epoch": 0.3637434998453221, "grad_norm": 1.5398527765998593, "learning_rate": 8.026746234561307e-06, "loss": 0.5329, "step": 12346 }, { "epoch": 0.36377296230278566, "grad_norm": 1.5250513436830984, "learning_rate": 8.026336904400827e-06, "loss": 0.3899, "step": 12347 }, { "epoch": 0.36380242476024927, "grad_norm": 1.6792911968826072, "learning_rate": 8.02592754222868e-06, "loss": 0.5327, "step": 12348 }, { "epoch": 0.3638318872177128, "grad_norm": 1.4589087387816821, "learning_rate": 8.025518148049191e-06, "loss": 0.4768, "step": 12349 }, { "epoch": 0.3638613496751764, "grad_norm": 1.6241208961510698, "learning_rate": 8.025108721866695e-06, "loss": 0.5139, "step": 12350 }, { "epoch": 0.36389081213263996, "grad_norm": 1.5550945862836956, "learning_rate": 8.02469926368552e-06, "loss": 0.36, "step": 12351 }, { "epoch": 0.36392027459010357, "grad_norm": 1.5169040576312158, "learning_rate": 8.024289773509998e-06, "loss": 0.6042, "step": 12352 }, { "epoch": 0.3639497370475671, "grad_norm": 1.5019810200097512, "learning_rate": 8.02388025134446e-06, "loss": 0.3828, "step": 12353 }, { "epoch": 0.3639791995050307, "grad_norm": 1.5632942911448726, "learning_rate": 8.023470697193239e-06, "loss": 0.5713, "step": 12354 }, { "epoch": 0.3640086619624943, "grad_norm": 1.795301005282316, "learning_rate": 8.023061111060667e-06, "loss": 0.4437, "step": 12355 }, { "epoch": 0.36403812441995786, "grad_norm": 1.3565476457664156, "learning_rate": 8.022651492951074e-06, "loss": 0.4557, "step": 12356 }, { "epoch": 0.36406758687742147, "grad_norm": 1.5814591349432323, "learning_rate": 8.022241842868796e-06, "loss": 0.5066, "step": 12357 }, { "epoch": 0.364097049334885, "grad_norm": 1.6016078068090602, "learning_rate": 8.021832160818166e-06, "loss": 0.4209, "step": 12358 }, { "epoch": 0.3641265117923486, "grad_norm": 1.3682192746399688, "learning_rate": 8.021422446803514e-06, "loss": 0.3815, "step": 12359 }, { "epoch": 0.36415597424981216, "grad_norm": 1.603378286687371, "learning_rate": 8.021012700829178e-06, "loss": 0.47, "step": 12360 }, { "epoch": 0.36418543670727577, "grad_norm": 1.6161578557287055, "learning_rate": 8.020602922899489e-06, "loss": 0.575, "step": 12361 }, { "epoch": 0.3642148991647393, "grad_norm": 1.6405780053016108, "learning_rate": 8.020193113018784e-06, "loss": 0.482, "step": 12362 }, { "epoch": 0.3642443616222029, "grad_norm": 1.628148654892978, "learning_rate": 8.019783271191395e-06, "loss": 0.3976, "step": 12363 }, { "epoch": 0.36427382407966646, "grad_norm": 1.356797684651513, "learning_rate": 8.019373397421661e-06, "loss": 0.4514, "step": 12364 }, { "epoch": 0.36430328653713007, "grad_norm": 1.4676584374148385, "learning_rate": 8.018963491713913e-06, "loss": 0.3891, "step": 12365 }, { "epoch": 0.3643327489945936, "grad_norm": 1.6594061481086777, "learning_rate": 8.018553554072492e-06, "loss": 0.7161, "step": 12366 }, { "epoch": 0.3643622114520572, "grad_norm": 1.4421538360487312, "learning_rate": 8.018143584501729e-06, "loss": 0.4622, "step": 12367 }, { "epoch": 0.3643916739095208, "grad_norm": 1.6358112564649543, "learning_rate": 8.017733583005964e-06, "loss": 0.4151, "step": 12368 }, { "epoch": 0.36442113636698437, "grad_norm": 1.5450996047890413, "learning_rate": 8.017323549589533e-06, "loss": 0.558, "step": 12369 }, { "epoch": 0.36445059882444797, "grad_norm": 1.4345488731696763, "learning_rate": 8.016913484256774e-06, "loss": 0.4508, "step": 12370 }, { "epoch": 0.3644800612819115, "grad_norm": 1.4066033793892507, "learning_rate": 8.016503387012022e-06, "loss": 0.478, "step": 12371 }, { "epoch": 0.3645095237393751, "grad_norm": 1.7764773760325163, "learning_rate": 8.016093257859616e-06, "loss": 0.5155, "step": 12372 }, { "epoch": 0.36453898619683867, "grad_norm": 1.5621406519258487, "learning_rate": 8.015683096803896e-06, "loss": 0.4092, "step": 12373 }, { "epoch": 0.36456844865430227, "grad_norm": 1.616193056964411, "learning_rate": 8.015272903849198e-06, "loss": 0.4712, "step": 12374 }, { "epoch": 0.3645979111117658, "grad_norm": 1.3850571334532478, "learning_rate": 8.014862678999864e-06, "loss": 0.5173, "step": 12375 }, { "epoch": 0.3646273735692294, "grad_norm": 1.462388500022026, "learning_rate": 8.01445242226023e-06, "loss": 0.3677, "step": 12376 }, { "epoch": 0.36465683602669297, "grad_norm": 1.4843539250520716, "learning_rate": 8.014042133634638e-06, "loss": 0.3897, "step": 12377 }, { "epoch": 0.36468629848415657, "grad_norm": 1.5073986065294138, "learning_rate": 8.013631813127427e-06, "loss": 0.475, "step": 12378 }, { "epoch": 0.3647157609416201, "grad_norm": 1.5622131865048898, "learning_rate": 8.013221460742935e-06, "loss": 0.35, "step": 12379 }, { "epoch": 0.3647452233990837, "grad_norm": 1.4778883130377984, "learning_rate": 8.012811076485507e-06, "loss": 0.3617, "step": 12380 }, { "epoch": 0.3647746858565473, "grad_norm": 1.8774979885767293, "learning_rate": 8.01240066035948e-06, "loss": 0.5004, "step": 12381 }, { "epoch": 0.36480414831401087, "grad_norm": 1.3600746412235245, "learning_rate": 8.011990212369198e-06, "loss": 0.3204, "step": 12382 }, { "epoch": 0.36483361077147447, "grad_norm": 1.8065146166535564, "learning_rate": 8.011579732519001e-06, "loss": 0.498, "step": 12383 }, { "epoch": 0.364863073228938, "grad_norm": 1.454268338928801, "learning_rate": 8.011169220813231e-06, "loss": 0.2938, "step": 12384 }, { "epoch": 0.3648925356864016, "grad_norm": 1.5861471035824104, "learning_rate": 8.010758677256231e-06, "loss": 0.5416, "step": 12385 }, { "epoch": 0.36492199814386517, "grad_norm": 1.46197488326006, "learning_rate": 8.010348101852346e-06, "loss": 0.4367, "step": 12386 }, { "epoch": 0.36495146060132877, "grad_norm": 1.684414828506867, "learning_rate": 8.009937494605913e-06, "loss": 0.4884, "step": 12387 }, { "epoch": 0.3649809230587923, "grad_norm": 1.4506087804755974, "learning_rate": 8.009526855521277e-06, "loss": 0.5224, "step": 12388 }, { "epoch": 0.3650103855162559, "grad_norm": 1.3131919667692937, "learning_rate": 8.009116184602785e-06, "loss": 0.4485, "step": 12389 }, { "epoch": 0.36503984797371947, "grad_norm": 1.515725690666705, "learning_rate": 8.00870548185478e-06, "loss": 0.4707, "step": 12390 }, { "epoch": 0.36506931043118307, "grad_norm": 1.4933047229621736, "learning_rate": 8.008294747281605e-06, "loss": 0.4832, "step": 12391 }, { "epoch": 0.3650987728886466, "grad_norm": 1.6701530917338003, "learning_rate": 8.007883980887606e-06, "loss": 0.6086, "step": 12392 }, { "epoch": 0.3651282353461102, "grad_norm": 1.5361683942885116, "learning_rate": 8.007473182677124e-06, "loss": 0.3637, "step": 12393 }, { "epoch": 0.3651576978035738, "grad_norm": 1.4176070957391, "learning_rate": 8.007062352654508e-06, "loss": 0.5098, "step": 12394 }, { "epoch": 0.36518716026103737, "grad_norm": 1.383501617995267, "learning_rate": 8.006651490824104e-06, "loss": 0.4578, "step": 12395 }, { "epoch": 0.36521662271850097, "grad_norm": 1.6602989437682065, "learning_rate": 8.006240597190258e-06, "loss": 0.4561, "step": 12396 }, { "epoch": 0.3652460851759645, "grad_norm": 1.7588337231495965, "learning_rate": 8.005829671757312e-06, "loss": 0.5323, "step": 12397 }, { "epoch": 0.3652755476334281, "grad_norm": 1.5179912182945698, "learning_rate": 8.005418714529619e-06, "loss": 0.4927, "step": 12398 }, { "epoch": 0.36530501009089167, "grad_norm": 1.5711343077762636, "learning_rate": 8.00500772551152e-06, "loss": 0.4095, "step": 12399 }, { "epoch": 0.36533447254835527, "grad_norm": 1.5082528076266184, "learning_rate": 8.004596704707367e-06, "loss": 0.5351, "step": 12400 }, { "epoch": 0.3653639350058188, "grad_norm": 1.5804073545544601, "learning_rate": 8.004185652121504e-06, "loss": 0.4184, "step": 12401 }, { "epoch": 0.3653933974632824, "grad_norm": 1.4435785530781284, "learning_rate": 8.003774567758282e-06, "loss": 0.418, "step": 12402 }, { "epoch": 0.36542285992074597, "grad_norm": 1.280121106636249, "learning_rate": 8.003363451622047e-06, "loss": 0.4184, "step": 12403 }, { "epoch": 0.36545232237820957, "grad_norm": 1.6172536052233435, "learning_rate": 8.002952303717151e-06, "loss": 0.4595, "step": 12404 }, { "epoch": 0.3654817848356731, "grad_norm": 1.3653714996308042, "learning_rate": 8.002541124047938e-06, "loss": 0.3711, "step": 12405 }, { "epoch": 0.3655112472931367, "grad_norm": 1.5068737348936052, "learning_rate": 8.002129912618761e-06, "loss": 0.4494, "step": 12406 }, { "epoch": 0.3655407097506003, "grad_norm": 1.576650258678022, "learning_rate": 8.00171866943397e-06, "loss": 0.5253, "step": 12407 }, { "epoch": 0.36557017220806387, "grad_norm": 1.4431637543065357, "learning_rate": 8.001307394497913e-06, "loss": 0.4488, "step": 12408 }, { "epoch": 0.3655996346655275, "grad_norm": 1.5513583225484235, "learning_rate": 8.00089608781494e-06, "loss": 0.5276, "step": 12409 }, { "epoch": 0.365629097122991, "grad_norm": 1.5685576733380062, "learning_rate": 8.000484749389403e-06, "loss": 0.3175, "step": 12410 }, { "epoch": 0.3656585595804546, "grad_norm": 1.322245820130549, "learning_rate": 8.000073379225654e-06, "loss": 0.5119, "step": 12411 }, { "epoch": 0.36568802203791817, "grad_norm": 1.7882501081344142, "learning_rate": 7.999661977328042e-06, "loss": 0.674, "step": 12412 }, { "epoch": 0.3657174844953818, "grad_norm": 1.648535092524618, "learning_rate": 7.999250543700921e-06, "loss": 0.4433, "step": 12413 }, { "epoch": 0.3657469469528453, "grad_norm": 1.6736432244193837, "learning_rate": 7.998839078348639e-06, "loss": 0.6556, "step": 12414 }, { "epoch": 0.3657764094103089, "grad_norm": 1.5298459271382625, "learning_rate": 7.998427581275554e-06, "loss": 0.4655, "step": 12415 }, { "epoch": 0.36580587186777247, "grad_norm": 1.4838969537953142, "learning_rate": 7.998016052486014e-06, "loss": 0.3857, "step": 12416 }, { "epoch": 0.3658353343252361, "grad_norm": 1.3370896079200332, "learning_rate": 7.997604491984375e-06, "loss": 0.4306, "step": 12417 }, { "epoch": 0.3658647967826996, "grad_norm": 1.7633078726113376, "learning_rate": 7.997192899774989e-06, "loss": 0.4506, "step": 12418 }, { "epoch": 0.3658942592401632, "grad_norm": 1.552457476873974, "learning_rate": 7.99678127586221e-06, "loss": 0.5326, "step": 12419 }, { "epoch": 0.3659237216976268, "grad_norm": 1.9038880687177702, "learning_rate": 7.996369620250393e-06, "loss": 0.4643, "step": 12420 }, { "epoch": 0.36595318415509037, "grad_norm": 1.689247892109416, "learning_rate": 7.995957932943889e-06, "loss": 0.3919, "step": 12421 }, { "epoch": 0.365982646612554, "grad_norm": 1.6867603668062667, "learning_rate": 7.995546213947057e-06, "loss": 0.4669, "step": 12422 }, { "epoch": 0.3660121090700175, "grad_norm": 1.68269536598747, "learning_rate": 7.995134463264249e-06, "loss": 0.5096, "step": 12423 }, { "epoch": 0.3660415715274811, "grad_norm": 1.6672358501861957, "learning_rate": 7.994722680899822e-06, "loss": 0.4693, "step": 12424 }, { "epoch": 0.36607103398494467, "grad_norm": 1.535083182564634, "learning_rate": 7.994310866858131e-06, "loss": 0.4697, "step": 12425 }, { "epoch": 0.3661004964424083, "grad_norm": 1.4968161567212277, "learning_rate": 7.993899021143532e-06, "loss": 0.4856, "step": 12426 }, { "epoch": 0.3661299588998718, "grad_norm": 1.4209179635068985, "learning_rate": 7.993487143760382e-06, "loss": 0.4416, "step": 12427 }, { "epoch": 0.3661594213573354, "grad_norm": 1.3744536432514571, "learning_rate": 7.993075234713038e-06, "loss": 0.3795, "step": 12428 }, { "epoch": 0.36618888381479897, "grad_norm": 1.4958404316374663, "learning_rate": 7.992663294005854e-06, "loss": 0.5102, "step": 12429 }, { "epoch": 0.3662183462722626, "grad_norm": 1.5243649160909156, "learning_rate": 7.992251321643192e-06, "loss": 0.4095, "step": 12430 }, { "epoch": 0.3662478087297261, "grad_norm": 1.4468019687327327, "learning_rate": 7.991839317629407e-06, "loss": 0.3858, "step": 12431 }, { "epoch": 0.3662772711871897, "grad_norm": 1.6547722943630103, "learning_rate": 7.991427281968855e-06, "loss": 0.5865, "step": 12432 }, { "epoch": 0.3663067336446533, "grad_norm": 1.7142917761833931, "learning_rate": 7.991015214665899e-06, "loss": 0.5805, "step": 12433 }, { "epoch": 0.3663361961021169, "grad_norm": 1.4513529367295643, "learning_rate": 7.990603115724895e-06, "loss": 0.4801, "step": 12434 }, { "epoch": 0.3663656585595805, "grad_norm": 1.5155777460053872, "learning_rate": 7.990190985150203e-06, "loss": 0.4761, "step": 12435 }, { "epoch": 0.366395121017044, "grad_norm": 1.5426875707538206, "learning_rate": 7.989778822946182e-06, "loss": 0.4542, "step": 12436 }, { "epoch": 0.3664245834745076, "grad_norm": 1.7597610688234393, "learning_rate": 7.989366629117193e-06, "loss": 0.3578, "step": 12437 }, { "epoch": 0.3664540459319712, "grad_norm": 1.5845335638429994, "learning_rate": 7.988954403667593e-06, "loss": 0.5205, "step": 12438 }, { "epoch": 0.3664835083894348, "grad_norm": 1.4393250699024571, "learning_rate": 7.988542146601745e-06, "loss": 0.3916, "step": 12439 }, { "epoch": 0.3665129708468983, "grad_norm": 1.457313068452808, "learning_rate": 7.98812985792401e-06, "loss": 0.307, "step": 12440 }, { "epoch": 0.3665424333043619, "grad_norm": 1.47485094736645, "learning_rate": 7.987717537638747e-06, "loss": 0.4396, "step": 12441 }, { "epoch": 0.3665718957618255, "grad_norm": 1.352828068117164, "learning_rate": 7.987305185750318e-06, "loss": 0.3816, "step": 12442 }, { "epoch": 0.3666013582192891, "grad_norm": 1.405334984840954, "learning_rate": 7.986892802263085e-06, "loss": 0.4533, "step": 12443 }, { "epoch": 0.3666308206767526, "grad_norm": 1.755155425133403, "learning_rate": 7.98648038718141e-06, "loss": 0.5127, "step": 12444 }, { "epoch": 0.3666602831342162, "grad_norm": 1.5385554814534408, "learning_rate": 7.986067940509658e-06, "loss": 0.4889, "step": 12445 }, { "epoch": 0.3666897455916798, "grad_norm": 1.4420995596651598, "learning_rate": 7.985655462252187e-06, "loss": 0.4309, "step": 12446 }, { "epoch": 0.3667192080491434, "grad_norm": 1.7693991136914866, "learning_rate": 7.985242952413364e-06, "loss": 0.4624, "step": 12447 }, { "epoch": 0.366748670506607, "grad_norm": 1.508722451394777, "learning_rate": 7.984830410997547e-06, "loss": 0.4921, "step": 12448 }, { "epoch": 0.3667781329640705, "grad_norm": 1.4061784853951698, "learning_rate": 7.984417838009108e-06, "loss": 0.3043, "step": 12449 }, { "epoch": 0.3668075954215341, "grad_norm": 1.4608756886041587, "learning_rate": 7.984005233452406e-06, "loss": 0.5251, "step": 12450 }, { "epoch": 0.3668370578789977, "grad_norm": 1.5059626738360894, "learning_rate": 7.983592597331806e-06, "loss": 0.4323, "step": 12451 }, { "epoch": 0.3668665203364613, "grad_norm": 1.4876413475635064, "learning_rate": 7.983179929651671e-06, "loss": 0.3868, "step": 12452 }, { "epoch": 0.3668959827939248, "grad_norm": 1.436472055354292, "learning_rate": 7.98276723041637e-06, "loss": 0.3605, "step": 12453 }, { "epoch": 0.3669254452513884, "grad_norm": 1.431838186832653, "learning_rate": 7.982354499630264e-06, "loss": 0.365, "step": 12454 }, { "epoch": 0.366954907708852, "grad_norm": 1.4916587449291103, "learning_rate": 7.981941737297723e-06, "loss": 0.4454, "step": 12455 }, { "epoch": 0.3669843701663156, "grad_norm": 1.5729590603992654, "learning_rate": 7.98152894342311e-06, "loss": 0.5863, "step": 12456 }, { "epoch": 0.3670138326237791, "grad_norm": 1.5812140638985308, "learning_rate": 7.981116118010793e-06, "loss": 0.4411, "step": 12457 }, { "epoch": 0.3670432950812427, "grad_norm": 1.529110916655661, "learning_rate": 7.980703261065137e-06, "loss": 0.4669, "step": 12458 }, { "epoch": 0.36707275753870633, "grad_norm": 1.4942423664784321, "learning_rate": 7.980290372590511e-06, "loss": 0.5644, "step": 12459 }, { "epoch": 0.3671022199961699, "grad_norm": 1.518765249669039, "learning_rate": 7.979877452591283e-06, "loss": 0.4199, "step": 12460 }, { "epoch": 0.3671316824536335, "grad_norm": 1.6180370252283882, "learning_rate": 7.979464501071819e-06, "loss": 0.5046, "step": 12461 }, { "epoch": 0.367161144911097, "grad_norm": 1.549586641629654, "learning_rate": 7.979051518036486e-06, "loss": 0.4177, "step": 12462 }, { "epoch": 0.36719060736856063, "grad_norm": 1.5706072741338488, "learning_rate": 7.978638503489656e-06, "loss": 0.3635, "step": 12463 }, { "epoch": 0.3672200698260242, "grad_norm": 1.6436854039445183, "learning_rate": 7.978225457435694e-06, "loss": 0.5703, "step": 12464 }, { "epoch": 0.3672495322834878, "grad_norm": 1.431898720373637, "learning_rate": 7.97781237987897e-06, "loss": 0.451, "step": 12465 }, { "epoch": 0.3672789947409513, "grad_norm": 1.4907006338768563, "learning_rate": 7.977399270823855e-06, "loss": 0.4996, "step": 12466 }, { "epoch": 0.36730845719841493, "grad_norm": 1.4428531410177348, "learning_rate": 7.97698613027472e-06, "loss": 0.4303, "step": 12467 }, { "epoch": 0.3673379196558785, "grad_norm": 1.6972439118892795, "learning_rate": 7.976572958235931e-06, "loss": 0.5098, "step": 12468 }, { "epoch": 0.3673673821133421, "grad_norm": 1.6238220056353547, "learning_rate": 7.976159754711861e-06, "loss": 0.5089, "step": 12469 }, { "epoch": 0.3673968445708056, "grad_norm": 1.6517357760344447, "learning_rate": 7.97574651970688e-06, "loss": 0.4611, "step": 12470 }, { "epoch": 0.36742630702826923, "grad_norm": 1.509731248240717, "learning_rate": 7.97533325322536e-06, "loss": 0.4156, "step": 12471 }, { "epoch": 0.36745576948573283, "grad_norm": 1.550885463038985, "learning_rate": 7.97491995527167e-06, "loss": 0.5283, "step": 12472 }, { "epoch": 0.3674852319431964, "grad_norm": 1.6969009807271485, "learning_rate": 7.974506625850184e-06, "loss": 0.4262, "step": 12473 }, { "epoch": 0.36751469440066, "grad_norm": 1.4725161394344348, "learning_rate": 7.974093264965272e-06, "loss": 0.4706, "step": 12474 }, { "epoch": 0.3675441568581235, "grad_norm": 1.4025726370219063, "learning_rate": 7.97367987262131e-06, "loss": 0.4151, "step": 12475 }, { "epoch": 0.36757361931558713, "grad_norm": 1.5532023831683042, "learning_rate": 7.973266448822668e-06, "loss": 0.4848, "step": 12476 }, { "epoch": 0.3676030817730507, "grad_norm": 1.452038477004023, "learning_rate": 7.97285299357372e-06, "loss": 0.32, "step": 12477 }, { "epoch": 0.3676325442305143, "grad_norm": 1.4270534466696403, "learning_rate": 7.972439506878837e-06, "loss": 0.437, "step": 12478 }, { "epoch": 0.3676620066879778, "grad_norm": 1.7102746675982723, "learning_rate": 7.972025988742397e-06, "loss": 0.5825, "step": 12479 }, { "epoch": 0.36769146914544143, "grad_norm": 1.4643005112678207, "learning_rate": 7.971612439168773e-06, "loss": 0.4016, "step": 12480 }, { "epoch": 0.367720931602905, "grad_norm": 1.8104763110226252, "learning_rate": 7.971198858162334e-06, "loss": 0.4973, "step": 12481 }, { "epoch": 0.3677503940603686, "grad_norm": 1.6320979293853124, "learning_rate": 7.970785245727462e-06, "loss": 0.4497, "step": 12482 }, { "epoch": 0.3677798565178321, "grad_norm": 1.5004229084276615, "learning_rate": 7.970371601868529e-06, "loss": 0.6007, "step": 12483 }, { "epoch": 0.36780931897529573, "grad_norm": 1.5090238161923695, "learning_rate": 7.96995792658991e-06, "loss": 0.3862, "step": 12484 }, { "epoch": 0.36783878143275933, "grad_norm": 1.499913149531407, "learning_rate": 7.969544219895981e-06, "loss": 0.3998, "step": 12485 }, { "epoch": 0.3678682438902229, "grad_norm": 1.6306807373771564, "learning_rate": 7.969130481791119e-06, "loss": 0.4088, "step": 12486 }, { "epoch": 0.3678977063476865, "grad_norm": 1.6816687128316434, "learning_rate": 7.968716712279698e-06, "loss": 0.5023, "step": 12487 }, { "epoch": 0.36792716880515003, "grad_norm": 1.461844223770837, "learning_rate": 7.968302911366097e-06, "loss": 0.5442, "step": 12488 }, { "epoch": 0.36795663126261363, "grad_norm": 1.5105142458833807, "learning_rate": 7.967889079054693e-06, "loss": 0.528, "step": 12489 }, { "epoch": 0.3679860937200772, "grad_norm": 1.935962336158546, "learning_rate": 7.967475215349863e-06, "loss": 0.4803, "step": 12490 }, { "epoch": 0.3680155561775408, "grad_norm": 1.4267518694120738, "learning_rate": 7.967061320255985e-06, "loss": 0.3366, "step": 12491 }, { "epoch": 0.36804501863500433, "grad_norm": 1.4845905666639347, "learning_rate": 7.966647393777434e-06, "loss": 0.4658, "step": 12492 }, { "epoch": 0.36807448109246793, "grad_norm": 1.5134407562722827, "learning_rate": 7.966233435918593e-06, "loss": 0.4522, "step": 12493 }, { "epoch": 0.3681039435499315, "grad_norm": 1.5941404533071986, "learning_rate": 7.965819446683837e-06, "loss": 0.5058, "step": 12494 }, { "epoch": 0.3681334060073951, "grad_norm": 1.498306931715108, "learning_rate": 7.965405426077551e-06, "loss": 0.4715, "step": 12495 }, { "epoch": 0.36816286846485863, "grad_norm": 1.4079311285059077, "learning_rate": 7.964991374104107e-06, "loss": 0.3123, "step": 12496 }, { "epoch": 0.36819233092232223, "grad_norm": 1.4384852911464263, "learning_rate": 7.964577290767888e-06, "loss": 0.5135, "step": 12497 }, { "epoch": 0.36822179337978583, "grad_norm": 1.552032683890417, "learning_rate": 7.964163176073274e-06, "loss": 0.5795, "step": 12498 }, { "epoch": 0.3682512558372494, "grad_norm": 1.5347440320852233, "learning_rate": 7.963749030024643e-06, "loss": 0.4764, "step": 12499 }, { "epoch": 0.368280718294713, "grad_norm": 1.7039356072996674, "learning_rate": 7.96333485262638e-06, "loss": 0.2781, "step": 12500 }, { "epoch": 0.36831018075217653, "grad_norm": 1.44414714191236, "learning_rate": 7.962920643882865e-06, "loss": 0.4501, "step": 12501 }, { "epoch": 0.36833964320964013, "grad_norm": 1.470581677235825, "learning_rate": 7.962506403798479e-06, "loss": 0.3732, "step": 12502 }, { "epoch": 0.3683691056671037, "grad_norm": 1.4745664008428647, "learning_rate": 7.962092132377602e-06, "loss": 0.4865, "step": 12503 }, { "epoch": 0.3683985681245673, "grad_norm": 1.5105687909154248, "learning_rate": 7.961677829624617e-06, "loss": 0.4677, "step": 12504 }, { "epoch": 0.36842803058203083, "grad_norm": 1.2819828748360191, "learning_rate": 7.961263495543907e-06, "loss": 0.4189, "step": 12505 }, { "epoch": 0.36845749303949443, "grad_norm": 1.718709005896131, "learning_rate": 7.960849130139852e-06, "loss": 0.4091, "step": 12506 }, { "epoch": 0.368486955496958, "grad_norm": 1.515545814568948, "learning_rate": 7.960434733416839e-06, "loss": 0.4264, "step": 12507 }, { "epoch": 0.3685164179544216, "grad_norm": 1.5643939263974311, "learning_rate": 7.96002030537925e-06, "loss": 0.3921, "step": 12508 }, { "epoch": 0.36854588041188513, "grad_norm": 1.6254021366674078, "learning_rate": 7.95960584603147e-06, "loss": 0.5484, "step": 12509 }, { "epoch": 0.36857534286934873, "grad_norm": 1.4504621262799782, "learning_rate": 7.959191355377878e-06, "loss": 0.4854, "step": 12510 }, { "epoch": 0.36860480532681233, "grad_norm": 1.594455326979323, "learning_rate": 7.958776833422863e-06, "loss": 0.564, "step": 12511 }, { "epoch": 0.3686342677842759, "grad_norm": 1.6636081691072615, "learning_rate": 7.958362280170809e-06, "loss": 0.518, "step": 12512 }, { "epoch": 0.3686637302417395, "grad_norm": 1.397439477968731, "learning_rate": 7.957947695626098e-06, "loss": 0.3935, "step": 12513 }, { "epoch": 0.36869319269920303, "grad_norm": 1.5814518324948876, "learning_rate": 7.95753307979312e-06, "loss": 0.4682, "step": 12514 }, { "epoch": 0.36872265515666663, "grad_norm": 1.4689852718408818, "learning_rate": 7.957118432676259e-06, "loss": 0.4218, "step": 12515 }, { "epoch": 0.3687521176141302, "grad_norm": 1.7009210707072224, "learning_rate": 7.956703754279898e-06, "loss": 0.5594, "step": 12516 }, { "epoch": 0.3687815800715938, "grad_norm": 1.7058208221813151, "learning_rate": 7.956289044608428e-06, "loss": 0.5808, "step": 12517 }, { "epoch": 0.36881104252905733, "grad_norm": 1.519202617800234, "learning_rate": 7.955874303666231e-06, "loss": 0.3938, "step": 12518 }, { "epoch": 0.36884050498652093, "grad_norm": 1.4528548906974494, "learning_rate": 7.955459531457697e-06, "loss": 0.4094, "step": 12519 }, { "epoch": 0.3688699674439845, "grad_norm": 1.490503281080293, "learning_rate": 7.955044727987216e-06, "loss": 0.4362, "step": 12520 }, { "epoch": 0.3688994299014481, "grad_norm": 1.5866129205502757, "learning_rate": 7.95462989325917e-06, "loss": 0.4902, "step": 12521 }, { "epoch": 0.36892889235891163, "grad_norm": 1.716377360162308, "learning_rate": 7.95421502727795e-06, "loss": 0.5079, "step": 12522 }, { "epoch": 0.36895835481637523, "grad_norm": 1.4261337516070802, "learning_rate": 7.953800130047941e-06, "loss": 0.3755, "step": 12523 }, { "epoch": 0.36898781727383884, "grad_norm": 1.5287405490725827, "learning_rate": 7.953385201573538e-06, "loss": 0.4623, "step": 12524 }, { "epoch": 0.3690172797313024, "grad_norm": 1.7164792161623423, "learning_rate": 7.952970241859126e-06, "loss": 0.5019, "step": 12525 }, { "epoch": 0.369046742188766, "grad_norm": 1.5183441630073267, "learning_rate": 7.952555250909093e-06, "loss": 0.3745, "step": 12526 }, { "epoch": 0.36907620464622953, "grad_norm": 1.6283983732078937, "learning_rate": 7.952140228727831e-06, "loss": 0.4751, "step": 12527 }, { "epoch": 0.36910566710369314, "grad_norm": 1.7258332627737947, "learning_rate": 7.95172517531973e-06, "loss": 0.5656, "step": 12528 }, { "epoch": 0.3691351295611567, "grad_norm": 1.4150355035343904, "learning_rate": 7.951310090689179e-06, "loss": 0.3902, "step": 12529 }, { "epoch": 0.3691645920186203, "grad_norm": 1.4001396203983225, "learning_rate": 7.95089497484057e-06, "loss": 0.4109, "step": 12530 }, { "epoch": 0.36919405447608383, "grad_norm": 1.7777963761191098, "learning_rate": 7.950479827778293e-06, "loss": 0.6079, "step": 12531 }, { "epoch": 0.36922351693354744, "grad_norm": 1.400255682250202, "learning_rate": 7.95006464950674e-06, "loss": 0.3643, "step": 12532 }, { "epoch": 0.369252979391011, "grad_norm": 1.465461716549612, "learning_rate": 7.9496494400303e-06, "loss": 0.3437, "step": 12533 }, { "epoch": 0.3692824418484746, "grad_norm": 1.369611923076949, "learning_rate": 7.94923419935337e-06, "loss": 0.4341, "step": 12534 }, { "epoch": 0.36931190430593813, "grad_norm": 1.3913093643984082, "learning_rate": 7.948818927480339e-06, "loss": 0.4676, "step": 12535 }, { "epoch": 0.36934136676340173, "grad_norm": 1.7209212801412337, "learning_rate": 7.948403624415599e-06, "loss": 0.4522, "step": 12536 }, { "epoch": 0.36937082922086534, "grad_norm": 1.5805491210823361, "learning_rate": 7.947988290163545e-06, "loss": 0.5337, "step": 12537 }, { "epoch": 0.3694002916783289, "grad_norm": 1.719780918095369, "learning_rate": 7.947572924728569e-06, "loss": 0.4495, "step": 12538 }, { "epoch": 0.3694297541357925, "grad_norm": 1.4859152591497733, "learning_rate": 7.947157528115063e-06, "loss": 0.4935, "step": 12539 }, { "epoch": 0.36945921659325603, "grad_norm": 1.3970678718394012, "learning_rate": 7.946742100327425e-06, "loss": 0.4657, "step": 12540 }, { "epoch": 0.36948867905071964, "grad_norm": 1.4985990917722534, "learning_rate": 7.946326641370046e-06, "loss": 0.4688, "step": 12541 }, { "epoch": 0.3695181415081832, "grad_norm": 1.2892000562316128, "learning_rate": 7.945911151247323e-06, "loss": 0.2566, "step": 12542 }, { "epoch": 0.3695476039656468, "grad_norm": 1.7679612444966857, "learning_rate": 7.945495629963648e-06, "loss": 0.6422, "step": 12543 }, { "epoch": 0.36957706642311033, "grad_norm": 1.5618466972870089, "learning_rate": 7.94508007752342e-06, "loss": 0.5434, "step": 12544 }, { "epoch": 0.36960652888057394, "grad_norm": 1.6908783547710646, "learning_rate": 7.944664493931031e-06, "loss": 0.5941, "step": 12545 }, { "epoch": 0.3696359913380375, "grad_norm": 1.4085924792057316, "learning_rate": 7.944248879190877e-06, "loss": 0.3832, "step": 12546 }, { "epoch": 0.3696654537955011, "grad_norm": 1.2728980322317989, "learning_rate": 7.943833233307357e-06, "loss": 0.3644, "step": 12547 }, { "epoch": 0.36969491625296463, "grad_norm": 1.8229092806155702, "learning_rate": 7.943417556284867e-06, "loss": 0.5069, "step": 12548 }, { "epoch": 0.36972437871042824, "grad_norm": 1.6037058062421192, "learning_rate": 7.943001848127803e-06, "loss": 0.5164, "step": 12549 }, { "epoch": 0.36975384116789184, "grad_norm": 1.4740483952479702, "learning_rate": 7.942586108840562e-06, "loss": 0.4583, "step": 12550 }, { "epoch": 0.3697833036253554, "grad_norm": 1.2616475365837831, "learning_rate": 7.942170338427541e-06, "loss": 0.2678, "step": 12551 }, { "epoch": 0.369812766082819, "grad_norm": 1.5301956426079253, "learning_rate": 7.94175453689314e-06, "loss": 0.4341, "step": 12552 }, { "epoch": 0.36984222854028254, "grad_norm": 1.4906600753874693, "learning_rate": 7.941338704241756e-06, "loss": 0.332, "step": 12553 }, { "epoch": 0.36987169099774614, "grad_norm": 1.6362335387354188, "learning_rate": 7.940922840477787e-06, "loss": 0.4781, "step": 12554 }, { "epoch": 0.3699011534552097, "grad_norm": 1.611294809108867, "learning_rate": 7.940506945605633e-06, "loss": 0.4788, "step": 12555 }, { "epoch": 0.3699306159126733, "grad_norm": 1.605356377106888, "learning_rate": 7.940091019629693e-06, "loss": 0.5077, "step": 12556 }, { "epoch": 0.36996007837013684, "grad_norm": 1.7803988895692138, "learning_rate": 7.939675062554367e-06, "loss": 0.6922, "step": 12557 }, { "epoch": 0.36998954082760044, "grad_norm": 1.969851245067528, "learning_rate": 7.939259074384051e-06, "loss": 0.7969, "step": 12558 }, { "epoch": 0.370019003285064, "grad_norm": 1.5589403646305002, "learning_rate": 7.938843055123151e-06, "loss": 0.504, "step": 12559 }, { "epoch": 0.3700484657425276, "grad_norm": 1.5785778353867221, "learning_rate": 7.938427004776064e-06, "loss": 0.446, "step": 12560 }, { "epoch": 0.37007792819999114, "grad_norm": 1.7204916659530496, "learning_rate": 7.938010923347192e-06, "loss": 0.3888, "step": 12561 }, { "epoch": 0.37010739065745474, "grad_norm": 1.783824317480932, "learning_rate": 7.937594810840934e-06, "loss": 0.3599, "step": 12562 }, { "epoch": 0.37013685311491834, "grad_norm": 1.5235257670697167, "learning_rate": 7.937178667261694e-06, "loss": 0.5301, "step": 12563 }, { "epoch": 0.3701663155723819, "grad_norm": 1.4795749648048853, "learning_rate": 7.936762492613873e-06, "loss": 0.3659, "step": 12564 }, { "epoch": 0.3701957780298455, "grad_norm": 1.603780621233717, "learning_rate": 7.936346286901877e-06, "loss": 0.4457, "step": 12565 }, { "epoch": 0.37022524048730904, "grad_norm": 1.4033328597326156, "learning_rate": 7.935930050130102e-06, "loss": 0.41, "step": 12566 }, { "epoch": 0.37025470294477264, "grad_norm": 1.5425840771558679, "learning_rate": 7.935513782302953e-06, "loss": 0.4337, "step": 12567 }, { "epoch": 0.3702841654022362, "grad_norm": 1.6657813533007535, "learning_rate": 7.935097483424834e-06, "loss": 0.5379, "step": 12568 }, { "epoch": 0.3703136278596998, "grad_norm": 1.4328835340230206, "learning_rate": 7.93468115350015e-06, "loss": 0.4339, "step": 12569 }, { "epoch": 0.37034309031716334, "grad_norm": 1.564465609211049, "learning_rate": 7.934264792533302e-06, "loss": 0.466, "step": 12570 }, { "epoch": 0.37037255277462694, "grad_norm": 1.4814051467390752, "learning_rate": 7.933848400528696e-06, "loss": 0.3546, "step": 12571 }, { "epoch": 0.3704020152320905, "grad_norm": 1.655579389755619, "learning_rate": 7.933431977490735e-06, "loss": 0.5236, "step": 12572 }, { "epoch": 0.3704314776895541, "grad_norm": 1.4049073709214837, "learning_rate": 7.933015523423825e-06, "loss": 0.5049, "step": 12573 }, { "epoch": 0.37046094014701764, "grad_norm": 1.4471175198326607, "learning_rate": 7.932599038332369e-06, "loss": 0.4752, "step": 12574 }, { "epoch": 0.37049040260448124, "grad_norm": 1.4540921166841776, "learning_rate": 7.932182522220777e-06, "loss": 0.5453, "step": 12575 }, { "epoch": 0.37051986506194484, "grad_norm": 1.602306700952096, "learning_rate": 7.93176597509345e-06, "loss": 0.4908, "step": 12576 }, { "epoch": 0.3705493275194084, "grad_norm": 1.5514884332378338, "learning_rate": 7.931349396954795e-06, "loss": 0.4807, "step": 12577 }, { "epoch": 0.370578789976872, "grad_norm": 1.5735417995332115, "learning_rate": 7.930932787809222e-06, "loss": 0.5567, "step": 12578 }, { "epoch": 0.37060825243433554, "grad_norm": 1.7190465193870685, "learning_rate": 7.930516147661133e-06, "loss": 0.47, "step": 12579 }, { "epoch": 0.37063771489179914, "grad_norm": 1.6824850209457733, "learning_rate": 7.930099476514938e-06, "loss": 0.6527, "step": 12580 }, { "epoch": 0.3706671773492627, "grad_norm": 1.4836926125819325, "learning_rate": 7.929682774375044e-06, "loss": 0.5604, "step": 12581 }, { "epoch": 0.3706966398067263, "grad_norm": 1.6331455571047342, "learning_rate": 7.929266041245859e-06, "loss": 0.5032, "step": 12582 }, { "epoch": 0.37072610226418984, "grad_norm": 1.766615869755817, "learning_rate": 7.928849277131789e-06, "loss": 0.4278, "step": 12583 }, { "epoch": 0.37075556472165344, "grad_norm": 1.4122280694683453, "learning_rate": 7.928432482037244e-06, "loss": 0.4156, "step": 12584 }, { "epoch": 0.370785027179117, "grad_norm": 1.6596415900806283, "learning_rate": 7.928015655966635e-06, "loss": 0.4656, "step": 12585 }, { "epoch": 0.3708144896365806, "grad_norm": 1.3584976722889452, "learning_rate": 7.927598798924367e-06, "loss": 0.311, "step": 12586 }, { "epoch": 0.37084395209404414, "grad_norm": 1.4833105248338663, "learning_rate": 7.927181910914851e-06, "loss": 0.4928, "step": 12587 }, { "epoch": 0.37087341455150774, "grad_norm": 1.5442559908950138, "learning_rate": 7.926764991942498e-06, "loss": 0.4569, "step": 12588 }, { "epoch": 0.37090287700897134, "grad_norm": 1.3618272052923632, "learning_rate": 7.926348042011716e-06, "loss": 0.3874, "step": 12589 }, { "epoch": 0.3709323394664349, "grad_norm": 1.4529648175777856, "learning_rate": 7.925931061126917e-06, "loss": 0.3322, "step": 12590 }, { "epoch": 0.3709618019238985, "grad_norm": 1.7104641413380746, "learning_rate": 7.92551404929251e-06, "loss": 0.4646, "step": 12591 }, { "epoch": 0.37099126438136204, "grad_norm": 1.532130771683884, "learning_rate": 7.925097006512907e-06, "loss": 0.3949, "step": 12592 }, { "epoch": 0.37102072683882564, "grad_norm": 1.6712785010186002, "learning_rate": 7.92467993279252e-06, "loss": 0.5796, "step": 12593 }, { "epoch": 0.3710501892962892, "grad_norm": 1.6932497930136223, "learning_rate": 7.92426282813576e-06, "loss": 0.5934, "step": 12594 }, { "epoch": 0.3710796517537528, "grad_norm": 1.4354802951359267, "learning_rate": 7.923845692547038e-06, "loss": 0.3772, "step": 12595 }, { "epoch": 0.37110911421121634, "grad_norm": 1.6132328438825023, "learning_rate": 7.923428526030768e-06, "loss": 0.4994, "step": 12596 }, { "epoch": 0.37113857666867994, "grad_norm": 1.5976529196484446, "learning_rate": 7.923011328591362e-06, "loss": 0.5105, "step": 12597 }, { "epoch": 0.3711680391261435, "grad_norm": 1.4827263407550566, "learning_rate": 7.922594100233232e-06, "loss": 0.4007, "step": 12598 }, { "epoch": 0.3711975015836071, "grad_norm": 1.6061034361140436, "learning_rate": 7.922176840960793e-06, "loss": 0.4732, "step": 12599 }, { "epoch": 0.37122696404107064, "grad_norm": 1.3826986241699104, "learning_rate": 7.92175955077846e-06, "loss": 0.3926, "step": 12600 }, { "epoch": 0.37125642649853424, "grad_norm": 1.4479115603216917, "learning_rate": 7.921342229690642e-06, "loss": 0.4817, "step": 12601 }, { "epoch": 0.37128588895599784, "grad_norm": 1.4336126884221587, "learning_rate": 7.920924877701759e-06, "loss": 0.5087, "step": 12602 }, { "epoch": 0.3713153514134614, "grad_norm": 1.3978567484843385, "learning_rate": 7.92050749481622e-06, "loss": 0.4829, "step": 12603 }, { "epoch": 0.371344813870925, "grad_norm": 1.3949945235925174, "learning_rate": 7.920090081038442e-06, "loss": 0.4419, "step": 12604 }, { "epoch": 0.37137427632838854, "grad_norm": 1.511146211268452, "learning_rate": 7.919672636372844e-06, "loss": 0.3764, "step": 12605 }, { "epoch": 0.37140373878585214, "grad_norm": 1.6490961306543592, "learning_rate": 7.919255160823837e-06, "loss": 0.4577, "step": 12606 }, { "epoch": 0.3714332012433157, "grad_norm": 1.4246315464322254, "learning_rate": 7.91883765439584e-06, "loss": 0.3096, "step": 12607 }, { "epoch": 0.3714626637007793, "grad_norm": 1.579551446083959, "learning_rate": 7.918420117093265e-06, "loss": 0.5919, "step": 12608 }, { "epoch": 0.37149212615824284, "grad_norm": 1.551227171121503, "learning_rate": 7.918002548920537e-06, "loss": 0.4948, "step": 12609 }, { "epoch": 0.37152158861570644, "grad_norm": 1.6269130065806472, "learning_rate": 7.917584949882063e-06, "loss": 0.5721, "step": 12610 }, { "epoch": 0.37155105107317, "grad_norm": 1.593101909449877, "learning_rate": 7.917167319982263e-06, "loss": 0.4664, "step": 12611 }, { "epoch": 0.3715805135306336, "grad_norm": 1.5021099365766146, "learning_rate": 7.91674965922556e-06, "loss": 0.4101, "step": 12612 }, { "epoch": 0.37160997598809714, "grad_norm": 1.5365155529355337, "learning_rate": 7.916331967616367e-06, "loss": 0.4795, "step": 12613 }, { "epoch": 0.37163943844556074, "grad_norm": 1.5611908546957065, "learning_rate": 7.915914245159102e-06, "loss": 0.4114, "step": 12614 }, { "epoch": 0.37166890090302435, "grad_norm": 1.6670530527683933, "learning_rate": 7.915496491858185e-06, "loss": 0.491, "step": 12615 }, { "epoch": 0.3716983633604879, "grad_norm": 1.6469637318398331, "learning_rate": 7.915078707718036e-06, "loss": 0.5626, "step": 12616 }, { "epoch": 0.3717278258179515, "grad_norm": 1.5126475948106577, "learning_rate": 7.914660892743071e-06, "loss": 0.3455, "step": 12617 }, { "epoch": 0.37175728827541504, "grad_norm": 1.7000579112712086, "learning_rate": 7.914243046937713e-06, "loss": 0.467, "step": 12618 }, { "epoch": 0.37178675073287865, "grad_norm": 1.4816092437564037, "learning_rate": 7.913825170306378e-06, "loss": 0.485, "step": 12619 }, { "epoch": 0.3718162131903422, "grad_norm": 1.6136617630820358, "learning_rate": 7.913407262853492e-06, "loss": 0.4372, "step": 12620 }, { "epoch": 0.3718456756478058, "grad_norm": 1.4461076565311244, "learning_rate": 7.91298932458347e-06, "loss": 0.4818, "step": 12621 }, { "epoch": 0.37187513810526934, "grad_norm": 1.435961321044791, "learning_rate": 7.912571355500733e-06, "loss": 0.4897, "step": 12622 }, { "epoch": 0.37190460056273295, "grad_norm": 1.352902209260275, "learning_rate": 7.912153355609703e-06, "loss": 0.3134, "step": 12623 }, { "epoch": 0.3719340630201965, "grad_norm": 1.788608431791655, "learning_rate": 7.911735324914806e-06, "loss": 0.6127, "step": 12624 }, { "epoch": 0.3719635254776601, "grad_norm": 1.4991174143013413, "learning_rate": 7.91131726342046e-06, "loss": 0.4368, "step": 12625 }, { "epoch": 0.37199298793512364, "grad_norm": 1.6079509055278813, "learning_rate": 7.910899171131086e-06, "loss": 0.5022, "step": 12626 }, { "epoch": 0.37202245039258725, "grad_norm": 1.5344088409338332, "learning_rate": 7.910481048051106e-06, "loss": 0.5203, "step": 12627 }, { "epoch": 0.37205191285005085, "grad_norm": 1.5831691576125004, "learning_rate": 7.910062894184947e-06, "loss": 0.4515, "step": 12628 }, { "epoch": 0.3720813753075144, "grad_norm": 1.6392356416700031, "learning_rate": 7.909644709537028e-06, "loss": 0.4752, "step": 12629 }, { "epoch": 0.372110837764978, "grad_norm": 1.5862633547956955, "learning_rate": 7.909226494111776e-06, "loss": 0.4701, "step": 12630 }, { "epoch": 0.37214030022244154, "grad_norm": 1.5995102090294526, "learning_rate": 7.908808247913611e-06, "loss": 0.4938, "step": 12631 }, { "epoch": 0.37216976267990515, "grad_norm": 1.6985706854268228, "learning_rate": 7.908389970946958e-06, "loss": 0.5878, "step": 12632 }, { "epoch": 0.3721992251373687, "grad_norm": 1.4132609808759125, "learning_rate": 7.907971663216242e-06, "loss": 0.4194, "step": 12633 }, { "epoch": 0.3722286875948323, "grad_norm": 1.4931995708815564, "learning_rate": 7.907553324725891e-06, "loss": 0.4964, "step": 12634 }, { "epoch": 0.37225815005229584, "grad_norm": 1.5481524033429137, "learning_rate": 7.907134955480326e-06, "loss": 0.4442, "step": 12635 }, { "epoch": 0.37228761250975945, "grad_norm": 1.4678067432869426, "learning_rate": 7.906716555483974e-06, "loss": 0.4198, "step": 12636 }, { "epoch": 0.372317074967223, "grad_norm": 1.4483968604530604, "learning_rate": 7.906298124741257e-06, "loss": 0.4875, "step": 12637 }, { "epoch": 0.3723465374246866, "grad_norm": 1.5431934928544027, "learning_rate": 7.905879663256606e-06, "loss": 0.4964, "step": 12638 }, { "epoch": 0.37237599988215014, "grad_norm": 1.660790944301818, "learning_rate": 7.905461171034445e-06, "loss": 0.5849, "step": 12639 }, { "epoch": 0.37240546233961375, "grad_norm": 1.420683625441595, "learning_rate": 7.905042648079204e-06, "loss": 0.534, "step": 12640 }, { "epoch": 0.37243492479707735, "grad_norm": 1.8113541914012068, "learning_rate": 7.904624094395303e-06, "loss": 0.5213, "step": 12641 }, { "epoch": 0.3724643872545409, "grad_norm": 1.5204655419141684, "learning_rate": 7.904205509987177e-06, "loss": 0.5044, "step": 12642 }, { "epoch": 0.3724938497120045, "grad_norm": 1.4642815060681535, "learning_rate": 7.903786894859249e-06, "loss": 0.4577, "step": 12643 }, { "epoch": 0.37252331216946805, "grad_norm": 1.410546915164515, "learning_rate": 7.903368249015949e-06, "loss": 0.3504, "step": 12644 }, { "epoch": 0.37255277462693165, "grad_norm": 1.291402451205311, "learning_rate": 7.902949572461702e-06, "loss": 0.3867, "step": 12645 }, { "epoch": 0.3725822370843952, "grad_norm": 1.6574824570705105, "learning_rate": 7.902530865200941e-06, "loss": 0.5109, "step": 12646 }, { "epoch": 0.3726116995418588, "grad_norm": 1.3739645798552045, "learning_rate": 7.902112127238092e-06, "loss": 0.3797, "step": 12647 }, { "epoch": 0.37264116199932235, "grad_norm": 1.4481746212353812, "learning_rate": 7.901693358577587e-06, "loss": 0.3084, "step": 12648 }, { "epoch": 0.37267062445678595, "grad_norm": 1.6248731253812294, "learning_rate": 7.901274559223853e-06, "loss": 0.5957, "step": 12649 }, { "epoch": 0.3727000869142495, "grad_norm": 1.6718076030385267, "learning_rate": 7.90085572918132e-06, "loss": 0.5368, "step": 12650 }, { "epoch": 0.3727295493717131, "grad_norm": 1.453473464675253, "learning_rate": 7.90043686845442e-06, "loss": 0.5077, "step": 12651 }, { "epoch": 0.37275901182917665, "grad_norm": 1.6079589537764845, "learning_rate": 7.900017977047584e-06, "loss": 0.5089, "step": 12652 }, { "epoch": 0.37278847428664025, "grad_norm": 1.4834229043659786, "learning_rate": 7.89959905496524e-06, "loss": 0.5429, "step": 12653 }, { "epoch": 0.37281793674410385, "grad_norm": 1.514264428808135, "learning_rate": 7.89918010221182e-06, "loss": 0.4277, "step": 12654 }, { "epoch": 0.3728473992015674, "grad_norm": 1.551276862830662, "learning_rate": 7.898761118791761e-06, "loss": 0.6168, "step": 12655 }, { "epoch": 0.372876861659031, "grad_norm": 1.6227262274766698, "learning_rate": 7.898342104709485e-06, "loss": 0.5422, "step": 12656 }, { "epoch": 0.37290632411649455, "grad_norm": 1.4596047294178898, "learning_rate": 7.897923059969432e-06, "loss": 0.4694, "step": 12657 }, { "epoch": 0.37293578657395815, "grad_norm": 1.6521421830360983, "learning_rate": 7.897503984576031e-06, "loss": 0.4876, "step": 12658 }, { "epoch": 0.3729652490314217, "grad_norm": 1.6433200239900283, "learning_rate": 7.897084878533717e-06, "loss": 0.5252, "step": 12659 }, { "epoch": 0.3729947114888853, "grad_norm": 1.6388100320821217, "learning_rate": 7.896665741846923e-06, "loss": 0.4932, "step": 12660 }, { "epoch": 0.37302417394634885, "grad_norm": 1.4535076214604217, "learning_rate": 7.89624657452008e-06, "loss": 0.4954, "step": 12661 }, { "epoch": 0.37305363640381245, "grad_norm": 1.62053797926444, "learning_rate": 7.895827376557623e-06, "loss": 0.3313, "step": 12662 }, { "epoch": 0.373083098861276, "grad_norm": 1.5734887262520767, "learning_rate": 7.895408147963987e-06, "loss": 0.498, "step": 12663 }, { "epoch": 0.3731125613187396, "grad_norm": 1.5545163671820321, "learning_rate": 7.894988888743606e-06, "loss": 0.4711, "step": 12664 }, { "epoch": 0.37314202377620315, "grad_norm": 1.334525380950563, "learning_rate": 7.894569598900916e-06, "loss": 0.3999, "step": 12665 }, { "epoch": 0.37317148623366675, "grad_norm": 1.548368024797691, "learning_rate": 7.89415027844035e-06, "loss": 0.4788, "step": 12666 }, { "epoch": 0.37320094869113035, "grad_norm": 1.4404619755211348, "learning_rate": 7.893730927366344e-06, "loss": 0.3939, "step": 12667 }, { "epoch": 0.3732304111485939, "grad_norm": 1.6982281917142288, "learning_rate": 7.893311545683334e-06, "loss": 0.604, "step": 12668 }, { "epoch": 0.3732598736060575, "grad_norm": 1.5593001627432306, "learning_rate": 7.892892133395758e-06, "loss": 0.6072, "step": 12669 }, { "epoch": 0.37328933606352105, "grad_norm": 1.5824389558394822, "learning_rate": 7.892472690508048e-06, "loss": 0.5128, "step": 12670 }, { "epoch": 0.37331879852098465, "grad_norm": 1.5618142585257138, "learning_rate": 7.892053217024644e-06, "loss": 0.419, "step": 12671 }, { "epoch": 0.3733482609784482, "grad_norm": 1.4372403131831293, "learning_rate": 7.891633712949984e-06, "loss": 0.4013, "step": 12672 }, { "epoch": 0.3733777234359118, "grad_norm": 1.5395423512055368, "learning_rate": 7.891214178288502e-06, "loss": 0.3965, "step": 12673 }, { "epoch": 0.37340718589337535, "grad_norm": 1.5902783434014358, "learning_rate": 7.890794613044638e-06, "loss": 0.5595, "step": 12674 }, { "epoch": 0.37343664835083895, "grad_norm": 1.3866255544179469, "learning_rate": 7.890375017222831e-06, "loss": 0.407, "step": 12675 }, { "epoch": 0.3734661108083025, "grad_norm": 1.6146812109792619, "learning_rate": 7.889955390827516e-06, "loss": 0.4995, "step": 12676 }, { "epoch": 0.3734955732657661, "grad_norm": 1.5849502999233545, "learning_rate": 7.889535733863135e-06, "loss": 0.4682, "step": 12677 }, { "epoch": 0.37352503572322965, "grad_norm": 1.5840001357151345, "learning_rate": 7.889116046334126e-06, "loss": 0.5166, "step": 12678 }, { "epoch": 0.37355449818069325, "grad_norm": 1.7147673100907164, "learning_rate": 7.888696328244927e-06, "loss": 0.5405, "step": 12679 }, { "epoch": 0.37358396063815685, "grad_norm": 1.698981052071265, "learning_rate": 7.88827657959998e-06, "loss": 0.4954, "step": 12680 }, { "epoch": 0.3736134230956204, "grad_norm": 1.660830389321836, "learning_rate": 7.887856800403722e-06, "loss": 0.4191, "step": 12681 }, { "epoch": 0.373642885553084, "grad_norm": 1.39114317137667, "learning_rate": 7.887436990660596e-06, "loss": 0.4495, "step": 12682 }, { "epoch": 0.37367234801054755, "grad_norm": 1.65210111617278, "learning_rate": 7.88701715037504e-06, "loss": 0.4217, "step": 12683 }, { "epoch": 0.37370181046801115, "grad_norm": 1.5732580254496769, "learning_rate": 7.886597279551498e-06, "loss": 0.4956, "step": 12684 }, { "epoch": 0.3737312729254747, "grad_norm": 1.7914243716185465, "learning_rate": 7.886177378194408e-06, "loss": 0.6981, "step": 12685 }, { "epoch": 0.3737607353829383, "grad_norm": 1.3642112681897387, "learning_rate": 7.885757446308215e-06, "loss": 0.3821, "step": 12686 }, { "epoch": 0.37379019784040185, "grad_norm": 1.4710773940799955, "learning_rate": 7.885337483897358e-06, "loss": 0.4772, "step": 12687 }, { "epoch": 0.37381966029786545, "grad_norm": 1.7210270173510176, "learning_rate": 7.88491749096628e-06, "loss": 0.2974, "step": 12688 }, { "epoch": 0.373849122755329, "grad_norm": 1.4320160314254817, "learning_rate": 7.884497467519426e-06, "loss": 0.3678, "step": 12689 }, { "epoch": 0.3738785852127926, "grad_norm": 1.6196425437263278, "learning_rate": 7.884077413561234e-06, "loss": 0.5237, "step": 12690 }, { "epoch": 0.37390804767025615, "grad_norm": 1.6395157221591796, "learning_rate": 7.883657329096152e-06, "loss": 0.5608, "step": 12691 }, { "epoch": 0.37393751012771975, "grad_norm": 1.365127761261177, "learning_rate": 7.883237214128622e-06, "loss": 0.4669, "step": 12692 }, { "epoch": 0.37396697258518335, "grad_norm": 1.6357396379297473, "learning_rate": 7.882817068663086e-06, "loss": 0.4502, "step": 12693 }, { "epoch": 0.3739964350426469, "grad_norm": 1.817651211074136, "learning_rate": 7.882396892703991e-06, "loss": 0.5136, "step": 12694 }, { "epoch": 0.3740258975001105, "grad_norm": 1.5633441184027657, "learning_rate": 7.88197668625578e-06, "loss": 0.4875, "step": 12695 }, { "epoch": 0.37405535995757405, "grad_norm": 1.545990251286221, "learning_rate": 7.881556449322898e-06, "loss": 0.6035, "step": 12696 }, { "epoch": 0.37408482241503765, "grad_norm": 1.5009052833150895, "learning_rate": 7.881136181909788e-06, "loss": 0.4765, "step": 12697 }, { "epoch": 0.3741142848725012, "grad_norm": 1.429892660024359, "learning_rate": 7.880715884020899e-06, "loss": 0.49, "step": 12698 }, { "epoch": 0.3741437473299648, "grad_norm": 1.5418810847189357, "learning_rate": 7.880295555660677e-06, "loss": 0.4746, "step": 12699 }, { "epoch": 0.37417320978742835, "grad_norm": 1.4943527259796823, "learning_rate": 7.879875196833564e-06, "loss": 0.486, "step": 12700 }, { "epoch": 0.37420267224489195, "grad_norm": 1.5445482873467966, "learning_rate": 7.87945480754401e-06, "loss": 0.4211, "step": 12701 }, { "epoch": 0.3742321347023555, "grad_norm": 1.7085766166988987, "learning_rate": 7.879034387796459e-06, "loss": 0.4456, "step": 12702 }, { "epoch": 0.3742615971598191, "grad_norm": 1.7610492848103747, "learning_rate": 7.87861393759536e-06, "loss": 0.512, "step": 12703 }, { "epoch": 0.37429105961728265, "grad_norm": 1.3884219462047593, "learning_rate": 7.878193456945162e-06, "loss": 0.38, "step": 12704 }, { "epoch": 0.37432052207474625, "grad_norm": 1.415547487529204, "learning_rate": 7.87777294585031e-06, "loss": 0.3482, "step": 12705 }, { "epoch": 0.37434998453220986, "grad_norm": 1.464840777052374, "learning_rate": 7.877352404315252e-06, "loss": 0.3374, "step": 12706 }, { "epoch": 0.3743794469896734, "grad_norm": 1.62144009528118, "learning_rate": 7.876931832344438e-06, "loss": 0.5229, "step": 12707 }, { "epoch": 0.374408909447137, "grad_norm": 1.386954278461853, "learning_rate": 7.876511229942315e-06, "loss": 0.3491, "step": 12708 }, { "epoch": 0.37443837190460055, "grad_norm": 1.4323063699071539, "learning_rate": 7.876090597113333e-06, "loss": 0.3966, "step": 12709 }, { "epoch": 0.37446783436206416, "grad_norm": 1.3551400447158473, "learning_rate": 7.875669933861942e-06, "loss": 0.4182, "step": 12710 }, { "epoch": 0.3744972968195277, "grad_norm": 1.7501611087132143, "learning_rate": 7.87524924019259e-06, "loss": 0.4869, "step": 12711 }, { "epoch": 0.3745267592769913, "grad_norm": 1.4128852167063435, "learning_rate": 7.874828516109729e-06, "loss": 0.4491, "step": 12712 }, { "epoch": 0.37455622173445485, "grad_norm": 1.5336787328636177, "learning_rate": 7.874407761617806e-06, "loss": 0.49, "step": 12713 }, { "epoch": 0.37458568419191846, "grad_norm": 1.511517906064124, "learning_rate": 7.873986976721277e-06, "loss": 0.4657, "step": 12714 }, { "epoch": 0.374615146649382, "grad_norm": 1.4390490945080203, "learning_rate": 7.873566161424587e-06, "loss": 0.6114, "step": 12715 }, { "epoch": 0.3746446091068456, "grad_norm": 1.5051836042068596, "learning_rate": 7.873145315732189e-06, "loss": 0.5949, "step": 12716 }, { "epoch": 0.37467407156430915, "grad_norm": 1.506333227807046, "learning_rate": 7.872724439648539e-06, "loss": 0.5409, "step": 12717 }, { "epoch": 0.37470353402177276, "grad_norm": 1.6230620045451243, "learning_rate": 7.872303533178081e-06, "loss": 0.4812, "step": 12718 }, { "epoch": 0.37473299647923636, "grad_norm": 1.5247526358514432, "learning_rate": 7.871882596325275e-06, "loss": 0.6209, "step": 12719 }, { "epoch": 0.3747624589366999, "grad_norm": 1.8400530197749296, "learning_rate": 7.871461629094569e-06, "loss": 0.6105, "step": 12720 }, { "epoch": 0.3747919213941635, "grad_norm": 1.507625793241401, "learning_rate": 7.871040631490417e-06, "loss": 0.4922, "step": 12721 }, { "epoch": 0.37482138385162705, "grad_norm": 1.8469588515456916, "learning_rate": 7.870619603517271e-06, "loss": 0.5955, "step": 12722 }, { "epoch": 0.37485084630909066, "grad_norm": 1.6052322862600978, "learning_rate": 7.870198545179588e-06, "loss": 0.6321, "step": 12723 }, { "epoch": 0.3748803087665542, "grad_norm": 1.7488647039350707, "learning_rate": 7.869777456481818e-06, "loss": 0.6593, "step": 12724 }, { "epoch": 0.3749097712240178, "grad_norm": 1.3651482046374608, "learning_rate": 7.869356337428418e-06, "loss": 0.4094, "step": 12725 }, { "epoch": 0.37493923368148135, "grad_norm": 1.423891795972888, "learning_rate": 7.86893518802384e-06, "loss": 0.4294, "step": 12726 }, { "epoch": 0.37496869613894496, "grad_norm": 1.7360712978054922, "learning_rate": 7.868514008272538e-06, "loss": 0.4238, "step": 12727 }, { "epoch": 0.3749981585964085, "grad_norm": 1.5250412347892408, "learning_rate": 7.868092798178972e-06, "loss": 0.6118, "step": 12728 }, { "epoch": 0.3750276210538721, "grad_norm": 1.3323087850953903, "learning_rate": 7.867671557747594e-06, "loss": 0.4549, "step": 12729 }, { "epoch": 0.37505708351133565, "grad_norm": 1.4787472996389897, "learning_rate": 7.86725028698286e-06, "loss": 0.4118, "step": 12730 }, { "epoch": 0.37508654596879926, "grad_norm": 1.466409790206313, "learning_rate": 7.866828985889226e-06, "loss": 0.5091, "step": 12731 }, { "epoch": 0.37511600842626286, "grad_norm": 1.2876881058482799, "learning_rate": 7.86640765447115e-06, "loss": 0.4314, "step": 12732 }, { "epoch": 0.3751454708837264, "grad_norm": 1.4756785218058746, "learning_rate": 7.865986292733085e-06, "loss": 0.5561, "step": 12733 }, { "epoch": 0.37517493334119, "grad_norm": 1.4182863249569428, "learning_rate": 7.865564900679492e-06, "loss": 0.5261, "step": 12734 }, { "epoch": 0.37520439579865356, "grad_norm": 1.384162929848529, "learning_rate": 7.865143478314826e-06, "loss": 0.505, "step": 12735 }, { "epoch": 0.37523385825611716, "grad_norm": 1.4754664157056567, "learning_rate": 7.864722025643546e-06, "loss": 0.4504, "step": 12736 }, { "epoch": 0.3752633207135807, "grad_norm": 1.7204128813723998, "learning_rate": 7.86430054267011e-06, "loss": 0.637, "step": 12737 }, { "epoch": 0.3752927831710443, "grad_norm": 1.5412877638806795, "learning_rate": 7.863879029398972e-06, "loss": 0.4647, "step": 12738 }, { "epoch": 0.37532224562850786, "grad_norm": 1.3504423046084688, "learning_rate": 7.863457485834601e-06, "loss": 0.3772, "step": 12739 }, { "epoch": 0.37535170808597146, "grad_norm": 1.5301395860437337, "learning_rate": 7.863035911981445e-06, "loss": 0.4485, "step": 12740 }, { "epoch": 0.375381170543435, "grad_norm": 1.360910400564295, "learning_rate": 7.862614307843967e-06, "loss": 0.3655, "step": 12741 }, { "epoch": 0.3754106330008986, "grad_norm": 1.4023913861894775, "learning_rate": 7.86219267342663e-06, "loss": 0.3211, "step": 12742 }, { "epoch": 0.37544009545836216, "grad_norm": 1.350078563839753, "learning_rate": 7.86177100873389e-06, "loss": 0.3705, "step": 12743 }, { "epoch": 0.37546955791582576, "grad_norm": 1.3501535573532124, "learning_rate": 7.861349313770207e-06, "loss": 0.4244, "step": 12744 }, { "epoch": 0.37549902037328936, "grad_norm": 1.7135284007757707, "learning_rate": 7.860927588540045e-06, "loss": 0.637, "step": 12745 }, { "epoch": 0.3755284828307529, "grad_norm": 1.4673334363042654, "learning_rate": 7.860505833047863e-06, "loss": 0.4664, "step": 12746 }, { "epoch": 0.3755579452882165, "grad_norm": 1.5366151768204435, "learning_rate": 7.86008404729812e-06, "loss": 0.4437, "step": 12747 }, { "epoch": 0.37558740774568006, "grad_norm": 1.5881919331517291, "learning_rate": 7.859662231295282e-06, "loss": 0.4594, "step": 12748 }, { "epoch": 0.37561687020314366, "grad_norm": 1.5121823481931846, "learning_rate": 7.859240385043807e-06, "loss": 0.4413, "step": 12749 }, { "epoch": 0.3756463326606072, "grad_norm": 1.6258051840337604, "learning_rate": 7.858818508548159e-06, "loss": 0.543, "step": 12750 }, { "epoch": 0.3756757951180708, "grad_norm": 1.3902941440431968, "learning_rate": 7.858396601812799e-06, "loss": 0.3769, "step": 12751 }, { "epoch": 0.37570525757553436, "grad_norm": 1.4339279948605808, "learning_rate": 7.857974664842193e-06, "loss": 0.4489, "step": 12752 }, { "epoch": 0.37573472003299796, "grad_norm": 1.6053842561677754, "learning_rate": 7.8575526976408e-06, "loss": 0.5461, "step": 12753 }, { "epoch": 0.3757641824904615, "grad_norm": 1.468836410019155, "learning_rate": 7.857130700213088e-06, "loss": 0.456, "step": 12754 }, { "epoch": 0.3757936449479251, "grad_norm": 1.567843029457192, "learning_rate": 7.856708672563516e-06, "loss": 0.4088, "step": 12755 }, { "epoch": 0.37582310740538866, "grad_norm": 1.4548665411020605, "learning_rate": 7.856286614696551e-06, "loss": 0.3929, "step": 12756 }, { "epoch": 0.37585256986285226, "grad_norm": 1.6997767582551884, "learning_rate": 7.855864526616655e-06, "loss": 0.6446, "step": 12757 }, { "epoch": 0.37588203232031586, "grad_norm": 1.5825482770330068, "learning_rate": 7.855442408328296e-06, "loss": 0.4482, "step": 12758 }, { "epoch": 0.3759114947777794, "grad_norm": 1.4602173979907367, "learning_rate": 7.855020259835939e-06, "loss": 0.4519, "step": 12759 }, { "epoch": 0.375940957235243, "grad_norm": 1.4554706966504527, "learning_rate": 7.854598081144044e-06, "loss": 0.4226, "step": 12760 }, { "epoch": 0.37597041969270656, "grad_norm": 1.3867992513006013, "learning_rate": 7.854175872257084e-06, "loss": 0.4651, "step": 12761 }, { "epoch": 0.37599988215017016, "grad_norm": 1.4334507466962216, "learning_rate": 7.85375363317952e-06, "loss": 0.5681, "step": 12762 }, { "epoch": 0.3760293446076337, "grad_norm": 1.668768969538795, "learning_rate": 7.85333136391582e-06, "loss": 0.572, "step": 12763 }, { "epoch": 0.3760588070650973, "grad_norm": 1.6689959058394688, "learning_rate": 7.85290906447045e-06, "loss": 0.4785, "step": 12764 }, { "epoch": 0.37608826952256086, "grad_norm": 1.4950288835226901, "learning_rate": 7.852486734847877e-06, "loss": 0.3902, "step": 12765 }, { "epoch": 0.37611773198002446, "grad_norm": 1.4042195666977857, "learning_rate": 7.852064375052568e-06, "loss": 0.4033, "step": 12766 }, { "epoch": 0.376147194437488, "grad_norm": 1.663899229958971, "learning_rate": 7.851641985088994e-06, "loss": 0.4934, "step": 12767 }, { "epoch": 0.3761766568949516, "grad_norm": 1.6321001361360423, "learning_rate": 7.851219564961618e-06, "loss": 0.5505, "step": 12768 }, { "epoch": 0.37620611935241516, "grad_norm": 1.8660918345289976, "learning_rate": 7.850797114674911e-06, "loss": 0.4336, "step": 12769 }, { "epoch": 0.37623558180987876, "grad_norm": 1.550682691652126, "learning_rate": 7.85037463423334e-06, "loss": 0.5605, "step": 12770 }, { "epoch": 0.37626504426734236, "grad_norm": 1.486916013540989, "learning_rate": 7.849952123641375e-06, "loss": 0.5059, "step": 12771 }, { "epoch": 0.3762945067248059, "grad_norm": 1.5373203334461, "learning_rate": 7.849529582903486e-06, "loss": 0.5229, "step": 12772 }, { "epoch": 0.3763239691822695, "grad_norm": 1.5631425100250351, "learning_rate": 7.849107012024143e-06, "loss": 0.4395, "step": 12773 }, { "epoch": 0.37635343163973306, "grad_norm": 1.5089032490232306, "learning_rate": 7.848684411007812e-06, "loss": 0.554, "step": 12774 }, { "epoch": 0.37638289409719666, "grad_norm": 1.5621666854157596, "learning_rate": 7.848261779858966e-06, "loss": 0.5805, "step": 12775 }, { "epoch": 0.3764123565546602, "grad_norm": 1.4792611782341594, "learning_rate": 7.847839118582075e-06, "loss": 0.4693, "step": 12776 }, { "epoch": 0.3764418190121238, "grad_norm": 1.6121834845729908, "learning_rate": 7.847416427181608e-06, "loss": 0.6148, "step": 12777 }, { "epoch": 0.37647128146958736, "grad_norm": 1.581854631288118, "learning_rate": 7.846993705662041e-06, "loss": 0.4335, "step": 12778 }, { "epoch": 0.37650074392705096, "grad_norm": 1.4606227020281914, "learning_rate": 7.846570954027842e-06, "loss": 0.4715, "step": 12779 }, { "epoch": 0.3765302063845145, "grad_norm": 1.8764997794995333, "learning_rate": 7.84614817228348e-06, "loss": 0.5218, "step": 12780 }, { "epoch": 0.3765596688419781, "grad_norm": 1.6574006814641995, "learning_rate": 7.84572536043343e-06, "loss": 0.5865, "step": 12781 }, { "epoch": 0.37658913129944166, "grad_norm": 1.3110392055133615, "learning_rate": 7.845302518482167e-06, "loss": 0.3451, "step": 12782 }, { "epoch": 0.37661859375690526, "grad_norm": 1.669449377481449, "learning_rate": 7.84487964643416e-06, "loss": 0.5298, "step": 12783 }, { "epoch": 0.37664805621436886, "grad_norm": 1.751287729752346, "learning_rate": 7.844456744293884e-06, "loss": 0.4308, "step": 12784 }, { "epoch": 0.3766775186718324, "grad_norm": 1.4803493044621165, "learning_rate": 7.84403381206581e-06, "loss": 0.4187, "step": 12785 }, { "epoch": 0.376706981129296, "grad_norm": 1.470318771256521, "learning_rate": 7.843610849754413e-06, "loss": 0.496, "step": 12786 }, { "epoch": 0.37673644358675956, "grad_norm": 1.4361426732594882, "learning_rate": 7.843187857364168e-06, "loss": 0.3931, "step": 12787 }, { "epoch": 0.37676590604422316, "grad_norm": 1.5240798504484374, "learning_rate": 7.842764834899548e-06, "loss": 0.4991, "step": 12788 }, { "epoch": 0.3767953685016867, "grad_norm": 1.3924561463595524, "learning_rate": 7.842341782365028e-06, "loss": 0.4723, "step": 12789 }, { "epoch": 0.3768248309591503, "grad_norm": 1.677776648581546, "learning_rate": 7.841918699765083e-06, "loss": 0.2893, "step": 12790 }, { "epoch": 0.37685429341661386, "grad_norm": 1.5957367841122911, "learning_rate": 7.841495587104188e-06, "loss": 0.5819, "step": 12791 }, { "epoch": 0.37688375587407746, "grad_norm": 1.5428919859355943, "learning_rate": 7.841072444386819e-06, "loss": 0.4493, "step": 12792 }, { "epoch": 0.376913218331541, "grad_norm": 1.561223330210573, "learning_rate": 7.84064927161745e-06, "loss": 0.4442, "step": 12793 }, { "epoch": 0.3769426807890046, "grad_norm": 1.7393980059790344, "learning_rate": 7.84022606880056e-06, "loss": 0.4266, "step": 12794 }, { "epoch": 0.37697214324646816, "grad_norm": 1.8506176656742885, "learning_rate": 7.839802835940625e-06, "loss": 0.5985, "step": 12795 }, { "epoch": 0.37700160570393176, "grad_norm": 1.4482786709066169, "learning_rate": 7.839379573042121e-06, "loss": 0.4657, "step": 12796 }, { "epoch": 0.37703106816139537, "grad_norm": 1.456653277862696, "learning_rate": 7.838956280109524e-06, "loss": 0.4001, "step": 12797 }, { "epoch": 0.3770605306188589, "grad_norm": 1.6129725322500297, "learning_rate": 7.838532957147312e-06, "loss": 0.4457, "step": 12798 }, { "epoch": 0.3770899930763225, "grad_norm": 1.6616921547035561, "learning_rate": 7.838109604159966e-06, "loss": 0.5393, "step": 12799 }, { "epoch": 0.37711945553378606, "grad_norm": 1.5033676333132573, "learning_rate": 7.837686221151959e-06, "loss": 0.5546, "step": 12800 }, { "epoch": 0.37714891799124967, "grad_norm": 1.5410272404757839, "learning_rate": 7.837262808127773e-06, "loss": 0.5256, "step": 12801 }, { "epoch": 0.3771783804487132, "grad_norm": 1.5706462746993137, "learning_rate": 7.836839365091886e-06, "loss": 0.4865, "step": 12802 }, { "epoch": 0.3772078429061768, "grad_norm": 1.777250561046209, "learning_rate": 7.836415892048777e-06, "loss": 0.4766, "step": 12803 }, { "epoch": 0.37723730536364036, "grad_norm": 1.4690887542525826, "learning_rate": 7.835992389002924e-06, "loss": 0.468, "step": 12804 }, { "epoch": 0.37726676782110397, "grad_norm": 1.496331376083241, "learning_rate": 7.835568855958808e-06, "loss": 0.3334, "step": 12805 }, { "epoch": 0.3772962302785675, "grad_norm": 1.3409211095325828, "learning_rate": 7.835145292920909e-06, "loss": 0.3297, "step": 12806 }, { "epoch": 0.3773256927360311, "grad_norm": 1.468298757572939, "learning_rate": 7.834721699893708e-06, "loss": 0.4254, "step": 12807 }, { "epoch": 0.37735515519349466, "grad_norm": 1.3858054152947379, "learning_rate": 7.834298076881684e-06, "loss": 0.4112, "step": 12808 }, { "epoch": 0.37738461765095827, "grad_norm": 1.3743390649970233, "learning_rate": 7.833874423889319e-06, "loss": 0.3785, "step": 12809 }, { "epoch": 0.37741408010842187, "grad_norm": 1.6098150074464292, "learning_rate": 7.833450740921093e-06, "loss": 0.4639, "step": 12810 }, { "epoch": 0.3774435425658854, "grad_norm": 1.4538175859212377, "learning_rate": 7.83302702798149e-06, "loss": 0.4053, "step": 12811 }, { "epoch": 0.377473005023349, "grad_norm": 1.4588783863195447, "learning_rate": 7.83260328507499e-06, "loss": 0.4734, "step": 12812 }, { "epoch": 0.37750246748081256, "grad_norm": 1.5242358805078127, "learning_rate": 7.832179512206075e-06, "loss": 0.4412, "step": 12813 }, { "epoch": 0.37753192993827617, "grad_norm": 1.316903630210799, "learning_rate": 7.83175570937923e-06, "loss": 0.3927, "step": 12814 }, { "epoch": 0.3775613923957397, "grad_norm": 1.3797633619848961, "learning_rate": 7.831331876598936e-06, "loss": 0.4336, "step": 12815 }, { "epoch": 0.3775908548532033, "grad_norm": 1.5561780836177397, "learning_rate": 7.830908013869673e-06, "loss": 0.6216, "step": 12816 }, { "epoch": 0.37762031731066686, "grad_norm": 1.5486407669722329, "learning_rate": 7.83048412119593e-06, "loss": 0.6172, "step": 12817 }, { "epoch": 0.37764977976813047, "grad_norm": 1.5494569992727432, "learning_rate": 7.83006019858219e-06, "loss": 0.4075, "step": 12818 }, { "epoch": 0.377679242225594, "grad_norm": 1.6002638236558993, "learning_rate": 7.829636246032934e-06, "loss": 0.6489, "step": 12819 }, { "epoch": 0.3777087046830576, "grad_norm": 1.3577984283154523, "learning_rate": 7.829212263552647e-06, "loss": 0.4726, "step": 12820 }, { "epoch": 0.37773816714052116, "grad_norm": 1.5434045419298086, "learning_rate": 7.828788251145819e-06, "loss": 0.467, "step": 12821 }, { "epoch": 0.37776762959798477, "grad_norm": 1.3292770415488193, "learning_rate": 7.828364208816928e-06, "loss": 0.4791, "step": 12822 }, { "epoch": 0.37779709205544837, "grad_norm": 1.6005686005986692, "learning_rate": 7.827940136570462e-06, "loss": 0.5039, "step": 12823 }, { "epoch": 0.3778265545129119, "grad_norm": 1.5063405970955033, "learning_rate": 7.827516034410908e-06, "loss": 0.4418, "step": 12824 }, { "epoch": 0.3778560169703755, "grad_norm": 1.4667037149632194, "learning_rate": 7.82709190234275e-06, "loss": 0.4287, "step": 12825 }, { "epoch": 0.37788547942783907, "grad_norm": 1.4588415683627365, "learning_rate": 7.826667740370476e-06, "loss": 0.5359, "step": 12826 }, { "epoch": 0.37791494188530267, "grad_norm": 1.3857060448189178, "learning_rate": 7.826243548498573e-06, "loss": 0.4233, "step": 12827 }, { "epoch": 0.3779444043427662, "grad_norm": 1.353487916997114, "learning_rate": 7.825819326731527e-06, "loss": 0.4458, "step": 12828 }, { "epoch": 0.3779738668002298, "grad_norm": 1.538136944135341, "learning_rate": 7.825395075073825e-06, "loss": 0.4681, "step": 12829 }, { "epoch": 0.37800332925769337, "grad_norm": 1.4728634226827209, "learning_rate": 7.824970793529955e-06, "loss": 0.3612, "step": 12830 }, { "epoch": 0.37803279171515697, "grad_norm": 1.7032083956744415, "learning_rate": 7.824546482104404e-06, "loss": 0.4582, "step": 12831 }, { "epoch": 0.3780622541726205, "grad_norm": 1.3321027868012936, "learning_rate": 7.824122140801662e-06, "loss": 0.433, "step": 12832 }, { "epoch": 0.3780917166300841, "grad_norm": 1.6398938067615088, "learning_rate": 7.823697769626217e-06, "loss": 0.5533, "step": 12833 }, { "epoch": 0.37812117908754767, "grad_norm": 1.4118215620455319, "learning_rate": 7.823273368582558e-06, "loss": 0.535, "step": 12834 }, { "epoch": 0.37815064154501127, "grad_norm": 1.4477464426226336, "learning_rate": 7.822848937675171e-06, "loss": 0.4612, "step": 12835 }, { "epoch": 0.37818010400247487, "grad_norm": 1.443436175654326, "learning_rate": 7.82242447690855e-06, "loss": 0.5274, "step": 12836 }, { "epoch": 0.3782095664599384, "grad_norm": 1.676950815986013, "learning_rate": 7.821999986287183e-06, "loss": 0.4901, "step": 12837 }, { "epoch": 0.378239028917402, "grad_norm": 1.677709247540936, "learning_rate": 7.82157546581556e-06, "loss": 0.4906, "step": 12838 }, { "epoch": 0.37826849137486557, "grad_norm": 1.4885674209829358, "learning_rate": 7.821150915498172e-06, "loss": 0.5461, "step": 12839 }, { "epoch": 0.37829795383232917, "grad_norm": 1.450692869443998, "learning_rate": 7.820726335339511e-06, "loss": 0.3563, "step": 12840 }, { "epoch": 0.3783274162897927, "grad_norm": 1.4831000958729734, "learning_rate": 7.820301725344064e-06, "loss": 0.5624, "step": 12841 }, { "epoch": 0.3783568787472563, "grad_norm": 1.4291036179533347, "learning_rate": 7.819877085516326e-06, "loss": 0.4731, "step": 12842 }, { "epoch": 0.37838634120471987, "grad_norm": 1.4707133425636323, "learning_rate": 7.819452415860787e-06, "loss": 0.562, "step": 12843 }, { "epoch": 0.37841580366218347, "grad_norm": 1.6518780248864453, "learning_rate": 7.81902771638194e-06, "loss": 0.4224, "step": 12844 }, { "epoch": 0.378445266119647, "grad_norm": 1.3616259630741696, "learning_rate": 7.818602987084275e-06, "loss": 0.4546, "step": 12845 }, { "epoch": 0.3784747285771106, "grad_norm": 1.4584699120841151, "learning_rate": 7.81817822797229e-06, "loss": 0.4375, "step": 12846 }, { "epoch": 0.37850419103457417, "grad_norm": 1.660693932377996, "learning_rate": 7.817753439050473e-06, "loss": 0.5313, "step": 12847 }, { "epoch": 0.37853365349203777, "grad_norm": 1.4872790750702298, "learning_rate": 7.817328620323317e-06, "loss": 0.4892, "step": 12848 }, { "epoch": 0.37856311594950137, "grad_norm": 1.4290955679125872, "learning_rate": 7.816903771795321e-06, "loss": 0.4021, "step": 12849 }, { "epoch": 0.3785925784069649, "grad_norm": 1.6589600489540977, "learning_rate": 7.816478893470973e-06, "loss": 0.5239, "step": 12850 }, { "epoch": 0.3786220408644285, "grad_norm": 1.586897756428099, "learning_rate": 7.81605398535477e-06, "loss": 0.3826, "step": 12851 }, { "epoch": 0.37865150332189207, "grad_norm": 1.5389677050555282, "learning_rate": 7.815629047451205e-06, "loss": 0.5337, "step": 12852 }, { "epoch": 0.37868096577935567, "grad_norm": 1.569601463252104, "learning_rate": 7.815204079764776e-06, "loss": 0.4662, "step": 12853 }, { "epoch": 0.3787104282368192, "grad_norm": 1.6142163931595754, "learning_rate": 7.814779082299977e-06, "loss": 0.5718, "step": 12854 }, { "epoch": 0.3787398906942828, "grad_norm": 1.4660647734038459, "learning_rate": 7.8143540550613e-06, "loss": 0.4885, "step": 12855 }, { "epoch": 0.37876935315174637, "grad_norm": 1.7474763652741159, "learning_rate": 7.813928998053246e-06, "loss": 0.6226, "step": 12856 }, { "epoch": 0.37879881560920997, "grad_norm": 1.3804059916494724, "learning_rate": 7.813503911280307e-06, "loss": 0.4455, "step": 12857 }, { "epoch": 0.3788282780666735, "grad_norm": 1.405601389043015, "learning_rate": 7.81307879474698e-06, "loss": 0.5094, "step": 12858 }, { "epoch": 0.3788577405241371, "grad_norm": 1.3701012273973092, "learning_rate": 7.812653648457766e-06, "loss": 0.3727, "step": 12859 }, { "epoch": 0.37888720298160067, "grad_norm": 1.4565256830663222, "learning_rate": 7.812228472417157e-06, "loss": 0.4193, "step": 12860 }, { "epoch": 0.37891666543906427, "grad_norm": 1.655311874095817, "learning_rate": 7.811803266629651e-06, "loss": 0.5794, "step": 12861 }, { "epoch": 0.3789461278965279, "grad_norm": 1.6403471623669987, "learning_rate": 7.81137803109975e-06, "loss": 0.391, "step": 12862 }, { "epoch": 0.3789755903539914, "grad_norm": 1.7409426675416333, "learning_rate": 7.810952765831948e-06, "loss": 0.5585, "step": 12863 }, { "epoch": 0.379005052811455, "grad_norm": 1.573475602892007, "learning_rate": 7.810527470830743e-06, "loss": 0.4236, "step": 12864 }, { "epoch": 0.37903451526891857, "grad_norm": 1.6705546860940055, "learning_rate": 7.810102146100636e-06, "loss": 0.6445, "step": 12865 }, { "epoch": 0.3790639777263822, "grad_norm": 1.4980779523425467, "learning_rate": 7.809676791646124e-06, "loss": 0.4839, "step": 12866 }, { "epoch": 0.3790934401838457, "grad_norm": 1.484718084715284, "learning_rate": 7.809251407471707e-06, "loss": 0.5392, "step": 12867 }, { "epoch": 0.3791229026413093, "grad_norm": 1.6539545138673601, "learning_rate": 7.808825993581887e-06, "loss": 0.506, "step": 12868 }, { "epoch": 0.37915236509877287, "grad_norm": 1.5794437267614596, "learning_rate": 7.80840054998116e-06, "loss": 0.4434, "step": 12869 }, { "epoch": 0.3791818275562365, "grad_norm": 1.7426443366750963, "learning_rate": 7.807975076674028e-06, "loss": 0.3019, "step": 12870 }, { "epoch": 0.3792112900137, "grad_norm": 1.600440128535683, "learning_rate": 7.807549573664992e-06, "loss": 0.5016, "step": 12871 }, { "epoch": 0.3792407524711636, "grad_norm": 1.6452722842967438, "learning_rate": 7.807124040958552e-06, "loss": 0.6623, "step": 12872 }, { "epoch": 0.37927021492862717, "grad_norm": 1.5703229984096894, "learning_rate": 7.806698478559208e-06, "loss": 0.6187, "step": 12873 }, { "epoch": 0.37929967738609077, "grad_norm": 1.6979011673099755, "learning_rate": 7.806272886471464e-06, "loss": 0.5943, "step": 12874 }, { "epoch": 0.3793291398435544, "grad_norm": 1.5285059185041008, "learning_rate": 7.805847264699821e-06, "loss": 0.4133, "step": 12875 }, { "epoch": 0.3793586023010179, "grad_norm": 1.4113499380920707, "learning_rate": 7.80542161324878e-06, "loss": 0.4362, "step": 12876 }, { "epoch": 0.3793880647584815, "grad_norm": 1.536263382148428, "learning_rate": 7.804995932122845e-06, "loss": 0.5491, "step": 12877 }, { "epoch": 0.37941752721594507, "grad_norm": 1.3283495427286103, "learning_rate": 7.804570221326517e-06, "loss": 0.4519, "step": 12878 }, { "epoch": 0.3794469896734087, "grad_norm": 1.6623985055919037, "learning_rate": 7.804144480864301e-06, "loss": 0.4962, "step": 12879 }, { "epoch": 0.3794764521308722, "grad_norm": 1.6505804507600927, "learning_rate": 7.803718710740698e-06, "loss": 0.5258, "step": 12880 }, { "epoch": 0.3795059145883358, "grad_norm": 1.5118363160536439, "learning_rate": 7.803292910960215e-06, "loss": 0.4521, "step": 12881 }, { "epoch": 0.37953537704579937, "grad_norm": 1.255443453670255, "learning_rate": 7.80286708152735e-06, "loss": 0.3153, "step": 12882 }, { "epoch": 0.379564839503263, "grad_norm": 1.3274652546679502, "learning_rate": 7.802441222446616e-06, "loss": 0.415, "step": 12883 }, { "epoch": 0.3795943019607265, "grad_norm": 1.8234594630982996, "learning_rate": 7.80201533372251e-06, "loss": 0.5479, "step": 12884 }, { "epoch": 0.3796237644181901, "grad_norm": 1.6463949293202222, "learning_rate": 7.80158941535954e-06, "loss": 0.3913, "step": 12885 }, { "epoch": 0.37965322687565367, "grad_norm": 1.4739745878525905, "learning_rate": 7.801163467362209e-06, "loss": 0.4769, "step": 12886 }, { "epoch": 0.3796826893331173, "grad_norm": 1.366526182024559, "learning_rate": 7.800737489735027e-06, "loss": 0.4289, "step": 12887 }, { "epoch": 0.3797121517905809, "grad_norm": 1.686345926870935, "learning_rate": 7.800311482482498e-06, "loss": 0.609, "step": 12888 }, { "epoch": 0.3797416142480444, "grad_norm": 1.6952124835060476, "learning_rate": 7.799885445609126e-06, "loss": 0.5154, "step": 12889 }, { "epoch": 0.379771076705508, "grad_norm": 1.6245039306786733, "learning_rate": 7.799459379119421e-06, "loss": 0.4456, "step": 12890 }, { "epoch": 0.3798005391629716, "grad_norm": 1.4749284139679482, "learning_rate": 7.799033283017887e-06, "loss": 0.4636, "step": 12891 }, { "epoch": 0.3798300016204352, "grad_norm": 1.4966219000302845, "learning_rate": 7.79860715730903e-06, "loss": 0.3359, "step": 12892 }, { "epoch": 0.3798594640778987, "grad_norm": 1.614364191508527, "learning_rate": 7.79818100199736e-06, "loss": 0.372, "step": 12893 }, { "epoch": 0.3798889265353623, "grad_norm": 1.4368109477709308, "learning_rate": 7.797754817087386e-06, "loss": 0.4962, "step": 12894 }, { "epoch": 0.3799183889928259, "grad_norm": 1.586808050262748, "learning_rate": 7.797328602583613e-06, "loss": 0.4996, "step": 12895 }, { "epoch": 0.3799478514502895, "grad_norm": 1.4127787806695031, "learning_rate": 7.79690235849055e-06, "loss": 0.4516, "step": 12896 }, { "epoch": 0.379977313907753, "grad_norm": 1.667571138603668, "learning_rate": 7.79647608481271e-06, "loss": 0.6482, "step": 12897 }, { "epoch": 0.3800067763652166, "grad_norm": 1.8766084062797328, "learning_rate": 7.796049781554594e-06, "loss": 0.5575, "step": 12898 }, { "epoch": 0.3800362388226802, "grad_norm": 1.6197909214045503, "learning_rate": 7.795623448720717e-06, "loss": 0.5302, "step": 12899 }, { "epoch": 0.3800657012801438, "grad_norm": 1.5202384524639123, "learning_rate": 7.795197086315586e-06, "loss": 0.5311, "step": 12900 }, { "epoch": 0.3800951637376074, "grad_norm": 1.2952503605679138, "learning_rate": 7.794770694343713e-06, "loss": 0.3555, "step": 12901 }, { "epoch": 0.3801246261950709, "grad_norm": 1.4430916480984426, "learning_rate": 7.794344272809608e-06, "loss": 0.3414, "step": 12902 }, { "epoch": 0.3801540886525345, "grad_norm": 1.2502141983091728, "learning_rate": 7.79391782171778e-06, "loss": 0.3686, "step": 12903 }, { "epoch": 0.3801835511099981, "grad_norm": 1.4421155843754363, "learning_rate": 7.79349134107274e-06, "loss": 0.4919, "step": 12904 }, { "epoch": 0.3802130135674617, "grad_norm": 1.4978079320375526, "learning_rate": 7.793064830879002e-06, "loss": 0.4862, "step": 12905 }, { "epoch": 0.3802424760249252, "grad_norm": 1.6657512027037387, "learning_rate": 7.792638291141075e-06, "loss": 0.5581, "step": 12906 }, { "epoch": 0.3802719384823888, "grad_norm": 1.4801542328175592, "learning_rate": 7.79221172186347e-06, "loss": 0.4628, "step": 12907 }, { "epoch": 0.3803014009398524, "grad_norm": 1.5585187330198829, "learning_rate": 7.791785123050702e-06, "loss": 0.4288, "step": 12908 }, { "epoch": 0.380330863397316, "grad_norm": 1.4706119522827208, "learning_rate": 7.79135849470728e-06, "loss": 0.3602, "step": 12909 }, { "epoch": 0.3803603258547795, "grad_norm": 1.903955303397176, "learning_rate": 7.790931836837718e-06, "loss": 0.4014, "step": 12910 }, { "epoch": 0.3803897883122431, "grad_norm": 1.5752060970281208, "learning_rate": 7.790505149446532e-06, "loss": 0.5061, "step": 12911 }, { "epoch": 0.3804192507697067, "grad_norm": 1.618484650968686, "learning_rate": 7.790078432538232e-06, "loss": 0.4455, "step": 12912 }, { "epoch": 0.3804487132271703, "grad_norm": 1.425728838451544, "learning_rate": 7.789651686117331e-06, "loss": 0.4227, "step": 12913 }, { "epoch": 0.3804781756846339, "grad_norm": 1.4647649527536049, "learning_rate": 7.789224910188348e-06, "loss": 0.4993, "step": 12914 }, { "epoch": 0.3805076381420974, "grad_norm": 1.4548760924977018, "learning_rate": 7.788798104755792e-06, "loss": 0.4429, "step": 12915 }, { "epoch": 0.38053710059956103, "grad_norm": 1.4842183878781585, "learning_rate": 7.788371269824179e-06, "loss": 0.4989, "step": 12916 }, { "epoch": 0.3805665630570246, "grad_norm": 1.8006810084453553, "learning_rate": 7.787944405398025e-06, "loss": 0.4953, "step": 12917 }, { "epoch": 0.3805960255144882, "grad_norm": 1.4276185138819228, "learning_rate": 7.787517511481844e-06, "loss": 0.3896, "step": 12918 }, { "epoch": 0.3806254879719517, "grad_norm": 1.5904857769719631, "learning_rate": 7.787090588080155e-06, "loss": 0.4262, "step": 12919 }, { "epoch": 0.38065495042941533, "grad_norm": 1.8774257034548438, "learning_rate": 7.78666363519747e-06, "loss": 0.5009, "step": 12920 }, { "epoch": 0.3806844128868789, "grad_norm": 1.396880997999131, "learning_rate": 7.786236652838303e-06, "loss": 0.4335, "step": 12921 }, { "epoch": 0.3807138753443425, "grad_norm": 1.5093820815159649, "learning_rate": 7.785809641007178e-06, "loss": 0.3869, "step": 12922 }, { "epoch": 0.380743337801806, "grad_norm": 1.2754444923157362, "learning_rate": 7.785382599708606e-06, "loss": 0.4205, "step": 12923 }, { "epoch": 0.38077280025926963, "grad_norm": 1.6141045682770996, "learning_rate": 7.784955528947106e-06, "loss": 0.5493, "step": 12924 }, { "epoch": 0.3808022627167332, "grad_norm": 1.5915573438912012, "learning_rate": 7.784528428727193e-06, "loss": 0.4679, "step": 12925 }, { "epoch": 0.3808317251741968, "grad_norm": 1.336851159731204, "learning_rate": 7.78410129905339e-06, "loss": 0.4021, "step": 12926 }, { "epoch": 0.3808611876316604, "grad_norm": 1.6291969133732436, "learning_rate": 7.78367413993021e-06, "loss": 0.4267, "step": 12927 }, { "epoch": 0.3808906500891239, "grad_norm": 1.6721148092325284, "learning_rate": 7.783246951362176e-06, "loss": 0.4524, "step": 12928 }, { "epoch": 0.38092011254658753, "grad_norm": 1.5060324185100589, "learning_rate": 7.782819733353802e-06, "loss": 0.506, "step": 12929 }, { "epoch": 0.3809495750040511, "grad_norm": 1.5647801173139226, "learning_rate": 7.782392485909608e-06, "loss": 0.3586, "step": 12930 }, { "epoch": 0.3809790374615147, "grad_norm": 1.623120008830826, "learning_rate": 7.781965209034115e-06, "loss": 0.5818, "step": 12931 }, { "epoch": 0.3810084999189782, "grad_norm": 1.4369581485977798, "learning_rate": 7.781537902731843e-06, "loss": 0.4765, "step": 12932 }, { "epoch": 0.38103796237644183, "grad_norm": 1.5282438553949156, "learning_rate": 7.781110567007309e-06, "loss": 0.4537, "step": 12933 }, { "epoch": 0.3810674248339054, "grad_norm": 1.4031224178598023, "learning_rate": 7.780683201865038e-06, "loss": 0.4633, "step": 12934 }, { "epoch": 0.381096887291369, "grad_norm": 1.3865961118755643, "learning_rate": 7.780255807309544e-06, "loss": 0.5065, "step": 12935 }, { "epoch": 0.3811263497488325, "grad_norm": 1.6911541290477832, "learning_rate": 7.779828383345353e-06, "loss": 0.5786, "step": 12936 }, { "epoch": 0.38115581220629613, "grad_norm": 1.5133082371660023, "learning_rate": 7.779400929976984e-06, "loss": 0.5228, "step": 12937 }, { "epoch": 0.3811852746637597, "grad_norm": 1.3391036848954403, "learning_rate": 7.77897344720896e-06, "loss": 0.4061, "step": 12938 }, { "epoch": 0.3812147371212233, "grad_norm": 1.436531719342231, "learning_rate": 7.7785459350458e-06, "loss": 0.4784, "step": 12939 }, { "epoch": 0.3812441995786869, "grad_norm": 1.6069081814727293, "learning_rate": 7.778118393492029e-06, "loss": 0.3424, "step": 12940 }, { "epoch": 0.38127366203615043, "grad_norm": 1.6706930993006914, "learning_rate": 7.777690822552168e-06, "loss": 0.5354, "step": 12941 }, { "epoch": 0.38130312449361403, "grad_norm": 1.4552968602931802, "learning_rate": 7.77726322223074e-06, "loss": 0.5725, "step": 12942 }, { "epoch": 0.3813325869510776, "grad_norm": 1.3409607239320822, "learning_rate": 7.77683559253227e-06, "loss": 0.332, "step": 12943 }, { "epoch": 0.3813620494085412, "grad_norm": 1.608809779650409, "learning_rate": 7.776407933461277e-06, "loss": 0.4421, "step": 12944 }, { "epoch": 0.38139151186600473, "grad_norm": 1.3901582117341718, "learning_rate": 7.775980245022288e-06, "loss": 0.4393, "step": 12945 }, { "epoch": 0.38142097432346833, "grad_norm": 1.586778635942932, "learning_rate": 7.775552527219825e-06, "loss": 0.557, "step": 12946 }, { "epoch": 0.3814504367809319, "grad_norm": 1.5252621053439026, "learning_rate": 7.775124780058415e-06, "loss": 0.5133, "step": 12947 }, { "epoch": 0.3814798992383955, "grad_norm": 1.7141974049906543, "learning_rate": 7.77469700354258e-06, "loss": 0.6072, "step": 12948 }, { "epoch": 0.38150936169585903, "grad_norm": 1.481588529926279, "learning_rate": 7.774269197676844e-06, "loss": 0.3947, "step": 12949 }, { "epoch": 0.38153882415332263, "grad_norm": 1.6386324350118684, "learning_rate": 7.773841362465738e-06, "loss": 0.4589, "step": 12950 }, { "epoch": 0.3815682866107862, "grad_norm": 1.6137289305328733, "learning_rate": 7.77341349791378e-06, "loss": 0.5713, "step": 12951 }, { "epoch": 0.3815977490682498, "grad_norm": 1.3464303295803026, "learning_rate": 7.772985604025499e-06, "loss": 0.4197, "step": 12952 }, { "epoch": 0.3816272115257134, "grad_norm": 1.3972921009684365, "learning_rate": 7.772557680805423e-06, "loss": 0.4589, "step": 12953 }, { "epoch": 0.38165667398317693, "grad_norm": 1.293159582618778, "learning_rate": 7.772129728258075e-06, "loss": 0.3469, "step": 12954 }, { "epoch": 0.38168613644064053, "grad_norm": 1.5998543491745372, "learning_rate": 7.771701746387986e-06, "loss": 0.44, "step": 12955 }, { "epoch": 0.3817155988981041, "grad_norm": 1.6169554065149598, "learning_rate": 7.771273735199681e-06, "loss": 0.5393, "step": 12956 }, { "epoch": 0.3817450613555677, "grad_norm": 1.4757741503914943, "learning_rate": 7.770845694697683e-06, "loss": 0.5724, "step": 12957 }, { "epoch": 0.38177452381303123, "grad_norm": 1.6969802691257143, "learning_rate": 7.770417624886526e-06, "loss": 0.5223, "step": 12958 }, { "epoch": 0.38180398627049483, "grad_norm": 1.3840939988322574, "learning_rate": 7.769989525770738e-06, "loss": 0.4694, "step": 12959 }, { "epoch": 0.3818334487279584, "grad_norm": 1.514325402270834, "learning_rate": 7.76956139735484e-06, "loss": 0.6163, "step": 12960 }, { "epoch": 0.381862911185422, "grad_norm": 1.3961146881922124, "learning_rate": 7.76913323964337e-06, "loss": 0.4742, "step": 12961 }, { "epoch": 0.38189237364288553, "grad_norm": 1.5389438992985631, "learning_rate": 7.76870505264085e-06, "loss": 0.4287, "step": 12962 }, { "epoch": 0.38192183610034913, "grad_norm": 1.625780093399609, "learning_rate": 7.768276836351813e-06, "loss": 0.4407, "step": 12963 }, { "epoch": 0.3819512985578127, "grad_norm": 1.3399907010924794, "learning_rate": 7.767848590780788e-06, "loss": 0.4394, "step": 12964 }, { "epoch": 0.3819807610152763, "grad_norm": 1.3109488296919083, "learning_rate": 7.767420315932304e-06, "loss": 0.3427, "step": 12965 }, { "epoch": 0.3820102234727399, "grad_norm": 1.4904344099982567, "learning_rate": 7.766992011810889e-06, "loss": 0.3369, "step": 12966 }, { "epoch": 0.38203968593020343, "grad_norm": 1.742494792481556, "learning_rate": 7.766563678421078e-06, "loss": 0.6795, "step": 12967 }, { "epoch": 0.38206914838766703, "grad_norm": 1.4751609939516899, "learning_rate": 7.766135315767399e-06, "loss": 0.4877, "step": 12968 }, { "epoch": 0.3820986108451306, "grad_norm": 1.713511178271405, "learning_rate": 7.765706923854382e-06, "loss": 0.6183, "step": 12969 }, { "epoch": 0.3821280733025942, "grad_norm": 1.5860530933257635, "learning_rate": 7.765278502686561e-06, "loss": 0.3552, "step": 12970 }, { "epoch": 0.38215753576005773, "grad_norm": 1.4139810430432311, "learning_rate": 7.764850052268468e-06, "loss": 0.418, "step": 12971 }, { "epoch": 0.38218699821752133, "grad_norm": 1.4421994021972955, "learning_rate": 7.764421572604632e-06, "loss": 0.4545, "step": 12972 }, { "epoch": 0.3822164606749849, "grad_norm": 1.3349892797037939, "learning_rate": 7.763993063699588e-06, "loss": 0.3764, "step": 12973 }, { "epoch": 0.3822459231324485, "grad_norm": 1.47442735210757, "learning_rate": 7.763564525557868e-06, "loss": 0.4868, "step": 12974 }, { "epoch": 0.38227538558991203, "grad_norm": 1.4643055742528868, "learning_rate": 7.763135958184005e-06, "loss": 0.4204, "step": 12975 }, { "epoch": 0.38230484804737563, "grad_norm": 1.6940566786553362, "learning_rate": 7.762707361582531e-06, "loss": 0.4153, "step": 12976 }, { "epoch": 0.3823343105048392, "grad_norm": 1.6345379626987682, "learning_rate": 7.76227873575798e-06, "loss": 0.4721, "step": 12977 }, { "epoch": 0.3823637729623028, "grad_norm": 1.5821143260357757, "learning_rate": 7.761850080714889e-06, "loss": 0.4792, "step": 12978 }, { "epoch": 0.3823932354197664, "grad_norm": 1.650875759937649, "learning_rate": 7.761421396457787e-06, "loss": 0.6048, "step": 12979 }, { "epoch": 0.38242269787722993, "grad_norm": 1.58141672127299, "learning_rate": 7.760992682991211e-06, "loss": 0.5236, "step": 12980 }, { "epoch": 0.38245216033469354, "grad_norm": 1.58543160877595, "learning_rate": 7.760563940319696e-06, "loss": 0.4608, "step": 12981 }, { "epoch": 0.3824816227921571, "grad_norm": 1.70427285226247, "learning_rate": 7.760135168447778e-06, "loss": 0.5475, "step": 12982 }, { "epoch": 0.3825110852496207, "grad_norm": 1.4765771071723828, "learning_rate": 7.759706367379991e-06, "loss": 0.4511, "step": 12983 }, { "epoch": 0.38254054770708423, "grad_norm": 1.7401183251476926, "learning_rate": 7.75927753712087e-06, "loss": 0.6068, "step": 12984 }, { "epoch": 0.38257001016454784, "grad_norm": 1.4836798365477415, "learning_rate": 7.758848677674954e-06, "loss": 0.4969, "step": 12985 }, { "epoch": 0.3825994726220114, "grad_norm": 1.5124077249738967, "learning_rate": 7.758419789046776e-06, "loss": 0.4195, "step": 12986 }, { "epoch": 0.382628935079475, "grad_norm": 1.4640471211523116, "learning_rate": 7.757990871240875e-06, "loss": 0.5028, "step": 12987 }, { "epoch": 0.38265839753693853, "grad_norm": 1.5167892117794917, "learning_rate": 7.757561924261788e-06, "loss": 0.3258, "step": 12988 }, { "epoch": 0.38268785999440214, "grad_norm": 1.5916728277937322, "learning_rate": 7.75713294811405e-06, "loss": 0.4669, "step": 12989 }, { "epoch": 0.3827173224518657, "grad_norm": 1.6895572742823997, "learning_rate": 7.7567039428022e-06, "loss": 0.4918, "step": 12990 }, { "epoch": 0.3827467849093293, "grad_norm": 1.6217588932235227, "learning_rate": 7.756274908330775e-06, "loss": 0.4795, "step": 12991 }, { "epoch": 0.3827762473667929, "grad_norm": 1.5978851431811574, "learning_rate": 7.755845844704316e-06, "loss": 0.5281, "step": 12992 }, { "epoch": 0.38280570982425643, "grad_norm": 1.5199029863024411, "learning_rate": 7.755416751927359e-06, "loss": 0.559, "step": 12993 }, { "epoch": 0.38283517228172004, "grad_norm": 1.6526702284989274, "learning_rate": 7.754987630004443e-06, "loss": 0.5094, "step": 12994 }, { "epoch": 0.3828646347391836, "grad_norm": 1.6875488659618423, "learning_rate": 7.754558478940106e-06, "loss": 0.5126, "step": 12995 }, { "epoch": 0.3828940971966472, "grad_norm": 1.4571280659262962, "learning_rate": 7.75412929873889e-06, "loss": 0.4583, "step": 12996 }, { "epoch": 0.38292355965411073, "grad_norm": 1.515811818953983, "learning_rate": 7.753700089405334e-06, "loss": 0.3815, "step": 12997 }, { "epoch": 0.38295302211157434, "grad_norm": 1.6336708075862165, "learning_rate": 7.753270850943977e-06, "loss": 0.4516, "step": 12998 }, { "epoch": 0.3829824845690379, "grad_norm": 1.665077651537542, "learning_rate": 7.75284158335936e-06, "loss": 0.4507, "step": 12999 }, { "epoch": 0.3830119470265015, "grad_norm": 1.4028315612157032, "learning_rate": 7.752412286656026e-06, "loss": 0.3732, "step": 13000 }, { "epoch": 0.38304140948396503, "grad_norm": 1.4938580732426383, "learning_rate": 7.75198296083851e-06, "loss": 0.5169, "step": 13001 }, { "epoch": 0.38307087194142864, "grad_norm": 1.3734259558085267, "learning_rate": 7.75155360591136e-06, "loss": 0.3462, "step": 13002 }, { "epoch": 0.3831003343988922, "grad_norm": 1.5140160398740736, "learning_rate": 7.751124221879113e-06, "loss": 0.5113, "step": 13003 }, { "epoch": 0.3831297968563558, "grad_norm": 1.7840305705017072, "learning_rate": 7.750694808746313e-06, "loss": 0.5805, "step": 13004 }, { "epoch": 0.3831592593138194, "grad_norm": 1.3786228640936093, "learning_rate": 7.7502653665175e-06, "loss": 0.4415, "step": 13005 }, { "epoch": 0.38318872177128294, "grad_norm": 1.6039627542844985, "learning_rate": 7.74983589519722e-06, "loss": 0.309, "step": 13006 }, { "epoch": 0.38321818422874654, "grad_norm": 1.510834600211905, "learning_rate": 7.749406394790014e-06, "loss": 0.493, "step": 13007 }, { "epoch": 0.3832476466862101, "grad_norm": 1.6204989460107002, "learning_rate": 7.748976865300423e-06, "loss": 0.5782, "step": 13008 }, { "epoch": 0.3832771091436737, "grad_norm": 1.4341708926484091, "learning_rate": 7.748547306732996e-06, "loss": 0.5254, "step": 13009 }, { "epoch": 0.38330657160113724, "grad_norm": 1.6229810358793078, "learning_rate": 7.748117719092269e-06, "loss": 0.5199, "step": 13010 }, { "epoch": 0.38333603405860084, "grad_norm": 1.3689075585180681, "learning_rate": 7.747688102382793e-06, "loss": 0.4845, "step": 13011 }, { "epoch": 0.3833654965160644, "grad_norm": 1.3442024057600956, "learning_rate": 7.74725845660911e-06, "loss": 0.4245, "step": 13012 }, { "epoch": 0.383394958973528, "grad_norm": 1.531300374593484, "learning_rate": 7.746828781775764e-06, "loss": 0.553, "step": 13013 }, { "epoch": 0.38342442143099154, "grad_norm": 1.4706030694180343, "learning_rate": 7.746399077887298e-06, "loss": 0.4786, "step": 13014 }, { "epoch": 0.38345388388845514, "grad_norm": 1.452666609355542, "learning_rate": 7.745969344948262e-06, "loss": 0.5043, "step": 13015 }, { "epoch": 0.38348334634591874, "grad_norm": 1.401885377738155, "learning_rate": 7.745539582963198e-06, "loss": 0.3771, "step": 13016 }, { "epoch": 0.3835128088033823, "grad_norm": 1.5677476672837987, "learning_rate": 7.745109791936653e-06, "loss": 0.4696, "step": 13017 }, { "epoch": 0.3835422712608459, "grad_norm": 1.507842295868495, "learning_rate": 7.744679971873174e-06, "loss": 0.5131, "step": 13018 }, { "epoch": 0.38357173371830944, "grad_norm": 1.7319607971792752, "learning_rate": 7.744250122777307e-06, "loss": 0.3217, "step": 13019 }, { "epoch": 0.38360119617577304, "grad_norm": 1.6354163918285196, "learning_rate": 7.743820244653596e-06, "loss": 0.4327, "step": 13020 }, { "epoch": 0.3836306586332366, "grad_norm": 1.596456266937491, "learning_rate": 7.743390337506594e-06, "loss": 0.5396, "step": 13021 }, { "epoch": 0.3836601210907002, "grad_norm": 1.4860423428142175, "learning_rate": 7.742960401340844e-06, "loss": 0.5342, "step": 13022 }, { "epoch": 0.38368958354816374, "grad_norm": 1.6699305321972069, "learning_rate": 7.742530436160895e-06, "loss": 0.5131, "step": 13023 }, { "epoch": 0.38371904600562734, "grad_norm": 1.5506536534599067, "learning_rate": 7.742100441971294e-06, "loss": 0.4745, "step": 13024 }, { "epoch": 0.3837485084630909, "grad_norm": 1.5842310208037396, "learning_rate": 7.74167041877659e-06, "loss": 0.4956, "step": 13025 }, { "epoch": 0.3837779709205545, "grad_norm": 1.614381176584608, "learning_rate": 7.741240366581332e-06, "loss": 0.465, "step": 13026 }, { "epoch": 0.38380743337801804, "grad_norm": 1.6404907221118739, "learning_rate": 7.740810285390068e-06, "loss": 0.5, "step": 13027 }, { "epoch": 0.38383689583548164, "grad_norm": 1.5815831455735072, "learning_rate": 7.74038017520735e-06, "loss": 0.3964, "step": 13028 }, { "epoch": 0.38386635829294524, "grad_norm": 1.689143412960064, "learning_rate": 7.739950036037725e-06, "loss": 0.4882, "step": 13029 }, { "epoch": 0.3838958207504088, "grad_norm": 1.3669597730836678, "learning_rate": 7.739519867885744e-06, "loss": 0.3519, "step": 13030 }, { "epoch": 0.3839252832078724, "grad_norm": 1.4426230686920505, "learning_rate": 7.739089670755955e-06, "loss": 0.4658, "step": 13031 }, { "epoch": 0.38395474566533594, "grad_norm": 1.9672562934948468, "learning_rate": 7.738659444652912e-06, "loss": 0.4533, "step": 13032 }, { "epoch": 0.38398420812279954, "grad_norm": 1.682859530629484, "learning_rate": 7.738229189581163e-06, "loss": 0.4979, "step": 13033 }, { "epoch": 0.3840136705802631, "grad_norm": 1.606900446946199, "learning_rate": 7.737798905545262e-06, "loss": 0.481, "step": 13034 }, { "epoch": 0.3840431330377267, "grad_norm": 1.608890138080788, "learning_rate": 7.737368592549756e-06, "loss": 0.5338, "step": 13035 }, { "epoch": 0.38407259549519024, "grad_norm": 1.704797519487098, "learning_rate": 7.736938250599201e-06, "loss": 0.4817, "step": 13036 }, { "epoch": 0.38410205795265384, "grad_norm": 1.5753580272128607, "learning_rate": 7.736507879698145e-06, "loss": 0.5292, "step": 13037 }, { "epoch": 0.3841315204101174, "grad_norm": 1.4947261838658972, "learning_rate": 7.736077479851147e-06, "loss": 0.458, "step": 13038 }, { "epoch": 0.384160982867581, "grad_norm": 1.4204398002186673, "learning_rate": 7.73564705106275e-06, "loss": 0.3923, "step": 13039 }, { "epoch": 0.38419044532504454, "grad_norm": 1.523263646452828, "learning_rate": 7.735216593337515e-06, "loss": 0.4643, "step": 13040 }, { "epoch": 0.38421990778250814, "grad_norm": 1.595774601468375, "learning_rate": 7.734786106679994e-06, "loss": 0.5081, "step": 13041 }, { "epoch": 0.38424937023997174, "grad_norm": 1.695019738738014, "learning_rate": 7.734355591094736e-06, "loss": 0.4883, "step": 13042 }, { "epoch": 0.3842788326974353, "grad_norm": 1.5028444540962211, "learning_rate": 7.7339250465863e-06, "loss": 0.4402, "step": 13043 }, { "epoch": 0.3843082951548989, "grad_norm": 1.6672254560132913, "learning_rate": 7.733494473159238e-06, "loss": 0.4976, "step": 13044 }, { "epoch": 0.38433775761236244, "grad_norm": 1.3336497608524536, "learning_rate": 7.733063870818103e-06, "loss": 0.3586, "step": 13045 }, { "epoch": 0.38436722006982604, "grad_norm": 1.5222235346462245, "learning_rate": 7.732633239567451e-06, "loss": 0.4875, "step": 13046 }, { "epoch": 0.3843966825272896, "grad_norm": 1.5603534204657792, "learning_rate": 7.732202579411838e-06, "loss": 0.3938, "step": 13047 }, { "epoch": 0.3844261449847532, "grad_norm": 1.3587135368305276, "learning_rate": 7.73177189035582e-06, "loss": 0.4192, "step": 13048 }, { "epoch": 0.38445560744221674, "grad_norm": 1.4216087932461445, "learning_rate": 7.731341172403952e-06, "loss": 0.3852, "step": 13049 }, { "epoch": 0.38448506989968034, "grad_norm": 1.7602140095343848, "learning_rate": 7.730910425560787e-06, "loss": 0.4839, "step": 13050 }, { "epoch": 0.3845145323571439, "grad_norm": 1.5979228752239276, "learning_rate": 7.730479649830884e-06, "loss": 0.5178, "step": 13051 }, { "epoch": 0.3845439948146075, "grad_norm": 1.4108687245695775, "learning_rate": 7.730048845218801e-06, "loss": 0.4693, "step": 13052 }, { "epoch": 0.38457345727207104, "grad_norm": 1.452249068935448, "learning_rate": 7.729618011729094e-06, "loss": 0.444, "step": 13053 }, { "epoch": 0.38460291972953464, "grad_norm": 1.7875546442271877, "learning_rate": 7.729187149366317e-06, "loss": 0.411, "step": 13054 }, { "epoch": 0.38463238218699825, "grad_norm": 1.5648109621531714, "learning_rate": 7.728756258135031e-06, "loss": 0.3935, "step": 13055 }, { "epoch": 0.3846618446444618, "grad_norm": 1.613285909001651, "learning_rate": 7.728325338039794e-06, "loss": 0.5493, "step": 13056 }, { "epoch": 0.3846913071019254, "grad_norm": 1.5833808164258794, "learning_rate": 7.727894389085162e-06, "loss": 0.4279, "step": 13057 }, { "epoch": 0.38472076955938894, "grad_norm": 1.5500618258028982, "learning_rate": 7.727463411275694e-06, "loss": 0.5813, "step": 13058 }, { "epoch": 0.38475023201685254, "grad_norm": 1.4239402357507185, "learning_rate": 7.72703240461595e-06, "loss": 0.4929, "step": 13059 }, { "epoch": 0.3847796944743161, "grad_norm": 1.5279177142401656, "learning_rate": 7.72660136911049e-06, "loss": 0.5712, "step": 13060 }, { "epoch": 0.3848091569317797, "grad_norm": 1.644922250404672, "learning_rate": 7.72617030476387e-06, "loss": 0.4073, "step": 13061 }, { "epoch": 0.38483861938924324, "grad_norm": 1.918890187680228, "learning_rate": 7.72573921158065e-06, "loss": 0.5593, "step": 13062 }, { "epoch": 0.38486808184670684, "grad_norm": 1.5470988616991128, "learning_rate": 7.725308089565393e-06, "loss": 0.4729, "step": 13063 }, { "epoch": 0.3848975443041704, "grad_norm": 1.5260101133576598, "learning_rate": 7.724876938722657e-06, "loss": 0.5411, "step": 13064 }, { "epoch": 0.384927006761634, "grad_norm": 1.5169786647768884, "learning_rate": 7.724445759057005e-06, "loss": 0.4057, "step": 13065 }, { "epoch": 0.38495646921909754, "grad_norm": 1.505003190194559, "learning_rate": 7.724014550572994e-06, "loss": 0.4008, "step": 13066 }, { "epoch": 0.38498593167656114, "grad_norm": 1.4473904404983304, "learning_rate": 7.723583313275187e-06, "loss": 0.4827, "step": 13067 }, { "epoch": 0.38501539413402475, "grad_norm": 1.5642262376263485, "learning_rate": 7.723152047168147e-06, "loss": 0.521, "step": 13068 }, { "epoch": 0.3850448565914883, "grad_norm": 1.558711759452909, "learning_rate": 7.722720752256435e-06, "loss": 0.589, "step": 13069 }, { "epoch": 0.3850743190489519, "grad_norm": 1.6572031584121965, "learning_rate": 7.72228942854461e-06, "loss": 0.5067, "step": 13070 }, { "epoch": 0.38510378150641544, "grad_norm": 1.6366968219614069, "learning_rate": 7.721858076037238e-06, "loss": 0.5923, "step": 13071 }, { "epoch": 0.38513324396387905, "grad_norm": 1.768396730112726, "learning_rate": 7.721426694738883e-06, "loss": 0.5601, "step": 13072 }, { "epoch": 0.3851627064213426, "grad_norm": 1.7760358935620657, "learning_rate": 7.720995284654105e-06, "loss": 0.5167, "step": 13073 }, { "epoch": 0.3851921688788062, "grad_norm": 1.465546121717862, "learning_rate": 7.720563845787467e-06, "loss": 0.4217, "step": 13074 }, { "epoch": 0.38522163133626974, "grad_norm": 1.439144169281436, "learning_rate": 7.720132378143534e-06, "loss": 0.507, "step": 13075 }, { "epoch": 0.38525109379373335, "grad_norm": 1.810215927454077, "learning_rate": 7.71970088172687e-06, "loss": 0.3256, "step": 13076 }, { "epoch": 0.3852805562511969, "grad_norm": 1.4685570026861312, "learning_rate": 7.719269356542039e-06, "loss": 0.4159, "step": 13077 }, { "epoch": 0.3853100187086605, "grad_norm": 1.6264027933741436, "learning_rate": 7.718837802593607e-06, "loss": 0.427, "step": 13078 }, { "epoch": 0.38533948116612404, "grad_norm": 1.5606147939409294, "learning_rate": 7.718406219886135e-06, "loss": 0.4672, "step": 13079 }, { "epoch": 0.38536894362358765, "grad_norm": 1.7203777305588557, "learning_rate": 7.71797460842419e-06, "loss": 0.5465, "step": 13080 }, { "epoch": 0.38539840608105125, "grad_norm": 1.698849709739826, "learning_rate": 7.717542968212339e-06, "loss": 0.433, "step": 13081 }, { "epoch": 0.3854278685385148, "grad_norm": 1.5098738516497976, "learning_rate": 7.717111299255147e-06, "loss": 0.3805, "step": 13082 }, { "epoch": 0.3854573309959784, "grad_norm": 1.617427564324652, "learning_rate": 7.71667960155718e-06, "loss": 0.4472, "step": 13083 }, { "epoch": 0.38548679345344194, "grad_norm": 1.618052124017731, "learning_rate": 7.716247875123004e-06, "loss": 0.5758, "step": 13084 }, { "epoch": 0.38551625591090555, "grad_norm": 1.4667015698810169, "learning_rate": 7.715816119957184e-06, "loss": 0.4269, "step": 13085 }, { "epoch": 0.3855457183683691, "grad_norm": 1.7106677985238359, "learning_rate": 7.715384336064291e-06, "loss": 0.4774, "step": 13086 }, { "epoch": 0.3855751808258327, "grad_norm": 1.5423786716262347, "learning_rate": 7.714952523448889e-06, "loss": 0.584, "step": 13087 }, { "epoch": 0.38560464328329624, "grad_norm": 1.603336285126479, "learning_rate": 7.714520682115548e-06, "loss": 0.4019, "step": 13088 }, { "epoch": 0.38563410574075985, "grad_norm": 1.4649986697248554, "learning_rate": 7.714088812068832e-06, "loss": 0.4058, "step": 13089 }, { "epoch": 0.3856635681982234, "grad_norm": 1.8578113992558218, "learning_rate": 7.713656913313313e-06, "loss": 0.4885, "step": 13090 }, { "epoch": 0.385693030655687, "grad_norm": 1.3234940372240114, "learning_rate": 7.713224985853558e-06, "loss": 0.4202, "step": 13091 }, { "epoch": 0.38572249311315054, "grad_norm": 1.4447892654552499, "learning_rate": 7.712793029694135e-06, "loss": 0.4917, "step": 13092 }, { "epoch": 0.38575195557061415, "grad_norm": 1.442926759859189, "learning_rate": 7.712361044839616e-06, "loss": 0.407, "step": 13093 }, { "epoch": 0.38578141802807775, "grad_norm": 1.4940507697048022, "learning_rate": 7.711929031294568e-06, "loss": 0.3952, "step": 13094 }, { "epoch": 0.3858108804855413, "grad_norm": 1.4155527567695931, "learning_rate": 7.71149698906356e-06, "loss": 0.4867, "step": 13095 }, { "epoch": 0.3858403429430049, "grad_norm": 1.5926987435318705, "learning_rate": 7.711064918151165e-06, "loss": 0.5735, "step": 13096 }, { "epoch": 0.38586980540046845, "grad_norm": 1.5588720587995941, "learning_rate": 7.71063281856195e-06, "loss": 0.3663, "step": 13097 }, { "epoch": 0.38589926785793205, "grad_norm": 1.4667328773520973, "learning_rate": 7.71020069030049e-06, "loss": 0.3655, "step": 13098 }, { "epoch": 0.3859287303153956, "grad_norm": 1.5783566148402037, "learning_rate": 7.709768533371349e-06, "loss": 0.5492, "step": 13099 }, { "epoch": 0.3859581927728592, "grad_norm": 1.4499862448696492, "learning_rate": 7.709336347779103e-06, "loss": 0.4005, "step": 13100 }, { "epoch": 0.38598765523032275, "grad_norm": 1.303742904384396, "learning_rate": 7.708904133528326e-06, "loss": 0.3596, "step": 13101 }, { "epoch": 0.38601711768778635, "grad_norm": 1.563871617409728, "learning_rate": 7.708471890623583e-06, "loss": 0.4221, "step": 13102 }, { "epoch": 0.3860465801452499, "grad_norm": 1.6285944892606787, "learning_rate": 7.708039619069452e-06, "loss": 0.6112, "step": 13103 }, { "epoch": 0.3860760426027135, "grad_norm": 1.477903417586847, "learning_rate": 7.707607318870503e-06, "loss": 0.3763, "step": 13104 }, { "epoch": 0.38610550506017705, "grad_norm": 1.5694744787208452, "learning_rate": 7.707174990031308e-06, "loss": 0.4788, "step": 13105 }, { "epoch": 0.38613496751764065, "grad_norm": 1.5198440740978703, "learning_rate": 7.706742632556441e-06, "loss": 0.5327, "step": 13106 }, { "epoch": 0.38616442997510425, "grad_norm": 1.3413076609428944, "learning_rate": 7.706310246450477e-06, "loss": 0.3676, "step": 13107 }, { "epoch": 0.3861938924325678, "grad_norm": 1.526303845843277, "learning_rate": 7.705877831717987e-06, "loss": 0.535, "step": 13108 }, { "epoch": 0.3862233548900314, "grad_norm": 1.5931353794614669, "learning_rate": 7.705445388363546e-06, "loss": 0.46, "step": 13109 }, { "epoch": 0.38625281734749495, "grad_norm": 1.6920211967865848, "learning_rate": 7.705012916391726e-06, "loss": 0.6321, "step": 13110 }, { "epoch": 0.38628227980495855, "grad_norm": 1.3978763834865267, "learning_rate": 7.704580415807106e-06, "loss": 0.3711, "step": 13111 }, { "epoch": 0.3863117422624221, "grad_norm": 1.327237611927947, "learning_rate": 7.70414788661426e-06, "loss": 0.4116, "step": 13112 }, { "epoch": 0.3863412047198857, "grad_norm": 1.6382702678972387, "learning_rate": 7.70371532881776e-06, "loss": 0.5547, "step": 13113 }, { "epoch": 0.38637066717734925, "grad_norm": 1.4846597825718308, "learning_rate": 7.703282742422183e-06, "loss": 0.4877, "step": 13114 }, { "epoch": 0.38640012963481285, "grad_norm": 1.488855684966499, "learning_rate": 7.702850127432104e-06, "loss": 0.4468, "step": 13115 }, { "epoch": 0.3864295920922764, "grad_norm": 1.5568920250112182, "learning_rate": 7.702417483852102e-06, "loss": 0.3947, "step": 13116 }, { "epoch": 0.38645905454974, "grad_norm": 1.5611016972921536, "learning_rate": 7.701984811686752e-06, "loss": 0.4497, "step": 13117 }, { "epoch": 0.38648851700720355, "grad_norm": 1.6142371459000164, "learning_rate": 7.70155211094063e-06, "loss": 0.4533, "step": 13118 }, { "epoch": 0.38651797946466715, "grad_norm": 1.5029729195136852, "learning_rate": 7.701119381618311e-06, "loss": 0.2993, "step": 13119 }, { "epoch": 0.38654744192213075, "grad_norm": 1.5471775011341464, "learning_rate": 7.700686623724373e-06, "loss": 0.39, "step": 13120 }, { "epoch": 0.3865769043795943, "grad_norm": 1.5019612401512101, "learning_rate": 7.700253837263398e-06, "loss": 0.4428, "step": 13121 }, { "epoch": 0.3866063668370579, "grad_norm": 1.473402776760039, "learning_rate": 7.69982102223996e-06, "loss": 0.4318, "step": 13122 }, { "epoch": 0.38663582929452145, "grad_norm": 1.686287967301422, "learning_rate": 7.69938817865864e-06, "loss": 0.4211, "step": 13123 }, { "epoch": 0.38666529175198505, "grad_norm": 1.6449841246389914, "learning_rate": 7.698955306524012e-06, "loss": 0.4977, "step": 13124 }, { "epoch": 0.3866947542094486, "grad_norm": 1.7128363568107297, "learning_rate": 7.698522405840659e-06, "loss": 0.5871, "step": 13125 }, { "epoch": 0.3867242166669122, "grad_norm": 1.7995991724130649, "learning_rate": 7.698089476613156e-06, "loss": 0.6327, "step": 13126 }, { "epoch": 0.38675367912437575, "grad_norm": 1.5050773198185239, "learning_rate": 7.697656518846087e-06, "loss": 0.4063, "step": 13127 }, { "epoch": 0.38678314158183935, "grad_norm": 1.4339499621650709, "learning_rate": 7.69722353254403e-06, "loss": 0.4958, "step": 13128 }, { "epoch": 0.3868126040393029, "grad_norm": 1.3911333994181512, "learning_rate": 7.69679051771156e-06, "loss": 0.4182, "step": 13129 }, { "epoch": 0.3868420664967665, "grad_norm": 1.5404728589434717, "learning_rate": 7.696357474353267e-06, "loss": 0.3446, "step": 13130 }, { "epoch": 0.38687152895423005, "grad_norm": 1.3274478073547302, "learning_rate": 7.695924402473725e-06, "loss": 0.3285, "step": 13131 }, { "epoch": 0.38690099141169365, "grad_norm": 1.4727513365405602, "learning_rate": 7.695491302077514e-06, "loss": 0.5186, "step": 13132 }, { "epoch": 0.38693045386915725, "grad_norm": 1.6275394177350129, "learning_rate": 7.69505817316922e-06, "loss": 0.6132, "step": 13133 }, { "epoch": 0.3869599163266208, "grad_norm": 1.39809861631611, "learning_rate": 7.694625015753421e-06, "loss": 0.401, "step": 13134 }, { "epoch": 0.3869893787840844, "grad_norm": 1.4569648361279717, "learning_rate": 7.6941918298347e-06, "loss": 0.3951, "step": 13135 }, { "epoch": 0.38701884124154795, "grad_norm": 1.6302920148714772, "learning_rate": 7.693758615417638e-06, "loss": 0.5385, "step": 13136 }, { "epoch": 0.38704830369901155, "grad_norm": 1.7257461183684786, "learning_rate": 7.69332537250682e-06, "loss": 0.5721, "step": 13137 }, { "epoch": 0.3870777661564751, "grad_norm": 1.5303157552936422, "learning_rate": 7.692892101106827e-06, "loss": 0.5015, "step": 13138 }, { "epoch": 0.3871072286139387, "grad_norm": 1.6098943273955266, "learning_rate": 7.69245880122224e-06, "loss": 0.5114, "step": 13139 }, { "epoch": 0.38713669107140225, "grad_norm": 1.528514641218599, "learning_rate": 7.692025472857646e-06, "loss": 0.5141, "step": 13140 }, { "epoch": 0.38716615352886585, "grad_norm": 1.4421985480974109, "learning_rate": 7.691592116017626e-06, "loss": 0.4345, "step": 13141 }, { "epoch": 0.3871956159863294, "grad_norm": 1.6042578057999028, "learning_rate": 7.691158730706765e-06, "loss": 0.4326, "step": 13142 }, { "epoch": 0.387225078443793, "grad_norm": 1.3557251301080049, "learning_rate": 7.690725316929648e-06, "loss": 0.3239, "step": 13143 }, { "epoch": 0.38725454090125655, "grad_norm": 1.4267936876470093, "learning_rate": 7.690291874690859e-06, "loss": 0.4189, "step": 13144 }, { "epoch": 0.38728400335872015, "grad_norm": 1.461609212558983, "learning_rate": 7.68985840399498e-06, "loss": 0.58, "step": 13145 }, { "epoch": 0.38731346581618376, "grad_norm": 1.5757210702728763, "learning_rate": 7.6894249048466e-06, "loss": 0.5312, "step": 13146 }, { "epoch": 0.3873429282736473, "grad_norm": 1.4938729637368735, "learning_rate": 7.688991377250303e-06, "loss": 0.5973, "step": 13147 }, { "epoch": 0.3873723907311109, "grad_norm": 1.639896951923709, "learning_rate": 7.688557821210675e-06, "loss": 0.4593, "step": 13148 }, { "epoch": 0.38740185318857445, "grad_norm": 1.354446223760598, "learning_rate": 7.688124236732301e-06, "loss": 0.4031, "step": 13149 }, { "epoch": 0.38743131564603805, "grad_norm": 1.5970735898522541, "learning_rate": 7.68769062381977e-06, "loss": 0.4877, "step": 13150 }, { "epoch": 0.3874607781035016, "grad_norm": 1.5712536073052579, "learning_rate": 7.687256982477664e-06, "loss": 0.5021, "step": 13151 }, { "epoch": 0.3874902405609652, "grad_norm": 1.9204419183340764, "learning_rate": 7.686823312710575e-06, "loss": 0.5701, "step": 13152 }, { "epoch": 0.38751970301842875, "grad_norm": 1.4155230197466744, "learning_rate": 7.686389614523087e-06, "loss": 0.4299, "step": 13153 }, { "epoch": 0.38754916547589235, "grad_norm": 1.3371923427233388, "learning_rate": 7.685955887919788e-06, "loss": 0.4295, "step": 13154 }, { "epoch": 0.3875786279333559, "grad_norm": 1.3785935823635498, "learning_rate": 7.685522132905266e-06, "loss": 0.4574, "step": 13155 }, { "epoch": 0.3876080903908195, "grad_norm": 1.563475336712375, "learning_rate": 7.68508834948411e-06, "loss": 0.5142, "step": 13156 }, { "epoch": 0.38763755284828305, "grad_norm": 1.429658205691863, "learning_rate": 7.684654537660906e-06, "loss": 0.3793, "step": 13157 }, { "epoch": 0.38766701530574665, "grad_norm": 1.5882588148741055, "learning_rate": 7.684220697440245e-06, "loss": 0.6248, "step": 13158 }, { "epoch": 0.38769647776321026, "grad_norm": 1.4799623253512133, "learning_rate": 7.683786828826719e-06, "loss": 0.4511, "step": 13159 }, { "epoch": 0.3877259402206738, "grad_norm": 1.8344949055439443, "learning_rate": 7.68335293182491e-06, "loss": 0.6881, "step": 13160 }, { "epoch": 0.3877554026781374, "grad_norm": 1.309916307612613, "learning_rate": 7.682919006439413e-06, "loss": 0.3401, "step": 13161 }, { "epoch": 0.38778486513560095, "grad_norm": 1.4894057100146885, "learning_rate": 7.682485052674814e-06, "loss": 0.4619, "step": 13162 }, { "epoch": 0.38781432759306456, "grad_norm": 1.474257705274097, "learning_rate": 7.68205107053571e-06, "loss": 0.4258, "step": 13163 }, { "epoch": 0.3878437900505281, "grad_norm": 1.4517031157694875, "learning_rate": 7.681617060026685e-06, "loss": 0.5015, "step": 13164 }, { "epoch": 0.3878732525079917, "grad_norm": 1.4186437022952476, "learning_rate": 7.68118302115233e-06, "loss": 0.4414, "step": 13165 }, { "epoch": 0.38790271496545525, "grad_norm": 1.4857590773425338, "learning_rate": 7.68074895391724e-06, "loss": 0.3826, "step": 13166 }, { "epoch": 0.38793217742291886, "grad_norm": 1.447730682011154, "learning_rate": 7.680314858326005e-06, "loss": 0.4597, "step": 13167 }, { "epoch": 0.3879616398803824, "grad_norm": 1.7123097288852815, "learning_rate": 7.679880734383216e-06, "loss": 0.5363, "step": 13168 }, { "epoch": 0.387991102337846, "grad_norm": 1.4570306382973188, "learning_rate": 7.679446582093464e-06, "loss": 0.4903, "step": 13169 }, { "epoch": 0.38802056479530955, "grad_norm": 1.5082098516327997, "learning_rate": 7.679012401461345e-06, "loss": 0.4024, "step": 13170 }, { "epoch": 0.38805002725277316, "grad_norm": 1.475287913046277, "learning_rate": 7.678578192491449e-06, "loss": 0.4476, "step": 13171 }, { "epoch": 0.38807948971023676, "grad_norm": 1.3854654942901885, "learning_rate": 7.678143955188366e-06, "loss": 0.4167, "step": 13172 }, { "epoch": 0.3881089521677003, "grad_norm": 1.688605105434381, "learning_rate": 7.677709689556696e-06, "loss": 0.5588, "step": 13173 }, { "epoch": 0.3881384146251639, "grad_norm": 1.7036467131961432, "learning_rate": 7.677275395601027e-06, "loss": 0.4904, "step": 13174 }, { "epoch": 0.38816787708262745, "grad_norm": 1.5796753075974173, "learning_rate": 7.676841073325956e-06, "loss": 0.4779, "step": 13175 }, { "epoch": 0.38819733954009106, "grad_norm": 1.3932230946143271, "learning_rate": 7.676406722736075e-06, "loss": 0.3584, "step": 13176 }, { "epoch": 0.3882268019975546, "grad_norm": 1.5617183931137588, "learning_rate": 7.675972343835978e-06, "loss": 0.3989, "step": 13177 }, { "epoch": 0.3882562644550182, "grad_norm": 1.5691380038310463, "learning_rate": 7.675537936630264e-06, "loss": 0.5049, "step": 13178 }, { "epoch": 0.38828572691248175, "grad_norm": 1.4168224161070506, "learning_rate": 7.675103501123525e-06, "loss": 0.4609, "step": 13179 }, { "epoch": 0.38831518936994536, "grad_norm": 1.6162494300705954, "learning_rate": 7.674669037320354e-06, "loss": 0.4588, "step": 13180 }, { "epoch": 0.3883446518274089, "grad_norm": 1.734461308843359, "learning_rate": 7.67423454522535e-06, "loss": 0.4789, "step": 13181 }, { "epoch": 0.3883741142848725, "grad_norm": 1.7375301933496785, "learning_rate": 7.673800024843108e-06, "loss": 0.5858, "step": 13182 }, { "epoch": 0.38840357674233605, "grad_norm": 1.4513744901468972, "learning_rate": 7.673365476178226e-06, "loss": 0.4189, "step": 13183 }, { "epoch": 0.38843303919979966, "grad_norm": 1.5839818606793201, "learning_rate": 7.672930899235296e-06, "loss": 0.5334, "step": 13184 }, { "epoch": 0.38846250165726326, "grad_norm": 1.2734469575859626, "learning_rate": 7.672496294018918e-06, "loss": 0.3927, "step": 13185 }, { "epoch": 0.3884919641147268, "grad_norm": 1.6952400476718585, "learning_rate": 7.672061660533691e-06, "loss": 0.6088, "step": 13186 }, { "epoch": 0.3885214265721904, "grad_norm": 1.3298857334049934, "learning_rate": 7.671626998784208e-06, "loss": 0.3475, "step": 13187 }, { "epoch": 0.38855088902965396, "grad_norm": 1.554132902751825, "learning_rate": 7.67119230877507e-06, "loss": 0.5719, "step": 13188 }, { "epoch": 0.38858035148711756, "grad_norm": 1.4852226566811244, "learning_rate": 7.670757590510873e-06, "loss": 0.4039, "step": 13189 }, { "epoch": 0.3886098139445811, "grad_norm": 1.59103870152514, "learning_rate": 7.670322843996216e-06, "loss": 0.4144, "step": 13190 }, { "epoch": 0.3886392764020447, "grad_norm": 1.5903420389413723, "learning_rate": 7.669888069235699e-06, "loss": 0.554, "step": 13191 }, { "epoch": 0.38866873885950826, "grad_norm": 1.5136173006020606, "learning_rate": 7.669453266233917e-06, "loss": 0.4416, "step": 13192 }, { "epoch": 0.38869820131697186, "grad_norm": 1.4170844322240008, "learning_rate": 7.669018434995474e-06, "loss": 0.4484, "step": 13193 }, { "epoch": 0.3887276637744354, "grad_norm": 1.4968438880677557, "learning_rate": 7.668583575524967e-06, "loss": 0.367, "step": 13194 }, { "epoch": 0.388757126231899, "grad_norm": 1.4463065186061366, "learning_rate": 7.668148687826998e-06, "loss": 0.3755, "step": 13195 }, { "epoch": 0.38878658868936256, "grad_norm": 1.4625972112134022, "learning_rate": 7.667713771906163e-06, "loss": 0.4584, "step": 13196 }, { "epoch": 0.38881605114682616, "grad_norm": 1.4267622310984547, "learning_rate": 7.667278827767064e-06, "loss": 0.3918, "step": 13197 }, { "epoch": 0.38884551360428976, "grad_norm": 1.5597883432519797, "learning_rate": 7.666843855414306e-06, "loss": 0.497, "step": 13198 }, { "epoch": 0.3888749760617533, "grad_norm": 1.4890658376530612, "learning_rate": 7.666408854852483e-06, "loss": 0.4312, "step": 13199 }, { "epoch": 0.3889044385192169, "grad_norm": 1.5252791209390988, "learning_rate": 7.665973826086199e-06, "loss": 0.3797, "step": 13200 }, { "epoch": 0.38893390097668046, "grad_norm": 1.5570096071633455, "learning_rate": 7.66553876912006e-06, "loss": 0.4483, "step": 13201 }, { "epoch": 0.38896336343414406, "grad_norm": 1.4740728129323681, "learning_rate": 7.665103683958661e-06, "loss": 0.452, "step": 13202 }, { "epoch": 0.3889928258916076, "grad_norm": 1.4533468889781247, "learning_rate": 7.66466857060661e-06, "loss": 0.5057, "step": 13203 }, { "epoch": 0.3890222883490712, "grad_norm": 1.6865725733154606, "learning_rate": 7.664233429068506e-06, "loss": 0.4147, "step": 13204 }, { "epoch": 0.38905175080653476, "grad_norm": 1.4524123750050968, "learning_rate": 7.663798259348953e-06, "loss": 0.4639, "step": 13205 }, { "epoch": 0.38908121326399836, "grad_norm": 1.7592429948047839, "learning_rate": 7.663363061452553e-06, "loss": 0.4198, "step": 13206 }, { "epoch": 0.3891106757214619, "grad_norm": 1.7758453927170885, "learning_rate": 7.662927835383911e-06, "loss": 0.6761, "step": 13207 }, { "epoch": 0.3891401381789255, "grad_norm": 1.669654207469261, "learning_rate": 7.66249258114763e-06, "loss": 0.5532, "step": 13208 }, { "epoch": 0.38916960063638906, "grad_norm": 1.3999829547204863, "learning_rate": 7.662057298748314e-06, "loss": 0.3655, "step": 13209 }, { "epoch": 0.38919906309385266, "grad_norm": 1.5613259644446904, "learning_rate": 7.661621988190567e-06, "loss": 0.4978, "step": 13210 }, { "epoch": 0.38922852555131626, "grad_norm": 1.380562652196086, "learning_rate": 7.661186649478993e-06, "loss": 0.4021, "step": 13211 }, { "epoch": 0.3892579880087798, "grad_norm": 1.6668572655741971, "learning_rate": 7.660751282618196e-06, "loss": 0.6418, "step": 13212 }, { "epoch": 0.3892874504662434, "grad_norm": 1.6295093516729464, "learning_rate": 7.660315887612788e-06, "loss": 0.4589, "step": 13213 }, { "epoch": 0.38931691292370696, "grad_norm": 1.70280881688491, "learning_rate": 7.659880464467364e-06, "loss": 0.4937, "step": 13214 }, { "epoch": 0.38934637538117056, "grad_norm": 1.4528312744194853, "learning_rate": 7.659445013186537e-06, "loss": 0.4385, "step": 13215 }, { "epoch": 0.3893758378386341, "grad_norm": 1.6198059560829234, "learning_rate": 7.659009533774912e-06, "loss": 0.4571, "step": 13216 }, { "epoch": 0.3894053002960977, "grad_norm": 1.4416469717651859, "learning_rate": 7.658574026237093e-06, "loss": 0.4599, "step": 13217 }, { "epoch": 0.38943476275356126, "grad_norm": 1.8170318224071016, "learning_rate": 7.658138490577691e-06, "loss": 0.4558, "step": 13218 }, { "epoch": 0.38946422521102486, "grad_norm": 1.4134605406876641, "learning_rate": 7.657702926801308e-06, "loss": 0.3932, "step": 13219 }, { "epoch": 0.3894936876684884, "grad_norm": 1.5199439584996783, "learning_rate": 7.657267334912553e-06, "loss": 0.4985, "step": 13220 }, { "epoch": 0.389523150125952, "grad_norm": 1.7010956276803912, "learning_rate": 7.656831714916035e-06, "loss": 0.4793, "step": 13221 }, { "epoch": 0.38955261258341556, "grad_norm": 1.6114838410470484, "learning_rate": 7.65639606681636e-06, "loss": 0.698, "step": 13222 }, { "epoch": 0.38958207504087916, "grad_norm": 1.4214933231540525, "learning_rate": 7.655960390618137e-06, "loss": 0.4656, "step": 13223 }, { "epoch": 0.38961153749834276, "grad_norm": 1.5993447428990781, "learning_rate": 7.655524686325976e-06, "loss": 0.568, "step": 13224 }, { "epoch": 0.3896409999558063, "grad_norm": 1.544541517579763, "learning_rate": 7.655088953944483e-06, "loss": 0.4523, "step": 13225 }, { "epoch": 0.3896704624132699, "grad_norm": 1.47149337308528, "learning_rate": 7.654653193478268e-06, "loss": 0.429, "step": 13226 }, { "epoch": 0.38969992487073346, "grad_norm": 1.4461660141295527, "learning_rate": 7.65421740493194e-06, "loss": 0.4013, "step": 13227 }, { "epoch": 0.38972938732819706, "grad_norm": 1.3575827220487455, "learning_rate": 7.653781588310112e-06, "loss": 0.3899, "step": 13228 }, { "epoch": 0.3897588497856606, "grad_norm": 1.495803824151631, "learning_rate": 7.653345743617388e-06, "loss": 0.4731, "step": 13229 }, { "epoch": 0.3897883122431242, "grad_norm": 1.7122821643859178, "learning_rate": 7.652909870858382e-06, "loss": 0.4491, "step": 13230 }, { "epoch": 0.38981777470058776, "grad_norm": 1.621326688682392, "learning_rate": 7.652473970037704e-06, "loss": 0.5332, "step": 13231 }, { "epoch": 0.38984723715805136, "grad_norm": 1.3397726471923659, "learning_rate": 7.652038041159966e-06, "loss": 0.4912, "step": 13232 }, { "epoch": 0.3898766996155149, "grad_norm": 1.3640404378977327, "learning_rate": 7.651602084229776e-06, "loss": 0.3, "step": 13233 }, { "epoch": 0.3899061620729785, "grad_norm": 1.7379021248126498, "learning_rate": 7.65116609925175e-06, "loss": 0.537, "step": 13234 }, { "epoch": 0.38993562453044206, "grad_norm": 1.684634991063936, "learning_rate": 7.650730086230493e-06, "loss": 0.6232, "step": 13235 }, { "epoch": 0.38996508698790566, "grad_norm": 1.5346418333312055, "learning_rate": 7.650294045170623e-06, "loss": 0.4398, "step": 13236 }, { "epoch": 0.38999454944536927, "grad_norm": 1.427291508792042, "learning_rate": 7.649857976076752e-06, "loss": 0.4447, "step": 13237 }, { "epoch": 0.3900240119028328, "grad_norm": 1.5739035207613583, "learning_rate": 7.64942187895349e-06, "loss": 0.4023, "step": 13238 }, { "epoch": 0.3900534743602964, "grad_norm": 1.5814228434838342, "learning_rate": 7.64898575380545e-06, "loss": 0.6823, "step": 13239 }, { "epoch": 0.39008293681775996, "grad_norm": 1.5448590325351488, "learning_rate": 7.648549600637245e-06, "loss": 0.4191, "step": 13240 }, { "epoch": 0.39011239927522356, "grad_norm": 1.695547808671041, "learning_rate": 7.648113419453491e-06, "loss": 0.5923, "step": 13241 }, { "epoch": 0.3901418617326871, "grad_norm": 1.4069377951075857, "learning_rate": 7.647677210258799e-06, "loss": 0.4307, "step": 13242 }, { "epoch": 0.3901713241901507, "grad_norm": 1.584634234221079, "learning_rate": 7.647240973057785e-06, "loss": 0.5205, "step": 13243 }, { "epoch": 0.39020078664761426, "grad_norm": 1.6352424921960977, "learning_rate": 7.646804707855065e-06, "loss": 0.4597, "step": 13244 }, { "epoch": 0.39023024910507786, "grad_norm": 1.6218935828051504, "learning_rate": 7.646368414655248e-06, "loss": 0.4958, "step": 13245 }, { "epoch": 0.3902597115625414, "grad_norm": 1.4883294897338522, "learning_rate": 7.645932093462954e-06, "loss": 0.4581, "step": 13246 }, { "epoch": 0.390289174020005, "grad_norm": 1.5277748617154303, "learning_rate": 7.645495744282795e-06, "loss": 0.416, "step": 13247 }, { "epoch": 0.39031863647746856, "grad_norm": 1.5212364567330405, "learning_rate": 7.645059367119392e-06, "loss": 0.4791, "step": 13248 }, { "epoch": 0.39034809893493216, "grad_norm": 1.4758880168389492, "learning_rate": 7.644622961977354e-06, "loss": 0.4237, "step": 13249 }, { "epoch": 0.39037756139239577, "grad_norm": 1.4630553053251942, "learning_rate": 7.644186528861301e-06, "loss": 0.3736, "step": 13250 }, { "epoch": 0.3904070238498593, "grad_norm": 1.4860135134709846, "learning_rate": 7.643750067775849e-06, "loss": 0.4408, "step": 13251 }, { "epoch": 0.3904364863073229, "grad_norm": 1.3721105806502256, "learning_rate": 7.643313578725616e-06, "loss": 0.4598, "step": 13252 }, { "epoch": 0.39046594876478646, "grad_norm": 1.5776093029065639, "learning_rate": 7.642877061715215e-06, "loss": 0.5078, "step": 13253 }, { "epoch": 0.39049541122225007, "grad_norm": 1.6462587066414114, "learning_rate": 7.642440516749266e-06, "loss": 0.5943, "step": 13254 }, { "epoch": 0.3905248736797136, "grad_norm": 1.4320106685817524, "learning_rate": 7.642003943832388e-06, "loss": 0.3891, "step": 13255 }, { "epoch": 0.3905543361371772, "grad_norm": 1.4686551561167698, "learning_rate": 7.641567342969196e-06, "loss": 0.5023, "step": 13256 }, { "epoch": 0.39058379859464076, "grad_norm": 1.4637199906040899, "learning_rate": 7.64113071416431e-06, "loss": 0.4609, "step": 13257 }, { "epoch": 0.39061326105210437, "grad_norm": 1.5118681760102857, "learning_rate": 7.64069405742235e-06, "loss": 0.4764, "step": 13258 }, { "epoch": 0.3906427235095679, "grad_norm": 1.3359999461270728, "learning_rate": 7.640257372747932e-06, "loss": 0.3157, "step": 13259 }, { "epoch": 0.3906721859670315, "grad_norm": 1.2965109271612476, "learning_rate": 7.639820660145675e-06, "loss": 0.4128, "step": 13260 }, { "epoch": 0.39070164842449506, "grad_norm": 1.519405296601516, "learning_rate": 7.639383919620202e-06, "loss": 0.5965, "step": 13261 }, { "epoch": 0.39073111088195867, "grad_norm": 1.9648045105768717, "learning_rate": 7.638947151176129e-06, "loss": 0.5485, "step": 13262 }, { "epoch": 0.39076057333942227, "grad_norm": 1.4254868279105772, "learning_rate": 7.638510354818075e-06, "loss": 0.4665, "step": 13263 }, { "epoch": 0.3907900357968858, "grad_norm": 1.782885879755262, "learning_rate": 7.638073530550667e-06, "loss": 0.5345, "step": 13264 }, { "epoch": 0.3908194982543494, "grad_norm": 1.6520188625895813, "learning_rate": 7.637636678378518e-06, "loss": 0.4345, "step": 13265 }, { "epoch": 0.39084896071181296, "grad_norm": 1.5033360663839235, "learning_rate": 7.637199798306251e-06, "loss": 0.4485, "step": 13266 }, { "epoch": 0.39087842316927657, "grad_norm": 1.494199305164821, "learning_rate": 7.636762890338491e-06, "loss": 0.4387, "step": 13267 }, { "epoch": 0.3909078856267401, "grad_norm": 1.6097606095374664, "learning_rate": 7.636325954479858e-06, "loss": 0.4974, "step": 13268 }, { "epoch": 0.3909373480842037, "grad_norm": 1.6660868761992245, "learning_rate": 7.63588899073497e-06, "loss": 0.6417, "step": 13269 }, { "epoch": 0.39096681054166726, "grad_norm": 1.368514510208875, "learning_rate": 7.635451999108452e-06, "loss": 0.3883, "step": 13270 }, { "epoch": 0.39099627299913087, "grad_norm": 1.4022220634361338, "learning_rate": 7.635014979604924e-06, "loss": 0.3911, "step": 13271 }, { "epoch": 0.3910257354565944, "grad_norm": 1.5635190647193424, "learning_rate": 7.634577932229012e-06, "loss": 0.6209, "step": 13272 }, { "epoch": 0.391055197914058, "grad_norm": 1.5981107100170415, "learning_rate": 7.634140856985338e-06, "loss": 0.5309, "step": 13273 }, { "epoch": 0.39108466037152156, "grad_norm": 1.6213695821140621, "learning_rate": 7.633703753878528e-06, "loss": 0.5158, "step": 13274 }, { "epoch": 0.39111412282898517, "grad_norm": 1.695117525418517, "learning_rate": 7.633266622913199e-06, "loss": 0.4154, "step": 13275 }, { "epoch": 0.39114358528644877, "grad_norm": 1.59066166885025, "learning_rate": 7.632829464093977e-06, "loss": 0.5203, "step": 13276 }, { "epoch": 0.3911730477439123, "grad_norm": 1.8873055314480183, "learning_rate": 7.632392277425489e-06, "loss": 0.4804, "step": 13277 }, { "epoch": 0.3912025102013759, "grad_norm": 1.3588438308411497, "learning_rate": 7.631955062912357e-06, "loss": 0.4084, "step": 13278 }, { "epoch": 0.39123197265883947, "grad_norm": 1.4151813448094728, "learning_rate": 7.631517820559206e-06, "loss": 0.4302, "step": 13279 }, { "epoch": 0.39126143511630307, "grad_norm": 1.4601588099263942, "learning_rate": 7.631080550370663e-06, "loss": 0.4539, "step": 13280 }, { "epoch": 0.3912908975737666, "grad_norm": 1.3177731056560849, "learning_rate": 7.630643252351351e-06, "loss": 0.4468, "step": 13281 }, { "epoch": 0.3913203600312302, "grad_norm": 1.4405219150432356, "learning_rate": 7.630205926505895e-06, "loss": 0.4517, "step": 13282 }, { "epoch": 0.39134982248869377, "grad_norm": 1.4488723359068454, "learning_rate": 7.629768572838926e-06, "loss": 0.5072, "step": 13283 }, { "epoch": 0.39137928494615737, "grad_norm": 1.6681665086914421, "learning_rate": 7.629331191355064e-06, "loss": 0.4504, "step": 13284 }, { "epoch": 0.3914087474036209, "grad_norm": 1.4460088753463765, "learning_rate": 7.628893782058938e-06, "loss": 0.4154, "step": 13285 }, { "epoch": 0.3914382098610845, "grad_norm": 1.5229463923770497, "learning_rate": 7.628456344955175e-06, "loss": 0.5905, "step": 13286 }, { "epoch": 0.39146767231854807, "grad_norm": 1.3964004633307343, "learning_rate": 7.6280188800484015e-06, "loss": 0.4551, "step": 13287 }, { "epoch": 0.39149713477601167, "grad_norm": 1.7827611273243424, "learning_rate": 7.627581387343246e-06, "loss": 0.6463, "step": 13288 }, { "epoch": 0.39152659723347527, "grad_norm": 1.3790694526396001, "learning_rate": 7.627143866844335e-06, "loss": 0.3603, "step": 13289 }, { "epoch": 0.3915560596909388, "grad_norm": 1.7586179251535827, "learning_rate": 7.626706318556296e-06, "loss": 0.54, "step": 13290 }, { "epoch": 0.3915855221484024, "grad_norm": 1.5377881481655589, "learning_rate": 7.62626874248376e-06, "loss": 0.453, "step": 13291 }, { "epoch": 0.39161498460586597, "grad_norm": 1.493594565581915, "learning_rate": 7.625831138631351e-06, "loss": 0.5593, "step": 13292 }, { "epoch": 0.39164444706332957, "grad_norm": 1.5106975726083238, "learning_rate": 7.6253935070037025e-06, "loss": 0.3436, "step": 13293 }, { "epoch": 0.3916739095207931, "grad_norm": 1.3756823726897804, "learning_rate": 7.62495584760544e-06, "loss": 0.5208, "step": 13294 }, { "epoch": 0.3917033719782567, "grad_norm": 1.6028087028500657, "learning_rate": 7.624518160441197e-06, "loss": 0.3661, "step": 13295 }, { "epoch": 0.39173283443572027, "grad_norm": 1.6206270681082533, "learning_rate": 7.624080445515598e-06, "loss": 0.4699, "step": 13296 }, { "epoch": 0.39176229689318387, "grad_norm": 1.637372418234879, "learning_rate": 7.623642702833278e-06, "loss": 0.491, "step": 13297 }, { "epoch": 0.3917917593506474, "grad_norm": 1.6990445840651753, "learning_rate": 7.623204932398864e-06, "loss": 0.5292, "step": 13298 }, { "epoch": 0.391821221808111, "grad_norm": 1.592989079827227, "learning_rate": 7.6227671342169875e-06, "loss": 0.5028, "step": 13299 }, { "epoch": 0.39185068426557457, "grad_norm": 1.763894269996866, "learning_rate": 7.6223293082922804e-06, "loss": 0.599, "step": 13300 }, { "epoch": 0.39188014672303817, "grad_norm": 1.4112977620832299, "learning_rate": 7.621891454629371e-06, "loss": 0.3462, "step": 13301 }, { "epoch": 0.3919096091805018, "grad_norm": 1.5018986529760838, "learning_rate": 7.621453573232894e-06, "loss": 0.3363, "step": 13302 }, { "epoch": 0.3919390716379653, "grad_norm": 1.5134449848995966, "learning_rate": 7.6210156641074805e-06, "loss": 0.4569, "step": 13303 }, { "epoch": 0.3919685340954289, "grad_norm": 1.3893869198187396, "learning_rate": 7.620577727257763e-06, "loss": 0.3441, "step": 13304 }, { "epoch": 0.39199799655289247, "grad_norm": 1.4756799425037284, "learning_rate": 7.620139762688372e-06, "loss": 0.3248, "step": 13305 }, { "epoch": 0.39202745901035607, "grad_norm": 1.6289258744976614, "learning_rate": 7.619701770403941e-06, "loss": 0.5211, "step": 13306 }, { "epoch": 0.3920569214678196, "grad_norm": 1.6812948177648266, "learning_rate": 7.6192637504091026e-06, "loss": 0.5303, "step": 13307 }, { "epoch": 0.3920863839252832, "grad_norm": 1.4481427067247434, "learning_rate": 7.6188257027084915e-06, "loss": 0.4391, "step": 13308 }, { "epoch": 0.39211584638274677, "grad_norm": 1.5632278541277902, "learning_rate": 7.6183876273067385e-06, "loss": 0.4847, "step": 13309 }, { "epoch": 0.39214530884021037, "grad_norm": 1.5348523135541356, "learning_rate": 7.617949524208481e-06, "loss": 0.5835, "step": 13310 }, { "epoch": 0.3921747712976739, "grad_norm": 1.6689127990739614, "learning_rate": 7.61751139341835e-06, "loss": 0.4343, "step": 13311 }, { "epoch": 0.3922042337551375, "grad_norm": 1.5453832439232436, "learning_rate": 7.617073234940981e-06, "loss": 0.6034, "step": 13312 }, { "epoch": 0.39223369621260107, "grad_norm": 1.3735754696434268, "learning_rate": 7.616635048781011e-06, "loss": 0.4572, "step": 13313 }, { "epoch": 0.39226315867006467, "grad_norm": 1.6384388852913603, "learning_rate": 7.6161968349430714e-06, "loss": 0.5067, "step": 13314 }, { "epoch": 0.3922926211275283, "grad_norm": 1.6549241484449986, "learning_rate": 7.615758593431799e-06, "loss": 0.5523, "step": 13315 }, { "epoch": 0.3923220835849918, "grad_norm": 1.314043996016576, "learning_rate": 7.615320324251829e-06, "loss": 0.3405, "step": 13316 }, { "epoch": 0.3923515460424554, "grad_norm": 1.51347874959386, "learning_rate": 7.6148820274078e-06, "loss": 0.5215, "step": 13317 }, { "epoch": 0.39238100849991897, "grad_norm": 1.4218617784098757, "learning_rate": 7.614443702904343e-06, "loss": 0.438, "step": 13318 }, { "epoch": 0.3924104709573826, "grad_norm": 1.4142601000628225, "learning_rate": 7.614005350746098e-06, "loss": 0.3277, "step": 13319 }, { "epoch": 0.3924399334148461, "grad_norm": 1.620604510995514, "learning_rate": 7.6135669709377016e-06, "loss": 0.4817, "step": 13320 }, { "epoch": 0.3924693958723097, "grad_norm": 1.7219837529584914, "learning_rate": 7.613128563483789e-06, "loss": 0.5234, "step": 13321 }, { "epoch": 0.39249885832977327, "grad_norm": 1.6036486328279334, "learning_rate": 7.612690128389002e-06, "loss": 0.4818, "step": 13322 }, { "epoch": 0.3925283207872369, "grad_norm": 1.4108487357642914, "learning_rate": 7.612251665657973e-06, "loss": 0.4631, "step": 13323 }, { "epoch": 0.3925577832447004, "grad_norm": 1.6926027633844323, "learning_rate": 7.611813175295341e-06, "loss": 0.5515, "step": 13324 }, { "epoch": 0.392587245702164, "grad_norm": 1.322450804510401, "learning_rate": 7.611374657305745e-06, "loss": 0.221, "step": 13325 }, { "epoch": 0.39261670815962757, "grad_norm": 1.6650320241334167, "learning_rate": 7.6109361116938255e-06, "loss": 0.466, "step": 13326 }, { "epoch": 0.3926461706170912, "grad_norm": 1.4438854011260007, "learning_rate": 7.6104975384642185e-06, "loss": 0.4813, "step": 13327 }, { "epoch": 0.3926756330745548, "grad_norm": 1.6101914806224749, "learning_rate": 7.610058937621565e-06, "loss": 0.4911, "step": 13328 }, { "epoch": 0.3927050955320183, "grad_norm": 1.397494401307252, "learning_rate": 7.6096203091705025e-06, "loss": 0.3566, "step": 13329 }, { "epoch": 0.3927345579894819, "grad_norm": 1.6896995472266514, "learning_rate": 7.609181653115672e-06, "loss": 0.5983, "step": 13330 }, { "epoch": 0.39276402044694547, "grad_norm": 1.6219984403311656, "learning_rate": 7.6087429694617134e-06, "loss": 0.5224, "step": 13331 }, { "epoch": 0.3927934829044091, "grad_norm": 1.6750451455877724, "learning_rate": 7.608304258213268e-06, "loss": 0.4881, "step": 13332 }, { "epoch": 0.3928229453618726, "grad_norm": 1.3798366769312436, "learning_rate": 7.607865519374973e-06, "loss": 0.3747, "step": 13333 }, { "epoch": 0.3928524078193362, "grad_norm": 1.5942080943266672, "learning_rate": 7.607426752951472e-06, "loss": 0.4653, "step": 13334 }, { "epoch": 0.39288187027679977, "grad_norm": 1.856248685274037, "learning_rate": 7.6069879589474046e-06, "loss": 0.6898, "step": 13335 }, { "epoch": 0.3929113327342634, "grad_norm": 1.7136419000458574, "learning_rate": 7.606549137367414e-06, "loss": 0.3822, "step": 13336 }, { "epoch": 0.3929407951917269, "grad_norm": 1.6899582434420988, "learning_rate": 7.606110288216142e-06, "loss": 0.4179, "step": 13337 }, { "epoch": 0.3929702576491905, "grad_norm": 1.723948133004022, "learning_rate": 7.605671411498228e-06, "loss": 0.6404, "step": 13338 }, { "epoch": 0.39299972010665407, "grad_norm": 1.4371811429737942, "learning_rate": 7.605232507218317e-06, "loss": 0.3669, "step": 13339 }, { "epoch": 0.3930291825641177, "grad_norm": 1.7996084245898856, "learning_rate": 7.60479357538105e-06, "loss": 0.638, "step": 13340 }, { "epoch": 0.3930586450215813, "grad_norm": 1.6025188870739797, "learning_rate": 7.60435461599107e-06, "loss": 0.55, "step": 13341 }, { "epoch": 0.3930881074790448, "grad_norm": 1.730877543467086, "learning_rate": 7.60391562905302e-06, "loss": 0.5307, "step": 13342 }, { "epoch": 0.3931175699365084, "grad_norm": 1.5691423141160152, "learning_rate": 7.603476614571546e-06, "loss": 0.3972, "step": 13343 }, { "epoch": 0.393147032393972, "grad_norm": 1.6476076595329587, "learning_rate": 7.603037572551288e-06, "loss": 0.5731, "step": 13344 }, { "epoch": 0.3931764948514356, "grad_norm": 1.6593387873480765, "learning_rate": 7.602598502996891e-06, "loss": 0.4695, "step": 13345 }, { "epoch": 0.3932059573088991, "grad_norm": 1.6077597177519933, "learning_rate": 7.602159405913003e-06, "loss": 0.3908, "step": 13346 }, { "epoch": 0.3932354197663627, "grad_norm": 1.4314663897690092, "learning_rate": 7.601720281304266e-06, "loss": 0.4699, "step": 13347 }, { "epoch": 0.3932648822238263, "grad_norm": 1.327311135513649, "learning_rate": 7.601281129175323e-06, "loss": 0.2692, "step": 13348 }, { "epoch": 0.3932943446812899, "grad_norm": 1.533295898211887, "learning_rate": 7.600841949530822e-06, "loss": 0.5566, "step": 13349 }, { "epoch": 0.3933238071387534, "grad_norm": 1.6870313895824343, "learning_rate": 7.600402742375406e-06, "loss": 0.4437, "step": 13350 }, { "epoch": 0.393353269596217, "grad_norm": 1.8193319808878663, "learning_rate": 7.5999635077137235e-06, "loss": 0.7085, "step": 13351 }, { "epoch": 0.3933827320536806, "grad_norm": 1.5569419530837563, "learning_rate": 7.59952424555042e-06, "loss": 0.5182, "step": 13352 }, { "epoch": 0.3934121945111442, "grad_norm": 1.4699861278414827, "learning_rate": 7.599084955890139e-06, "loss": 0.4378, "step": 13353 }, { "epoch": 0.3934416569686078, "grad_norm": 1.5251574915473423, "learning_rate": 7.59864563873753e-06, "loss": 0.5625, "step": 13354 }, { "epoch": 0.3934711194260713, "grad_norm": 1.6591281477558912, "learning_rate": 7.598206294097242e-06, "loss": 0.6036, "step": 13355 }, { "epoch": 0.39350058188353493, "grad_norm": 1.4904773611903877, "learning_rate": 7.597766921973917e-06, "loss": 0.4833, "step": 13356 }, { "epoch": 0.3935300443409985, "grad_norm": 1.4666355978333767, "learning_rate": 7.597327522372206e-06, "loss": 0.4125, "step": 13357 }, { "epoch": 0.3935595067984621, "grad_norm": 1.4067704799755507, "learning_rate": 7.596888095296757e-06, "loss": 0.5274, "step": 13358 }, { "epoch": 0.3935889692559256, "grad_norm": 1.5519887080337191, "learning_rate": 7.596448640752215e-06, "loss": 0.3308, "step": 13359 }, { "epoch": 0.3936184317133892, "grad_norm": 1.5396067173371666, "learning_rate": 7.596009158743232e-06, "loss": 0.4056, "step": 13360 }, { "epoch": 0.3936478941708528, "grad_norm": 1.337907551598485, "learning_rate": 7.595569649274455e-06, "loss": 0.3845, "step": 13361 }, { "epoch": 0.3936773566283164, "grad_norm": 1.4827391860240513, "learning_rate": 7.595130112350534e-06, "loss": 0.4564, "step": 13362 }, { "epoch": 0.3937068190857799, "grad_norm": 1.45941152636337, "learning_rate": 7.594690547976116e-06, "loss": 0.4808, "step": 13363 }, { "epoch": 0.3937362815432435, "grad_norm": 1.3921939774720475, "learning_rate": 7.594250956155853e-06, "loss": 0.4045, "step": 13364 }, { "epoch": 0.3937657440007071, "grad_norm": 1.678622366131045, "learning_rate": 7.5938113368943944e-06, "loss": 0.4563, "step": 13365 }, { "epoch": 0.3937952064581707, "grad_norm": 1.6402479528009324, "learning_rate": 7.593371690196389e-06, "loss": 0.533, "step": 13366 }, { "epoch": 0.3938246689156343, "grad_norm": 1.5209777067876051, "learning_rate": 7.59293201606649e-06, "loss": 0.4397, "step": 13367 }, { "epoch": 0.3938541313730978, "grad_norm": 1.4672005329299198, "learning_rate": 7.592492314509345e-06, "loss": 0.4435, "step": 13368 }, { "epoch": 0.39388359383056143, "grad_norm": 1.7209692138592834, "learning_rate": 7.592052585529606e-06, "loss": 0.5952, "step": 13369 }, { "epoch": 0.393913056288025, "grad_norm": 1.3826824973604945, "learning_rate": 7.591612829131925e-06, "loss": 0.4359, "step": 13370 }, { "epoch": 0.3939425187454886, "grad_norm": 1.8373531718665375, "learning_rate": 7.591173045320954e-06, "loss": 0.5468, "step": 13371 }, { "epoch": 0.3939719812029521, "grad_norm": 1.4549798741306086, "learning_rate": 7.590733234101344e-06, "loss": 0.3919, "step": 13372 }, { "epoch": 0.39400144366041573, "grad_norm": 1.6064214870438502, "learning_rate": 7.590293395477748e-06, "loss": 0.3998, "step": 13373 }, { "epoch": 0.3940309061178793, "grad_norm": 1.5074315930034092, "learning_rate": 7.589853529454816e-06, "loss": 0.538, "step": 13374 }, { "epoch": 0.3940603685753429, "grad_norm": 1.4020180629327348, "learning_rate": 7.589413636037203e-06, "loss": 0.4068, "step": 13375 }, { "epoch": 0.3940898310328064, "grad_norm": 1.3693352270760246, "learning_rate": 7.588973715229563e-06, "loss": 0.4493, "step": 13376 }, { "epoch": 0.39411929349027003, "grad_norm": 1.4687861409671799, "learning_rate": 7.588533767036547e-06, "loss": 0.5209, "step": 13377 }, { "epoch": 0.3941487559477336, "grad_norm": 1.5572861034541385, "learning_rate": 7.588093791462811e-06, "loss": 0.5711, "step": 13378 }, { "epoch": 0.3941782184051972, "grad_norm": 1.4717173919994304, "learning_rate": 7.587653788513007e-06, "loss": 0.4223, "step": 13379 }, { "epoch": 0.3942076808626608, "grad_norm": 1.4212021365819365, "learning_rate": 7.5872137581917895e-06, "loss": 0.4364, "step": 13380 }, { "epoch": 0.39423714332012433, "grad_norm": 1.7594515439009184, "learning_rate": 7.5867737005038125e-06, "loss": 0.4904, "step": 13381 }, { "epoch": 0.39426660577758793, "grad_norm": 1.7278337357805944, "learning_rate": 7.586333615453733e-06, "loss": 0.7237, "step": 13382 }, { "epoch": 0.3942960682350515, "grad_norm": 1.5503078948598876, "learning_rate": 7.585893503046204e-06, "loss": 0.4071, "step": 13383 }, { "epoch": 0.3943255306925151, "grad_norm": 1.4685478734124457, "learning_rate": 7.585453363285882e-06, "loss": 0.4465, "step": 13384 }, { "epoch": 0.3943549931499786, "grad_norm": 1.4585036242137455, "learning_rate": 7.585013196177422e-06, "loss": 0.5204, "step": 13385 }, { "epoch": 0.39438445560744223, "grad_norm": 1.386128686609172, "learning_rate": 7.58457300172548e-06, "loss": 0.4439, "step": 13386 }, { "epoch": 0.3944139180649058, "grad_norm": 1.5011295916863139, "learning_rate": 7.584132779934714e-06, "loss": 0.5324, "step": 13387 }, { "epoch": 0.3944433805223694, "grad_norm": 1.5995481437295362, "learning_rate": 7.5836925308097785e-06, "loss": 0.3732, "step": 13388 }, { "epoch": 0.3944728429798329, "grad_norm": 1.4648956991180913, "learning_rate": 7.583252254355329e-06, "loss": 0.4554, "step": 13389 }, { "epoch": 0.39450230543729653, "grad_norm": 1.3361186749820977, "learning_rate": 7.582811950576026e-06, "loss": 0.4066, "step": 13390 }, { "epoch": 0.3945317678947601, "grad_norm": 1.393719381492469, "learning_rate": 7.582371619476525e-06, "loss": 0.4021, "step": 13391 }, { "epoch": 0.3945612303522237, "grad_norm": 1.515693994987375, "learning_rate": 7.5819312610614836e-06, "loss": 0.6534, "step": 13392 }, { "epoch": 0.3945906928096873, "grad_norm": 1.6876716887357988, "learning_rate": 7.581490875335561e-06, "loss": 0.4918, "step": 13393 }, { "epoch": 0.39462015526715083, "grad_norm": 1.7651758618552504, "learning_rate": 7.581050462303414e-06, "loss": 0.4448, "step": 13394 }, { "epoch": 0.39464961772461443, "grad_norm": 1.7392051388527774, "learning_rate": 7.580610021969702e-06, "loss": 0.673, "step": 13395 }, { "epoch": 0.394679080182078, "grad_norm": 1.4968606481839644, "learning_rate": 7.580169554339083e-06, "loss": 0.4048, "step": 13396 }, { "epoch": 0.3947085426395416, "grad_norm": 2.0345131839432664, "learning_rate": 7.5797290594162185e-06, "loss": 0.6059, "step": 13397 }, { "epoch": 0.39473800509700513, "grad_norm": 1.6412238509552832, "learning_rate": 7.5792885372057645e-06, "loss": 0.407, "step": 13398 }, { "epoch": 0.39476746755446873, "grad_norm": 1.602017743388303, "learning_rate": 7.578847987712383e-06, "loss": 0.5224, "step": 13399 }, { "epoch": 0.3947969300119323, "grad_norm": 1.5657960292139976, "learning_rate": 7.578407410940733e-06, "loss": 0.4325, "step": 13400 }, { "epoch": 0.3948263924693959, "grad_norm": 1.3964927558359712, "learning_rate": 7.577966806895476e-06, "loss": 0.4198, "step": 13401 }, { "epoch": 0.39485585492685943, "grad_norm": 1.5863580502717884, "learning_rate": 7.577526175581273e-06, "loss": 0.4611, "step": 13402 }, { "epoch": 0.39488531738432303, "grad_norm": 1.4501853160066231, "learning_rate": 7.5770855170027805e-06, "loss": 0.426, "step": 13403 }, { "epoch": 0.3949147798417866, "grad_norm": 1.4534088509060537, "learning_rate": 7.576644831164666e-06, "loss": 0.4371, "step": 13404 }, { "epoch": 0.3949442422992502, "grad_norm": 1.3259947486403039, "learning_rate": 7.576204118071584e-06, "loss": 0.4144, "step": 13405 }, { "epoch": 0.3949737047567138, "grad_norm": 1.7399649797586323, "learning_rate": 7.575763377728203e-06, "loss": 0.4124, "step": 13406 }, { "epoch": 0.39500316721417733, "grad_norm": 1.5420048502099544, "learning_rate": 7.575322610139182e-06, "loss": 0.3975, "step": 13407 }, { "epoch": 0.39503262967164093, "grad_norm": 1.4885128614777867, "learning_rate": 7.574881815309182e-06, "loss": 0.4312, "step": 13408 }, { "epoch": 0.3950620921291045, "grad_norm": 1.3572672864780702, "learning_rate": 7.574440993242866e-06, "loss": 0.4149, "step": 13409 }, { "epoch": 0.3950915545865681, "grad_norm": 1.571166049548189, "learning_rate": 7.574000143944899e-06, "loss": 0.4957, "step": 13410 }, { "epoch": 0.39512101704403163, "grad_norm": 1.8109426566861033, "learning_rate": 7.573559267419943e-06, "loss": 0.4865, "step": 13411 }, { "epoch": 0.39515047950149523, "grad_norm": 1.4477905412042515, "learning_rate": 7.573118363672662e-06, "loss": 0.3786, "step": 13412 }, { "epoch": 0.3951799419589588, "grad_norm": 1.4484901628193356, "learning_rate": 7.572677432707719e-06, "loss": 0.438, "step": 13413 }, { "epoch": 0.3952094044164224, "grad_norm": 1.464810734495778, "learning_rate": 7.572236474529777e-06, "loss": 0.3918, "step": 13414 }, { "epoch": 0.39523886687388593, "grad_norm": 1.6684285307849989, "learning_rate": 7.571795489143503e-06, "loss": 0.6481, "step": 13415 }, { "epoch": 0.39526832933134953, "grad_norm": 1.4720964716550207, "learning_rate": 7.5713544765535586e-06, "loss": 0.4505, "step": 13416 }, { "epoch": 0.3952977917888131, "grad_norm": 1.5782345480666355, "learning_rate": 7.5709134367646105e-06, "loss": 0.4198, "step": 13417 }, { "epoch": 0.3953272542462767, "grad_norm": 1.4042253345252356, "learning_rate": 7.570472369781323e-06, "loss": 0.4606, "step": 13418 }, { "epoch": 0.3953567167037403, "grad_norm": 1.8724351789386902, "learning_rate": 7.570031275608365e-06, "loss": 0.3989, "step": 13419 }, { "epoch": 0.39538617916120383, "grad_norm": 1.6440627138721087, "learning_rate": 7.5695901542503965e-06, "loss": 0.6055, "step": 13420 }, { "epoch": 0.39541564161866743, "grad_norm": 1.4180922544838441, "learning_rate": 7.569149005712088e-06, "loss": 0.4817, "step": 13421 }, { "epoch": 0.395445104076131, "grad_norm": 1.4792484301609927, "learning_rate": 7.568707829998103e-06, "loss": 0.6052, "step": 13422 }, { "epoch": 0.3954745665335946, "grad_norm": 1.5235649746672704, "learning_rate": 7.56826662711311e-06, "loss": 0.5345, "step": 13423 }, { "epoch": 0.39550402899105813, "grad_norm": 1.7732320893711342, "learning_rate": 7.5678253970617744e-06, "loss": 0.4777, "step": 13424 }, { "epoch": 0.39553349144852173, "grad_norm": 1.4144227550590103, "learning_rate": 7.567384139848766e-06, "loss": 0.3838, "step": 13425 }, { "epoch": 0.3955629539059853, "grad_norm": 1.456756970946148, "learning_rate": 7.5669428554787494e-06, "loss": 0.4639, "step": 13426 }, { "epoch": 0.3955924163634489, "grad_norm": 1.327957252738526, "learning_rate": 7.566501543956395e-06, "loss": 0.3332, "step": 13427 }, { "epoch": 0.39562187882091243, "grad_norm": 1.3027144177778276, "learning_rate": 7.56606020528637e-06, "loss": 0.31, "step": 13428 }, { "epoch": 0.39565134127837603, "grad_norm": 1.5894095922666827, "learning_rate": 7.565618839473338e-06, "loss": 0.4433, "step": 13429 }, { "epoch": 0.3956808037358396, "grad_norm": 1.505648591053234, "learning_rate": 7.565177446521975e-06, "loss": 0.3744, "step": 13430 }, { "epoch": 0.3957102661933032, "grad_norm": 1.5147120949920065, "learning_rate": 7.564736026436948e-06, "loss": 0.4215, "step": 13431 }, { "epoch": 0.3957397286507668, "grad_norm": 1.5545636679959527, "learning_rate": 7.564294579222923e-06, "loss": 0.4339, "step": 13432 }, { "epoch": 0.39576919110823033, "grad_norm": 1.8050418954658414, "learning_rate": 7.563853104884571e-06, "loss": 0.5105, "step": 13433 }, { "epoch": 0.39579865356569394, "grad_norm": 1.7566048854432494, "learning_rate": 7.5634116034265625e-06, "loss": 0.66, "step": 13434 }, { "epoch": 0.3958281160231575, "grad_norm": 1.6165272313923513, "learning_rate": 7.5629700748535686e-06, "loss": 0.4881, "step": 13435 }, { "epoch": 0.3958575784806211, "grad_norm": 1.585506866235912, "learning_rate": 7.562528519170256e-06, "loss": 0.579, "step": 13436 }, { "epoch": 0.39588704093808463, "grad_norm": 1.5209602511420508, "learning_rate": 7.562086936381301e-06, "loss": 0.4958, "step": 13437 }, { "epoch": 0.39591650339554824, "grad_norm": 1.5502432260520107, "learning_rate": 7.561645326491368e-06, "loss": 0.5634, "step": 13438 }, { "epoch": 0.3959459658530118, "grad_norm": 1.4208737974342924, "learning_rate": 7.561203689505132e-06, "loss": 0.4219, "step": 13439 }, { "epoch": 0.3959754283104754, "grad_norm": 1.5906815760878672, "learning_rate": 7.560762025427267e-06, "loss": 0.4836, "step": 13440 }, { "epoch": 0.39600489076793893, "grad_norm": 1.6079608444885316, "learning_rate": 7.560320334262437e-06, "loss": 0.5173, "step": 13441 }, { "epoch": 0.39603435322540254, "grad_norm": 1.8006212509098982, "learning_rate": 7.559878616015322e-06, "loss": 0.6111, "step": 13442 }, { "epoch": 0.3960638156828661, "grad_norm": 1.4928348437011763, "learning_rate": 7.55943687069059e-06, "loss": 0.395, "step": 13443 }, { "epoch": 0.3960932781403297, "grad_norm": 1.358734684851991, "learning_rate": 7.558995098292915e-06, "loss": 0.2588, "step": 13444 }, { "epoch": 0.3961227405977933, "grad_norm": 1.3680255462970003, "learning_rate": 7.55855329882697e-06, "loss": 0.3837, "step": 13445 }, { "epoch": 0.39615220305525684, "grad_norm": 1.6373095915306666, "learning_rate": 7.558111472297427e-06, "loss": 0.544, "step": 13446 }, { "epoch": 0.39618166551272044, "grad_norm": 1.8164103150324828, "learning_rate": 7.5576696187089625e-06, "loss": 0.6515, "step": 13447 }, { "epoch": 0.396211127970184, "grad_norm": 1.5810808708923731, "learning_rate": 7.557227738066246e-06, "loss": 0.5315, "step": 13448 }, { "epoch": 0.3962405904276476, "grad_norm": 1.8497141541798807, "learning_rate": 7.5567858303739564e-06, "loss": 0.6013, "step": 13449 }, { "epoch": 0.39627005288511113, "grad_norm": 1.6233709650367747, "learning_rate": 7.556343895636762e-06, "loss": 0.5646, "step": 13450 }, { "epoch": 0.39629951534257474, "grad_norm": 1.6700634241720789, "learning_rate": 7.555901933859342e-06, "loss": 0.425, "step": 13451 }, { "epoch": 0.3963289778000383, "grad_norm": 1.7448403191919981, "learning_rate": 7.555459945046373e-06, "loss": 0.4515, "step": 13452 }, { "epoch": 0.3963584402575019, "grad_norm": 1.4388012755008763, "learning_rate": 7.555017929202526e-06, "loss": 0.557, "step": 13453 }, { "epoch": 0.39638790271496543, "grad_norm": 1.4248325256886671, "learning_rate": 7.554575886332476e-06, "loss": 0.4121, "step": 13454 }, { "epoch": 0.39641736517242904, "grad_norm": 1.6856602869756754, "learning_rate": 7.554133816440903e-06, "loss": 0.6612, "step": 13455 }, { "epoch": 0.3964468276298926, "grad_norm": 1.4641936207595259, "learning_rate": 7.55369171953248e-06, "loss": 0.4803, "step": 13456 }, { "epoch": 0.3964762900873562, "grad_norm": 1.7132374977554288, "learning_rate": 7.553249595611884e-06, "loss": 0.4663, "step": 13457 }, { "epoch": 0.3965057525448198, "grad_norm": 1.5774232411698996, "learning_rate": 7.552807444683794e-06, "loss": 0.432, "step": 13458 }, { "epoch": 0.39653521500228334, "grad_norm": 1.7611451098358255, "learning_rate": 7.5523652667528834e-06, "loss": 0.4577, "step": 13459 }, { "epoch": 0.39656467745974694, "grad_norm": 1.6341970274625213, "learning_rate": 7.5519230618238295e-06, "loss": 0.4639, "step": 13460 }, { "epoch": 0.3965941399172105, "grad_norm": 1.667345861325334, "learning_rate": 7.551480829901313e-06, "loss": 0.5283, "step": 13461 }, { "epoch": 0.3966236023746741, "grad_norm": 1.7372665432650234, "learning_rate": 7.5510385709900106e-06, "loss": 0.4264, "step": 13462 }, { "epoch": 0.39665306483213764, "grad_norm": 1.3698359569171186, "learning_rate": 7.550596285094597e-06, "loss": 0.4287, "step": 13463 }, { "epoch": 0.39668252728960124, "grad_norm": 1.321102388280883, "learning_rate": 7.550153972219755e-06, "loss": 0.3258, "step": 13464 }, { "epoch": 0.3967119897470648, "grad_norm": 1.7264890865594527, "learning_rate": 7.54971163237016e-06, "loss": 0.6465, "step": 13465 }, { "epoch": 0.3967414522045284, "grad_norm": 1.4569439996340423, "learning_rate": 7.549269265550494e-06, "loss": 0.363, "step": 13466 }, { "epoch": 0.39677091466199194, "grad_norm": 1.6697742275830085, "learning_rate": 7.548826871765436e-06, "loss": 0.5708, "step": 13467 }, { "epoch": 0.39680037711945554, "grad_norm": 1.5279691116495886, "learning_rate": 7.548384451019661e-06, "loss": 0.4687, "step": 13468 }, { "epoch": 0.3968298395769191, "grad_norm": 1.7098870814549862, "learning_rate": 7.547942003317852e-06, "loss": 0.4997, "step": 13469 }, { "epoch": 0.3968593020343827, "grad_norm": 1.7467086317838962, "learning_rate": 7.54749952866469e-06, "loss": 0.5502, "step": 13470 }, { "epoch": 0.3968887644918463, "grad_norm": 1.4423797155244205, "learning_rate": 7.547057027064855e-06, "loss": 0.4563, "step": 13471 }, { "epoch": 0.39691822694930984, "grad_norm": 1.5025693026028075, "learning_rate": 7.5466144985230265e-06, "loss": 0.4283, "step": 13472 }, { "epoch": 0.39694768940677344, "grad_norm": 1.3955737618734698, "learning_rate": 7.5461719430438876e-06, "loss": 0.5155, "step": 13473 }, { "epoch": 0.396977151864237, "grad_norm": 1.415918792710874, "learning_rate": 7.545729360632115e-06, "loss": 0.4533, "step": 13474 }, { "epoch": 0.3970066143217006, "grad_norm": 1.7245307266935552, "learning_rate": 7.5452867512923936e-06, "loss": 0.4682, "step": 13475 }, { "epoch": 0.39703607677916414, "grad_norm": 1.7179806994926008, "learning_rate": 7.544844115029406e-06, "loss": 0.4821, "step": 13476 }, { "epoch": 0.39706553923662774, "grad_norm": 1.2108289820041713, "learning_rate": 7.544401451847831e-06, "loss": 0.2766, "step": 13477 }, { "epoch": 0.3970950016940913, "grad_norm": 1.4680706880614118, "learning_rate": 7.543958761752355e-06, "loss": 0.4579, "step": 13478 }, { "epoch": 0.3971244641515549, "grad_norm": 1.4976192714560934, "learning_rate": 7.543516044747658e-06, "loss": 0.4745, "step": 13479 }, { "epoch": 0.39715392660901844, "grad_norm": 1.5379756378723701, "learning_rate": 7.543073300838423e-06, "loss": 0.5664, "step": 13480 }, { "epoch": 0.39718338906648204, "grad_norm": 1.4616940164108327, "learning_rate": 7.542630530029334e-06, "loss": 0.5359, "step": 13481 }, { "epoch": 0.3972128515239456, "grad_norm": 1.3360385203997027, "learning_rate": 7.542187732325074e-06, "loss": 0.3114, "step": 13482 }, { "epoch": 0.3972423139814092, "grad_norm": 1.3228614138980455, "learning_rate": 7.541744907730326e-06, "loss": 0.3825, "step": 13483 }, { "epoch": 0.3972717764388728, "grad_norm": 1.4948576513472256, "learning_rate": 7.5413020562497756e-06, "loss": 0.4602, "step": 13484 }, { "epoch": 0.39730123889633634, "grad_norm": 1.477942981549085, "learning_rate": 7.540859177888107e-06, "loss": 0.5476, "step": 13485 }, { "epoch": 0.39733070135379994, "grad_norm": 1.538489996068202, "learning_rate": 7.540416272650004e-06, "loss": 0.5707, "step": 13486 }, { "epoch": 0.3973601638112635, "grad_norm": 1.7491143173897188, "learning_rate": 7.539973340540152e-06, "loss": 0.4929, "step": 13487 }, { "epoch": 0.3973896262687271, "grad_norm": 1.3909226610434913, "learning_rate": 7.539530381563236e-06, "loss": 0.4117, "step": 13488 }, { "epoch": 0.39741908872619064, "grad_norm": 1.6517925273773966, "learning_rate": 7.539087395723941e-06, "loss": 0.5281, "step": 13489 }, { "epoch": 0.39744855118365424, "grad_norm": 1.3598520665545482, "learning_rate": 7.538644383026954e-06, "loss": 0.3065, "step": 13490 }, { "epoch": 0.3974780136411178, "grad_norm": 1.479295574447748, "learning_rate": 7.5382013434769604e-06, "loss": 0.3339, "step": 13491 }, { "epoch": 0.3975074760985814, "grad_norm": 1.3950361713301471, "learning_rate": 7.537758277078647e-06, "loss": 0.2981, "step": 13492 }, { "epoch": 0.39753693855604494, "grad_norm": 1.680796577093465, "learning_rate": 7.5373151838366995e-06, "loss": 0.5722, "step": 13493 }, { "epoch": 0.39756640101350854, "grad_norm": 1.4382810275028717, "learning_rate": 7.536872063755805e-06, "loss": 0.4885, "step": 13494 }, { "epoch": 0.3975958634709721, "grad_norm": 1.7246809663760954, "learning_rate": 7.53642891684065e-06, "loss": 0.5098, "step": 13495 }, { "epoch": 0.3976253259284357, "grad_norm": 1.3814473387424488, "learning_rate": 7.535985743095924e-06, "loss": 0.3999, "step": 13496 }, { "epoch": 0.3976547883858993, "grad_norm": 1.4381627093505573, "learning_rate": 7.535542542526315e-06, "loss": 0.3827, "step": 13497 }, { "epoch": 0.39768425084336284, "grad_norm": 1.4519585719227621, "learning_rate": 7.535099315136508e-06, "loss": 0.5524, "step": 13498 }, { "epoch": 0.39771371330082644, "grad_norm": 1.2958589544526593, "learning_rate": 7.534656060931194e-06, "loss": 0.3693, "step": 13499 }, { "epoch": 0.39774317575829, "grad_norm": 1.438913130419605, "learning_rate": 7.5342127799150595e-06, "loss": 0.4265, "step": 13500 }, { "epoch": 0.3977726382157536, "grad_norm": 1.4595137638470883, "learning_rate": 7.533769472092796e-06, "loss": 0.5444, "step": 13501 }, { "epoch": 0.39780210067321714, "grad_norm": 1.42863051335722, "learning_rate": 7.53332613746909e-06, "loss": 0.3979, "step": 13502 }, { "epoch": 0.39783156313068074, "grad_norm": 1.6460158933406535, "learning_rate": 7.532882776048634e-06, "loss": 0.5282, "step": 13503 }, { "epoch": 0.3978610255881443, "grad_norm": 1.4284554653622945, "learning_rate": 7.532439387836112e-06, "loss": 0.448, "step": 13504 }, { "epoch": 0.3978904880456079, "grad_norm": 1.5956531914264742, "learning_rate": 7.531995972836222e-06, "loss": 0.5666, "step": 13505 }, { "epoch": 0.39791995050307144, "grad_norm": 1.480717419887983, "learning_rate": 7.5315525310536476e-06, "loss": 0.5047, "step": 13506 }, { "epoch": 0.39794941296053504, "grad_norm": 1.7445313704218721, "learning_rate": 7.531109062493084e-06, "loss": 0.6204, "step": 13507 }, { "epoch": 0.3979788754179986, "grad_norm": 1.6083725109668627, "learning_rate": 7.530665567159219e-06, "loss": 0.4893, "step": 13508 }, { "epoch": 0.3980083378754622, "grad_norm": 1.4450682125163006, "learning_rate": 7.530222045056745e-06, "loss": 0.3423, "step": 13509 }, { "epoch": 0.3980378003329258, "grad_norm": 1.5138005892870878, "learning_rate": 7.529778496190353e-06, "loss": 0.5105, "step": 13510 }, { "epoch": 0.39806726279038934, "grad_norm": 1.7103913836835527, "learning_rate": 7.529334920564735e-06, "loss": 0.4532, "step": 13511 }, { "epoch": 0.39809672524785295, "grad_norm": 1.6281679134711668, "learning_rate": 7.528891318184584e-06, "loss": 0.3726, "step": 13512 }, { "epoch": 0.3981261877053165, "grad_norm": 1.5307337339277631, "learning_rate": 7.528447689054591e-06, "loss": 0.4181, "step": 13513 }, { "epoch": 0.3981556501627801, "grad_norm": 1.7634203230365821, "learning_rate": 7.528004033179448e-06, "loss": 0.611, "step": 13514 }, { "epoch": 0.39818511262024364, "grad_norm": 1.561086232970445, "learning_rate": 7.52756035056385e-06, "loss": 0.4236, "step": 13515 }, { "epoch": 0.39821457507770724, "grad_norm": 1.5551412013360002, "learning_rate": 7.527116641212488e-06, "loss": 0.5683, "step": 13516 }, { "epoch": 0.3982440375351708, "grad_norm": 1.496977043527986, "learning_rate": 7.5266729051300566e-06, "loss": 0.3903, "step": 13517 }, { "epoch": 0.3982734999926344, "grad_norm": 1.4230903397490768, "learning_rate": 7.52622914232125e-06, "loss": 0.491, "step": 13518 }, { "epoch": 0.39830296245009794, "grad_norm": 1.429949391309542, "learning_rate": 7.5257853527907606e-06, "loss": 0.5406, "step": 13519 }, { "epoch": 0.39833242490756154, "grad_norm": 1.3303151991078488, "learning_rate": 7.525341536543283e-06, "loss": 0.3406, "step": 13520 }, { "epoch": 0.3983618873650251, "grad_norm": 1.7869623036677749, "learning_rate": 7.524897693583514e-06, "loss": 0.6769, "step": 13521 }, { "epoch": 0.3983913498224887, "grad_norm": 1.6512805815498584, "learning_rate": 7.524453823916147e-06, "loss": 0.5205, "step": 13522 }, { "epoch": 0.3984208122799523, "grad_norm": 1.4449518571655762, "learning_rate": 7.524009927545874e-06, "loss": 0.5454, "step": 13523 }, { "epoch": 0.39845027473741584, "grad_norm": 1.4430373489992814, "learning_rate": 7.523566004477394e-06, "loss": 0.4372, "step": 13524 }, { "epoch": 0.39847973719487945, "grad_norm": 1.7858824431411746, "learning_rate": 7.523122054715404e-06, "loss": 0.4034, "step": 13525 }, { "epoch": 0.398509199652343, "grad_norm": 1.5177191049545504, "learning_rate": 7.522678078264596e-06, "loss": 0.3854, "step": 13526 }, { "epoch": 0.3985386621098066, "grad_norm": 1.7226313350173001, "learning_rate": 7.522234075129669e-06, "loss": 0.5415, "step": 13527 }, { "epoch": 0.39856812456727014, "grad_norm": 1.416299297167585, "learning_rate": 7.521790045315318e-06, "loss": 0.452, "step": 13528 }, { "epoch": 0.39859758702473375, "grad_norm": 1.4490088655899322, "learning_rate": 7.52134598882624e-06, "loss": 0.4088, "step": 13529 }, { "epoch": 0.3986270494821973, "grad_norm": 1.5716295224918622, "learning_rate": 7.520901905667133e-06, "loss": 0.3686, "step": 13530 }, { "epoch": 0.3986565119396609, "grad_norm": 1.678387703905615, "learning_rate": 7.520457795842694e-06, "loss": 0.5932, "step": 13531 }, { "epoch": 0.39868597439712444, "grad_norm": 1.5423747996692843, "learning_rate": 7.520013659357621e-06, "loss": 0.4841, "step": 13532 }, { "epoch": 0.39871543685458805, "grad_norm": 1.599905098594335, "learning_rate": 7.51956949621661e-06, "loss": 0.5126, "step": 13533 }, { "epoch": 0.3987448993120516, "grad_norm": 1.4945559716409056, "learning_rate": 7.519125306424363e-06, "loss": 0.4389, "step": 13534 }, { "epoch": 0.3987743617695152, "grad_norm": 1.4318686517517194, "learning_rate": 7.5186810899855745e-06, "loss": 0.5219, "step": 13535 }, { "epoch": 0.3988038242269788, "grad_norm": 1.4749580810560323, "learning_rate": 7.518236846904946e-06, "loss": 0.4853, "step": 13536 }, { "epoch": 0.39883328668444235, "grad_norm": 1.7424461697029165, "learning_rate": 7.5177925771871756e-06, "loss": 0.569, "step": 13537 }, { "epoch": 0.39886274914190595, "grad_norm": 1.5420116781319844, "learning_rate": 7.517348280836962e-06, "loss": 0.5587, "step": 13538 }, { "epoch": 0.3988922115993695, "grad_norm": 1.5481683491514127, "learning_rate": 7.516903957859006e-06, "loss": 0.5415, "step": 13539 }, { "epoch": 0.3989216740568331, "grad_norm": 1.5458038727887864, "learning_rate": 7.516459608258006e-06, "loss": 0.4952, "step": 13540 }, { "epoch": 0.39895113651429664, "grad_norm": 1.3857311912113628, "learning_rate": 7.516015232038663e-06, "loss": 0.425, "step": 13541 }, { "epoch": 0.39898059897176025, "grad_norm": 1.5757017011274324, "learning_rate": 7.5155708292056796e-06, "loss": 0.3922, "step": 13542 }, { "epoch": 0.3990100614292238, "grad_norm": 1.4476967288118614, "learning_rate": 7.5151263997637535e-06, "loss": 0.4195, "step": 13543 }, { "epoch": 0.3990395238866874, "grad_norm": 1.5047867038310885, "learning_rate": 7.5146819437175865e-06, "loss": 0.469, "step": 13544 }, { "epoch": 0.39906898634415094, "grad_norm": 1.4001027502142327, "learning_rate": 7.51423746107188e-06, "loss": 0.3595, "step": 13545 }, { "epoch": 0.39909844880161455, "grad_norm": 1.6252489475064429, "learning_rate": 7.513792951831338e-06, "loss": 0.4044, "step": 13546 }, { "epoch": 0.3991279112590781, "grad_norm": 1.2605381740050488, "learning_rate": 7.513348416000659e-06, "loss": 0.3567, "step": 13547 }, { "epoch": 0.3991573737165417, "grad_norm": 1.5167493335297995, "learning_rate": 7.512903853584546e-06, "loss": 0.4606, "step": 13548 }, { "epoch": 0.3991868361740053, "grad_norm": 1.647567681570053, "learning_rate": 7.512459264587701e-06, "loss": 0.509, "step": 13549 }, { "epoch": 0.39921629863146885, "grad_norm": 1.3985772124404294, "learning_rate": 7.512014649014829e-06, "loss": 0.367, "step": 13550 }, { "epoch": 0.39924576108893245, "grad_norm": 1.6198786966065826, "learning_rate": 7.511570006870632e-06, "loss": 0.4519, "step": 13551 }, { "epoch": 0.399275223546396, "grad_norm": 1.4561149807427731, "learning_rate": 7.5111253381598124e-06, "loss": 0.4731, "step": 13552 }, { "epoch": 0.3993046860038596, "grad_norm": 1.6238951038257587, "learning_rate": 7.510680642887074e-06, "loss": 0.3274, "step": 13553 }, { "epoch": 0.39933414846132315, "grad_norm": 1.6389740403879707, "learning_rate": 7.5102359210571195e-06, "loss": 0.5251, "step": 13554 }, { "epoch": 0.39936361091878675, "grad_norm": 1.4788608770775413, "learning_rate": 7.509791172674656e-06, "loss": 0.3785, "step": 13555 }, { "epoch": 0.3993930733762503, "grad_norm": 1.6853777093557354, "learning_rate": 7.509346397744386e-06, "loss": 0.5105, "step": 13556 }, { "epoch": 0.3994225358337139, "grad_norm": 1.489109760858603, "learning_rate": 7.5089015962710155e-06, "loss": 0.3828, "step": 13557 }, { "epoch": 0.39945199829117745, "grad_norm": 1.5503006534213637, "learning_rate": 7.508456768259249e-06, "loss": 0.4815, "step": 13558 }, { "epoch": 0.39948146074864105, "grad_norm": 1.5205932431056055, "learning_rate": 7.508011913713788e-06, "loss": 0.4479, "step": 13559 }, { "epoch": 0.3995109232061046, "grad_norm": 1.6775627133434163, "learning_rate": 7.5075670326393445e-06, "loss": 0.5081, "step": 13560 }, { "epoch": 0.3995403856635682, "grad_norm": 1.3605028499339358, "learning_rate": 7.5071221250406205e-06, "loss": 0.4941, "step": 13561 }, { "epoch": 0.3995698481210318, "grad_norm": 1.567053059274577, "learning_rate": 7.5066771909223224e-06, "loss": 0.4854, "step": 13562 }, { "epoch": 0.39959931057849535, "grad_norm": 1.4711070290582118, "learning_rate": 7.506232230289156e-06, "loss": 0.387, "step": 13563 }, { "epoch": 0.39962877303595895, "grad_norm": 1.4842465782401308, "learning_rate": 7.50578724314583e-06, "loss": 0.3808, "step": 13564 }, { "epoch": 0.3996582354934225, "grad_norm": 1.6730169865098787, "learning_rate": 7.5053422294970505e-06, "loss": 0.6119, "step": 13565 }, { "epoch": 0.3996876979508861, "grad_norm": 1.4642245020202433, "learning_rate": 7.504897189347522e-06, "loss": 0.4959, "step": 13566 }, { "epoch": 0.39971716040834965, "grad_norm": 1.455117729195996, "learning_rate": 7.504452122701957e-06, "loss": 0.4887, "step": 13567 }, { "epoch": 0.39974662286581325, "grad_norm": 1.3535625119571881, "learning_rate": 7.504007029565061e-06, "loss": 0.3839, "step": 13568 }, { "epoch": 0.3997760853232768, "grad_norm": 1.87416421932608, "learning_rate": 7.503561909941539e-06, "loss": 0.5154, "step": 13569 }, { "epoch": 0.3998055477807404, "grad_norm": 1.5097857955895484, "learning_rate": 7.503116763836105e-06, "loss": 0.383, "step": 13570 }, { "epoch": 0.39983501023820395, "grad_norm": 1.5275119065885734, "learning_rate": 7.502671591253463e-06, "loss": 0.3697, "step": 13571 }, { "epoch": 0.39986447269566755, "grad_norm": 1.4417362516318781, "learning_rate": 7.502226392198324e-06, "loss": 0.4102, "step": 13572 }, { "epoch": 0.3998939351531311, "grad_norm": 1.74689910647253, "learning_rate": 7.501781166675398e-06, "loss": 0.5372, "step": 13573 }, { "epoch": 0.3999233976105947, "grad_norm": 1.5778312654120472, "learning_rate": 7.501335914689392e-06, "loss": 0.5784, "step": 13574 }, { "epoch": 0.3999528600680583, "grad_norm": 1.7368645379686938, "learning_rate": 7.500890636245016e-06, "loss": 0.6197, "step": 13575 }, { "epoch": 0.39998232252552185, "grad_norm": 1.4757750010506316, "learning_rate": 7.500445331346984e-06, "loss": 0.4364, "step": 13576 }, { "epoch": 0.40001178498298545, "grad_norm": 1.567256661608202, "learning_rate": 7.500000000000001e-06, "loss": 0.5503, "step": 13577 }, { "epoch": 0.400041247440449, "grad_norm": 1.6153349344214467, "learning_rate": 7.49955464220878e-06, "loss": 0.5653, "step": 13578 }, { "epoch": 0.4000707098979126, "grad_norm": 1.578563858938253, "learning_rate": 7.4991092579780324e-06, "loss": 0.6193, "step": 13579 }, { "epoch": 0.40010017235537615, "grad_norm": 1.4111782678289746, "learning_rate": 7.4986638473124685e-06, "loss": 0.4572, "step": 13580 }, { "epoch": 0.40012963481283975, "grad_norm": 1.466506707728746, "learning_rate": 7.4982184102168e-06, "loss": 0.407, "step": 13581 }, { "epoch": 0.4001590972703033, "grad_norm": 1.4185467329802208, "learning_rate": 7.49777294669574e-06, "loss": 0.4696, "step": 13582 }, { "epoch": 0.4001885597277669, "grad_norm": 1.5678907684660663, "learning_rate": 7.4973274567539955e-06, "loss": 0.4661, "step": 13583 }, { "epoch": 0.40021802218523045, "grad_norm": 1.6871222895163085, "learning_rate": 7.496881940396285e-06, "loss": 0.571, "step": 13584 }, { "epoch": 0.40024748464269405, "grad_norm": 1.4534252318168273, "learning_rate": 7.4964363976273165e-06, "loss": 0.5289, "step": 13585 }, { "epoch": 0.4002769471001576, "grad_norm": 1.4542440034917317, "learning_rate": 7.495990828451806e-06, "loss": 0.4769, "step": 13586 }, { "epoch": 0.4003064095576212, "grad_norm": 1.6128500877888188, "learning_rate": 7.495545232874465e-06, "loss": 0.513, "step": 13587 }, { "epoch": 0.4003358720150848, "grad_norm": 1.3687415671647758, "learning_rate": 7.495099610900008e-06, "loss": 0.4287, "step": 13588 }, { "epoch": 0.40036533447254835, "grad_norm": 1.627748427266915, "learning_rate": 7.494653962533145e-06, "loss": 0.4782, "step": 13589 }, { "epoch": 0.40039479693001195, "grad_norm": 1.3559745979997393, "learning_rate": 7.494208287778594e-06, "loss": 0.4164, "step": 13590 }, { "epoch": 0.4004242593874755, "grad_norm": 1.4036758604795025, "learning_rate": 7.493762586641068e-06, "loss": 0.3398, "step": 13591 }, { "epoch": 0.4004537218449391, "grad_norm": 1.5158200961238497, "learning_rate": 7.493316859125282e-06, "loss": 0.4917, "step": 13592 }, { "epoch": 0.40048318430240265, "grad_norm": 1.5753821912508175, "learning_rate": 7.492871105235949e-06, "loss": 0.5105, "step": 13593 }, { "epoch": 0.40051264675986625, "grad_norm": 1.6178594624253109, "learning_rate": 7.4924253249777865e-06, "loss": 0.6071, "step": 13594 }, { "epoch": 0.4005421092173298, "grad_norm": 1.4951770195984884, "learning_rate": 7.491979518355507e-06, "loss": 0.4932, "step": 13595 }, { "epoch": 0.4005715716747934, "grad_norm": 1.777540873468337, "learning_rate": 7.491533685373828e-06, "loss": 0.5682, "step": 13596 }, { "epoch": 0.40060103413225695, "grad_norm": 1.6369957006061706, "learning_rate": 7.491087826037467e-06, "loss": 0.5448, "step": 13597 }, { "epoch": 0.40063049658972055, "grad_norm": 1.7136825556066149, "learning_rate": 7.490641940351136e-06, "loss": 0.5699, "step": 13598 }, { "epoch": 0.4006599590471841, "grad_norm": 1.628691690341443, "learning_rate": 7.490196028319553e-06, "loss": 0.5028, "step": 13599 }, { "epoch": 0.4006894215046477, "grad_norm": 1.4287199459527116, "learning_rate": 7.489750089947437e-06, "loss": 0.3363, "step": 13600 }, { "epoch": 0.4007188839621113, "grad_norm": 1.4058191459890332, "learning_rate": 7.489304125239503e-06, "loss": 0.3843, "step": 13601 }, { "epoch": 0.40074834641957485, "grad_norm": 1.7039726934714017, "learning_rate": 7.4888581342004685e-06, "loss": 0.4724, "step": 13602 }, { "epoch": 0.40077780887703846, "grad_norm": 1.5124338169571538, "learning_rate": 7.488412116835051e-06, "loss": 0.5016, "step": 13603 }, { "epoch": 0.400807271334502, "grad_norm": 1.8008547124227507, "learning_rate": 7.4879660731479675e-06, "loss": 0.603, "step": 13604 }, { "epoch": 0.4008367337919656, "grad_norm": 1.4024783929462932, "learning_rate": 7.487520003143938e-06, "loss": 0.4211, "step": 13605 }, { "epoch": 0.40086619624942915, "grad_norm": 1.6335526162540064, "learning_rate": 7.487073906827681e-06, "loss": 0.6585, "step": 13606 }, { "epoch": 0.40089565870689275, "grad_norm": 1.5188073509078734, "learning_rate": 7.4866277842039135e-06, "loss": 0.3457, "step": 13607 }, { "epoch": 0.4009251211643563, "grad_norm": 1.6082569758232232, "learning_rate": 7.486181635277354e-06, "loss": 0.4943, "step": 13608 }, { "epoch": 0.4009545836218199, "grad_norm": 1.5490478774152503, "learning_rate": 7.485735460052724e-06, "loss": 0.5019, "step": 13609 }, { "epoch": 0.40098404607928345, "grad_norm": 1.586133486001202, "learning_rate": 7.485289258534741e-06, "loss": 0.4335, "step": 13610 }, { "epoch": 0.40101350853674705, "grad_norm": 1.5251357683323965, "learning_rate": 7.484843030728126e-06, "loss": 0.5179, "step": 13611 }, { "epoch": 0.4010429709942106, "grad_norm": 1.3338569073885465, "learning_rate": 7.4843967766376e-06, "loss": 0.3899, "step": 13612 }, { "epoch": 0.4010724334516742, "grad_norm": 1.5695348537761924, "learning_rate": 7.483950496267881e-06, "loss": 0.4756, "step": 13613 }, { "epoch": 0.4011018959091378, "grad_norm": 1.527349596530169, "learning_rate": 7.48350418962369e-06, "loss": 0.4414, "step": 13614 }, { "epoch": 0.40113135836660135, "grad_norm": 1.5600870775421554, "learning_rate": 7.48305785670975e-06, "loss": 0.4665, "step": 13615 }, { "epoch": 0.40116082082406496, "grad_norm": 1.6660408917647207, "learning_rate": 7.4826114975307795e-06, "loss": 0.5352, "step": 13616 }, { "epoch": 0.4011902832815285, "grad_norm": 1.709637802019274, "learning_rate": 7.482165112091501e-06, "loss": 0.559, "step": 13617 }, { "epoch": 0.4012197457389921, "grad_norm": 1.569790150290872, "learning_rate": 7.481718700396639e-06, "loss": 0.4649, "step": 13618 }, { "epoch": 0.40124920819645565, "grad_norm": 1.848566901229116, "learning_rate": 7.481272262450911e-06, "loss": 0.4711, "step": 13619 }, { "epoch": 0.40127867065391926, "grad_norm": 1.8339002704293923, "learning_rate": 7.480825798259039e-06, "loss": 0.4016, "step": 13620 }, { "epoch": 0.4013081331113828, "grad_norm": 1.5460943967804262, "learning_rate": 7.480379307825751e-06, "loss": 0.428, "step": 13621 }, { "epoch": 0.4013375955688464, "grad_norm": 1.5695384191446757, "learning_rate": 7.479932791155766e-06, "loss": 0.4381, "step": 13622 }, { "epoch": 0.40136705802630995, "grad_norm": 1.529918373767893, "learning_rate": 7.479486248253807e-06, "loss": 0.4867, "step": 13623 }, { "epoch": 0.40139652048377356, "grad_norm": 1.6604554820135506, "learning_rate": 7.479039679124599e-06, "loss": 0.511, "step": 13624 }, { "epoch": 0.4014259829412371, "grad_norm": 1.3899069338285452, "learning_rate": 7.478593083772863e-06, "loss": 0.4462, "step": 13625 }, { "epoch": 0.4014554453987007, "grad_norm": 1.3630063102894112, "learning_rate": 7.478146462203326e-06, "loss": 0.4204, "step": 13626 }, { "epoch": 0.4014849078561643, "grad_norm": 1.3724999878712465, "learning_rate": 7.477699814420711e-06, "loss": 0.438, "step": 13627 }, { "epoch": 0.40151437031362786, "grad_norm": 1.5307229471617618, "learning_rate": 7.477253140429743e-06, "loss": 0.4521, "step": 13628 }, { "epoch": 0.40154383277109146, "grad_norm": 1.7093378244678348, "learning_rate": 7.476806440235145e-06, "loss": 0.4968, "step": 13629 }, { "epoch": 0.401573295228555, "grad_norm": 1.5071222883390798, "learning_rate": 7.476359713841644e-06, "loss": 0.538, "step": 13630 }, { "epoch": 0.4016027576860186, "grad_norm": 1.5289237819103216, "learning_rate": 7.475912961253964e-06, "loss": 0.3708, "step": 13631 }, { "epoch": 0.40163222014348215, "grad_norm": 1.6028802021007578, "learning_rate": 7.4754661824768315e-06, "loss": 0.6456, "step": 13632 }, { "epoch": 0.40166168260094576, "grad_norm": 1.4145925665600594, "learning_rate": 7.475019377514973e-06, "loss": 0.3429, "step": 13633 }, { "epoch": 0.4016911450584093, "grad_norm": 1.5606744877968326, "learning_rate": 7.4745725463731135e-06, "loss": 0.3764, "step": 13634 }, { "epoch": 0.4017206075158729, "grad_norm": 1.337756699528921, "learning_rate": 7.474125689055979e-06, "loss": 0.4867, "step": 13635 }, { "epoch": 0.40175006997333645, "grad_norm": 1.510929296528019, "learning_rate": 7.4736788055682975e-06, "loss": 0.4893, "step": 13636 }, { "epoch": 0.40177953243080006, "grad_norm": 1.5204424988732967, "learning_rate": 7.4732318959147965e-06, "loss": 0.4183, "step": 13637 }, { "epoch": 0.4018089948882636, "grad_norm": 1.754768524298468, "learning_rate": 7.4727849601002e-06, "loss": 0.5193, "step": 13638 }, { "epoch": 0.4018384573457272, "grad_norm": 1.4416005080731278, "learning_rate": 7.47233799812924e-06, "loss": 0.391, "step": 13639 }, { "epoch": 0.4018679198031908, "grad_norm": 1.4705196654914965, "learning_rate": 7.47189101000664e-06, "loss": 0.5292, "step": 13640 }, { "epoch": 0.40189738226065436, "grad_norm": 1.562230564814376, "learning_rate": 7.471443995737132e-06, "loss": 0.546, "step": 13641 }, { "epoch": 0.40192684471811796, "grad_norm": 1.5228071182889895, "learning_rate": 7.470996955325441e-06, "loss": 0.4538, "step": 13642 }, { "epoch": 0.4019563071755815, "grad_norm": 1.3288809684425504, "learning_rate": 7.4705498887762985e-06, "loss": 0.3192, "step": 13643 }, { "epoch": 0.4019857696330451, "grad_norm": 1.4805504694665548, "learning_rate": 7.470102796094432e-06, "loss": 0.5541, "step": 13644 }, { "epoch": 0.40201523209050866, "grad_norm": 1.650342418969319, "learning_rate": 7.469655677284571e-06, "loss": 0.6825, "step": 13645 }, { "epoch": 0.40204469454797226, "grad_norm": 1.675957413961828, "learning_rate": 7.4692085323514454e-06, "loss": 0.459, "step": 13646 }, { "epoch": 0.4020741570054358, "grad_norm": 1.4006301376811596, "learning_rate": 7.468761361299783e-06, "loss": 0.4761, "step": 13647 }, { "epoch": 0.4021036194628994, "grad_norm": 1.8488071490084113, "learning_rate": 7.468314164134316e-06, "loss": 0.5087, "step": 13648 }, { "epoch": 0.40213308192036296, "grad_norm": 1.387468983178795, "learning_rate": 7.467866940859774e-06, "loss": 0.3498, "step": 13649 }, { "epoch": 0.40216254437782656, "grad_norm": 1.3887698571801317, "learning_rate": 7.467419691480887e-06, "loss": 0.4913, "step": 13650 }, { "epoch": 0.4021920068352901, "grad_norm": 1.5610512560634207, "learning_rate": 7.4669724160023875e-06, "loss": 0.4162, "step": 13651 }, { "epoch": 0.4022214692927537, "grad_norm": 1.5636913434657336, "learning_rate": 7.466525114429006e-06, "loss": 0.4645, "step": 13652 }, { "epoch": 0.4022509317502173, "grad_norm": 1.4814975955034921, "learning_rate": 7.466077786765472e-06, "loss": 0.6242, "step": 13653 }, { "epoch": 0.40228039420768086, "grad_norm": 1.6743585567232648, "learning_rate": 7.46563043301652e-06, "loss": 0.4566, "step": 13654 }, { "epoch": 0.40230985666514446, "grad_norm": 1.6111212153059469, "learning_rate": 7.4651830531868796e-06, "loss": 0.4986, "step": 13655 }, { "epoch": 0.402339319122608, "grad_norm": 1.7023281792168716, "learning_rate": 7.464735647281286e-06, "loss": 0.4542, "step": 13656 }, { "epoch": 0.4023687815800716, "grad_norm": 1.5197499521883555, "learning_rate": 7.464288215304467e-06, "loss": 0.3958, "step": 13657 }, { "epoch": 0.40239824403753516, "grad_norm": 1.5947913397735016, "learning_rate": 7.463840757261161e-06, "loss": 0.579, "step": 13658 }, { "epoch": 0.40242770649499876, "grad_norm": 1.3658876224128298, "learning_rate": 7.463393273156096e-06, "loss": 0.4793, "step": 13659 }, { "epoch": 0.4024571689524623, "grad_norm": 1.5910904152596341, "learning_rate": 7.462945762994009e-06, "loss": 0.5521, "step": 13660 }, { "epoch": 0.4024866314099259, "grad_norm": 1.3498034216713477, "learning_rate": 7.462498226779634e-06, "loss": 0.3488, "step": 13661 }, { "epoch": 0.40251609386738946, "grad_norm": 1.638838815024669, "learning_rate": 7.462050664517701e-06, "loss": 0.6114, "step": 13662 }, { "epoch": 0.40254555632485306, "grad_norm": 1.6924821894014208, "learning_rate": 7.461603076212946e-06, "loss": 0.438, "step": 13663 }, { "epoch": 0.4025750187823166, "grad_norm": 1.6015076055477904, "learning_rate": 7.461155461870107e-06, "loss": 0.5711, "step": 13664 }, { "epoch": 0.4026044812397802, "grad_norm": 1.3273218088005274, "learning_rate": 7.460707821493911e-06, "loss": 0.4077, "step": 13665 }, { "epoch": 0.4026339436972438, "grad_norm": 1.6159936137373452, "learning_rate": 7.460260155089101e-06, "loss": 0.5034, "step": 13666 }, { "epoch": 0.40266340615470736, "grad_norm": 1.4960878094631809, "learning_rate": 7.4598124626604085e-06, "loss": 0.3591, "step": 13667 }, { "epoch": 0.40269286861217096, "grad_norm": 1.6904008053541009, "learning_rate": 7.459364744212568e-06, "loss": 0.6406, "step": 13668 }, { "epoch": 0.4027223310696345, "grad_norm": 1.3833340677748454, "learning_rate": 7.458916999750317e-06, "loss": 0.365, "step": 13669 }, { "epoch": 0.4027517935270981, "grad_norm": 1.572965079364913, "learning_rate": 7.4584692292783925e-06, "loss": 0.507, "step": 13670 }, { "epoch": 0.40278125598456166, "grad_norm": 1.6953729718322528, "learning_rate": 7.45802143280153e-06, "loss": 0.5239, "step": 13671 }, { "epoch": 0.40281071844202526, "grad_norm": 1.83669636770401, "learning_rate": 7.457573610324465e-06, "loss": 0.571, "step": 13672 }, { "epoch": 0.4028401808994888, "grad_norm": 1.6787308486702397, "learning_rate": 7.4571257618519355e-06, "loss": 0.5541, "step": 13673 }, { "epoch": 0.4028696433569524, "grad_norm": 1.6798736816898852, "learning_rate": 7.456677887388677e-06, "loss": 0.5384, "step": 13674 }, { "epoch": 0.40289910581441596, "grad_norm": 1.5972256229827493, "learning_rate": 7.45622998693943e-06, "loss": 0.3864, "step": 13675 }, { "epoch": 0.40292856827187956, "grad_norm": 1.5308989509668756, "learning_rate": 7.455782060508932e-06, "loss": 0.3785, "step": 13676 }, { "epoch": 0.4029580307293431, "grad_norm": 1.4862445035558747, "learning_rate": 7.455334108101917e-06, "loss": 0.3807, "step": 13677 }, { "epoch": 0.4029874931868067, "grad_norm": 1.4944829691612862, "learning_rate": 7.454886129723128e-06, "loss": 0.3946, "step": 13678 }, { "epoch": 0.4030169556442703, "grad_norm": 1.4762689019419628, "learning_rate": 7.454438125377301e-06, "loss": 0.5107, "step": 13679 }, { "epoch": 0.40304641810173386, "grad_norm": 1.4400111576633146, "learning_rate": 7.453990095069175e-06, "loss": 0.4648, "step": 13680 }, { "epoch": 0.40307588055919746, "grad_norm": 1.4359235708540867, "learning_rate": 7.4535420388034905e-06, "loss": 0.4547, "step": 13681 }, { "epoch": 0.403105343016661, "grad_norm": 1.5858595161198272, "learning_rate": 7.453093956584987e-06, "loss": 0.5392, "step": 13682 }, { "epoch": 0.4031348054741246, "grad_norm": 1.6217740943332157, "learning_rate": 7.452645848418402e-06, "loss": 0.4153, "step": 13683 }, { "epoch": 0.40316426793158816, "grad_norm": 1.4093051250363033, "learning_rate": 7.452197714308475e-06, "loss": 0.4141, "step": 13684 }, { "epoch": 0.40319373038905176, "grad_norm": 1.256080107826201, "learning_rate": 7.45174955425995e-06, "loss": 0.3067, "step": 13685 }, { "epoch": 0.4032231928465153, "grad_norm": 1.3868320510495165, "learning_rate": 7.451301368277566e-06, "loss": 0.415, "step": 13686 }, { "epoch": 0.4032526553039789, "grad_norm": 1.6732042129027795, "learning_rate": 7.450853156366062e-06, "loss": 0.5579, "step": 13687 }, { "epoch": 0.40328211776144246, "grad_norm": 1.5723917007929726, "learning_rate": 7.450404918530181e-06, "loss": 0.5033, "step": 13688 }, { "epoch": 0.40331158021890606, "grad_norm": 1.3711275669663174, "learning_rate": 7.449956654774663e-06, "loss": 0.5024, "step": 13689 }, { "epoch": 0.4033410426763696, "grad_norm": 1.6956426021498012, "learning_rate": 7.44950836510425e-06, "loss": 0.5764, "step": 13690 }, { "epoch": 0.4033705051338332, "grad_norm": 1.3728848396938946, "learning_rate": 7.4490600495236855e-06, "loss": 0.5509, "step": 13691 }, { "epoch": 0.4033999675912968, "grad_norm": 1.6712486087385192, "learning_rate": 7.448611708037709e-06, "loss": 0.4723, "step": 13692 }, { "epoch": 0.40342943004876036, "grad_norm": 1.4387731295302129, "learning_rate": 7.448163340651062e-06, "loss": 0.4709, "step": 13693 }, { "epoch": 0.40345889250622397, "grad_norm": 1.5072731980760263, "learning_rate": 7.447714947368493e-06, "loss": 0.4421, "step": 13694 }, { "epoch": 0.4034883549636875, "grad_norm": 1.9037946055087374, "learning_rate": 7.447266528194738e-06, "loss": 0.5984, "step": 13695 }, { "epoch": 0.4035178174211511, "grad_norm": 1.5151691023849918, "learning_rate": 7.446818083134546e-06, "loss": 0.4623, "step": 13696 }, { "epoch": 0.40354727987861466, "grad_norm": 1.5588020057285341, "learning_rate": 7.4463696121926575e-06, "loss": 0.5067, "step": 13697 }, { "epoch": 0.40357674233607826, "grad_norm": 1.6226732491585774, "learning_rate": 7.445921115373817e-06, "loss": 0.5391, "step": 13698 }, { "epoch": 0.4036062047935418, "grad_norm": 1.2909011794215068, "learning_rate": 7.445472592682767e-06, "loss": 0.3933, "step": 13699 }, { "epoch": 0.4036356672510054, "grad_norm": 1.5140063574699787, "learning_rate": 7.445024044124254e-06, "loss": 0.3456, "step": 13700 }, { "epoch": 0.40366512970846896, "grad_norm": 1.4294667523045046, "learning_rate": 7.444575469703022e-06, "loss": 0.4499, "step": 13701 }, { "epoch": 0.40369459216593256, "grad_norm": 1.7193826767541827, "learning_rate": 7.444126869423816e-06, "loss": 0.4708, "step": 13702 }, { "epoch": 0.4037240546233961, "grad_norm": 1.4932704283046416, "learning_rate": 7.443678243291379e-06, "loss": 0.4795, "step": 13703 }, { "epoch": 0.4037535170808597, "grad_norm": 1.6983246220354802, "learning_rate": 7.443229591310459e-06, "loss": 0.4702, "step": 13704 }, { "epoch": 0.4037829795383233, "grad_norm": 1.4973976651438994, "learning_rate": 7.442780913485802e-06, "loss": 0.6216, "step": 13705 }, { "epoch": 0.40381244199578686, "grad_norm": 1.640126655869137, "learning_rate": 7.442332209822154e-06, "loss": 0.5697, "step": 13706 }, { "epoch": 0.40384190445325047, "grad_norm": 1.4828725527647277, "learning_rate": 7.441883480324258e-06, "loss": 0.4633, "step": 13707 }, { "epoch": 0.403871366910714, "grad_norm": 1.7648111917881601, "learning_rate": 7.4414347249968635e-06, "loss": 0.5026, "step": 13708 }, { "epoch": 0.4039008293681776, "grad_norm": 1.4430842288662213, "learning_rate": 7.440985943844716e-06, "loss": 0.4755, "step": 13709 }, { "epoch": 0.40393029182564116, "grad_norm": 1.4957326600353456, "learning_rate": 7.4405371368725636e-06, "loss": 0.5448, "step": 13710 }, { "epoch": 0.40395975428310477, "grad_norm": 1.5858931209191662, "learning_rate": 7.440088304085153e-06, "loss": 0.5448, "step": 13711 }, { "epoch": 0.4039892167405683, "grad_norm": 1.8890037457284103, "learning_rate": 7.4396394454872326e-06, "loss": 0.6082, "step": 13712 }, { "epoch": 0.4040186791980319, "grad_norm": 1.6527046713887832, "learning_rate": 7.4391905610835495e-06, "loss": 0.6101, "step": 13713 }, { "epoch": 0.40404814165549546, "grad_norm": 1.4888979491436662, "learning_rate": 7.43874165087885e-06, "loss": 0.3559, "step": 13714 }, { "epoch": 0.40407760411295907, "grad_norm": 1.4814544026442233, "learning_rate": 7.438292714877887e-06, "loss": 0.5085, "step": 13715 }, { "epoch": 0.4041070665704226, "grad_norm": 1.257357606681077, "learning_rate": 7.437843753085406e-06, "loss": 0.34, "step": 13716 }, { "epoch": 0.4041365290278862, "grad_norm": 1.7053306808264113, "learning_rate": 7.437394765506156e-06, "loss": 0.5892, "step": 13717 }, { "epoch": 0.4041659914853498, "grad_norm": 1.459071398235649, "learning_rate": 7.436945752144888e-06, "loss": 0.4698, "step": 13718 }, { "epoch": 0.40419545394281337, "grad_norm": 1.6696173223732766, "learning_rate": 7.43649671300635e-06, "loss": 0.4242, "step": 13719 }, { "epoch": 0.40422491640027697, "grad_norm": 1.4984881755375987, "learning_rate": 7.4360476480952926e-06, "loss": 0.4588, "step": 13720 }, { "epoch": 0.4042543788577405, "grad_norm": 1.4660587997644583, "learning_rate": 7.435598557416466e-06, "loss": 0.4142, "step": 13721 }, { "epoch": 0.4042838413152041, "grad_norm": 1.5606892305735203, "learning_rate": 7.43514944097462e-06, "loss": 0.444, "step": 13722 }, { "epoch": 0.40431330377266766, "grad_norm": 1.8285359656484803, "learning_rate": 7.434700298774504e-06, "loss": 0.5549, "step": 13723 }, { "epoch": 0.40434276623013127, "grad_norm": 1.8889915239452995, "learning_rate": 7.434251130820872e-06, "loss": 0.4995, "step": 13724 }, { "epoch": 0.4043722286875948, "grad_norm": 1.3489615706099884, "learning_rate": 7.433801937118472e-06, "loss": 0.3584, "step": 13725 }, { "epoch": 0.4044016911450584, "grad_norm": 1.6590252354945987, "learning_rate": 7.433352717672057e-06, "loss": 0.4894, "step": 13726 }, { "epoch": 0.40443115360252196, "grad_norm": 1.4149589154116522, "learning_rate": 7.43290347248638e-06, "loss": 0.4661, "step": 13727 }, { "epoch": 0.40446061605998557, "grad_norm": 1.4122892939614857, "learning_rate": 7.4324542015661905e-06, "loss": 0.3894, "step": 13728 }, { "epoch": 0.4044900785174491, "grad_norm": 1.4387009534948325, "learning_rate": 7.432004904916242e-06, "loss": 0.4643, "step": 13729 }, { "epoch": 0.4045195409749127, "grad_norm": 1.5533878434849588, "learning_rate": 7.4315555825412855e-06, "loss": 0.4669, "step": 13730 }, { "epoch": 0.4045490034323763, "grad_norm": 1.3838571684668812, "learning_rate": 7.4311062344460775e-06, "loss": 0.4475, "step": 13731 }, { "epoch": 0.40457846588983987, "grad_norm": 1.6612357884161149, "learning_rate": 7.430656860635367e-06, "loss": 0.559, "step": 13732 }, { "epoch": 0.40460792834730347, "grad_norm": 1.5791375742891622, "learning_rate": 7.430207461113909e-06, "loss": 0.5462, "step": 13733 }, { "epoch": 0.404637390804767, "grad_norm": 1.6482726463734085, "learning_rate": 7.429758035886457e-06, "loss": 0.421, "step": 13734 }, { "epoch": 0.4046668532622306, "grad_norm": 1.4729872011413092, "learning_rate": 7.429308584957765e-06, "loss": 0.4523, "step": 13735 }, { "epoch": 0.40469631571969417, "grad_norm": 1.5081093268892556, "learning_rate": 7.428859108332588e-06, "loss": 0.4429, "step": 13736 }, { "epoch": 0.40472577817715777, "grad_norm": 1.6275832095972231, "learning_rate": 7.42840960601568e-06, "loss": 0.5281, "step": 13737 }, { "epoch": 0.4047552406346213, "grad_norm": 1.550921286822689, "learning_rate": 7.427960078011793e-06, "loss": 0.457, "step": 13738 }, { "epoch": 0.4047847030920849, "grad_norm": 1.4329684789014294, "learning_rate": 7.427510524325686e-06, "loss": 0.3865, "step": 13739 }, { "epoch": 0.40481416554954847, "grad_norm": 1.378956306763091, "learning_rate": 7.427060944962112e-06, "loss": 0.4061, "step": 13740 }, { "epoch": 0.40484362800701207, "grad_norm": 1.4568680610253457, "learning_rate": 7.426611339925828e-06, "loss": 0.4571, "step": 13741 }, { "epoch": 0.4048730904644756, "grad_norm": 1.599004222528545, "learning_rate": 7.426161709221586e-06, "loss": 0.3373, "step": 13742 }, { "epoch": 0.4049025529219392, "grad_norm": 1.5996115738646188, "learning_rate": 7.425712052854148e-06, "loss": 0.4447, "step": 13743 }, { "epoch": 0.4049320153794028, "grad_norm": 1.4765039442812473, "learning_rate": 7.425262370828266e-06, "loss": 0.4528, "step": 13744 }, { "epoch": 0.40496147783686637, "grad_norm": 1.5692145642319602, "learning_rate": 7.424812663148697e-06, "loss": 0.4212, "step": 13745 }, { "epoch": 0.40499094029432997, "grad_norm": 1.58286099584252, "learning_rate": 7.4243629298202e-06, "loss": 0.5551, "step": 13746 }, { "epoch": 0.4050204027517935, "grad_norm": 1.5164915178162595, "learning_rate": 7.42391317084753e-06, "loss": 0.3996, "step": 13747 }, { "epoch": 0.4050498652092571, "grad_norm": 1.514940169390667, "learning_rate": 7.423463386235445e-06, "loss": 0.4659, "step": 13748 }, { "epoch": 0.40507932766672067, "grad_norm": 1.6160008146438898, "learning_rate": 7.423013575988703e-06, "loss": 0.4523, "step": 13749 }, { "epoch": 0.40510879012418427, "grad_norm": 1.5173338000800638, "learning_rate": 7.422563740112062e-06, "loss": 0.4407, "step": 13750 }, { "epoch": 0.4051382525816478, "grad_norm": 1.4471832456411968, "learning_rate": 7.42211387861028e-06, "loss": 0.4451, "step": 13751 }, { "epoch": 0.4051677150391114, "grad_norm": 1.5393297760323408, "learning_rate": 7.421663991488116e-06, "loss": 0.4549, "step": 13752 }, { "epoch": 0.40519717749657497, "grad_norm": 1.5461778647347775, "learning_rate": 7.421214078750327e-06, "loss": 0.4212, "step": 13753 }, { "epoch": 0.40522663995403857, "grad_norm": 1.3833183901185901, "learning_rate": 7.420764140401675e-06, "loss": 0.3292, "step": 13754 }, { "epoch": 0.4052561024115021, "grad_norm": 1.5062827169240631, "learning_rate": 7.420314176446916e-06, "loss": 0.4269, "step": 13755 }, { "epoch": 0.4052855648689657, "grad_norm": 1.4968712285249035, "learning_rate": 7.419864186890813e-06, "loss": 0.3707, "step": 13756 }, { "epoch": 0.4053150273264293, "grad_norm": 1.7223441238636634, "learning_rate": 7.419414171738123e-06, "loss": 0.6565, "step": 13757 }, { "epoch": 0.40534448978389287, "grad_norm": 1.5963604949157784, "learning_rate": 7.4189641309936075e-06, "loss": 0.4384, "step": 13758 }, { "epoch": 0.40537395224135647, "grad_norm": 1.453379314302376, "learning_rate": 7.418514064662027e-06, "loss": 0.361, "step": 13759 }, { "epoch": 0.40540341469882, "grad_norm": 1.64265705134385, "learning_rate": 7.418063972748142e-06, "loss": 0.4336, "step": 13760 }, { "epoch": 0.4054328771562836, "grad_norm": 1.5995408641411504, "learning_rate": 7.4176138552567136e-06, "loss": 0.4817, "step": 13761 }, { "epoch": 0.40546233961374717, "grad_norm": 1.3467578889515726, "learning_rate": 7.4171637121925e-06, "loss": 0.4017, "step": 13762 }, { "epoch": 0.40549180207121077, "grad_norm": 1.430887084020755, "learning_rate": 7.416713543560267e-06, "loss": 0.4599, "step": 13763 }, { "epoch": 0.4055212645286743, "grad_norm": 1.3876607566752515, "learning_rate": 7.416263349364775e-06, "loss": 0.4209, "step": 13764 }, { "epoch": 0.4055507269861379, "grad_norm": 1.64754356119825, "learning_rate": 7.415813129610785e-06, "loss": 0.6451, "step": 13765 }, { "epoch": 0.40558018944360147, "grad_norm": 1.4020521374122774, "learning_rate": 7.415362884303061e-06, "loss": 0.4265, "step": 13766 }, { "epoch": 0.40560965190106507, "grad_norm": 1.6477501239544619, "learning_rate": 7.414912613446363e-06, "loss": 0.4854, "step": 13767 }, { "epoch": 0.4056391143585286, "grad_norm": 1.482150040113536, "learning_rate": 7.414462317045456e-06, "loss": 0.4385, "step": 13768 }, { "epoch": 0.4056685768159922, "grad_norm": 1.6284090186865137, "learning_rate": 7.4140119951051025e-06, "loss": 0.4853, "step": 13769 }, { "epoch": 0.4056980392734558, "grad_norm": 1.393045753506768, "learning_rate": 7.413561647630066e-06, "loss": 0.4973, "step": 13770 }, { "epoch": 0.40572750173091937, "grad_norm": 1.3195706140972496, "learning_rate": 7.413111274625111e-06, "loss": 0.3055, "step": 13771 }, { "epoch": 0.405756964188383, "grad_norm": 1.7160225170790557, "learning_rate": 7.412660876094998e-06, "loss": 0.5201, "step": 13772 }, { "epoch": 0.4057864266458465, "grad_norm": 1.6739377500718955, "learning_rate": 7.412210452044495e-06, "loss": 0.5229, "step": 13773 }, { "epoch": 0.4058158891033101, "grad_norm": 1.4876450190690569, "learning_rate": 7.4117600024783655e-06, "loss": 0.4751, "step": 13774 }, { "epoch": 0.40584535156077367, "grad_norm": 1.5645613583992939, "learning_rate": 7.411309527401372e-06, "loss": 0.5057, "step": 13775 }, { "epoch": 0.4058748140182373, "grad_norm": 1.7375213609864721, "learning_rate": 7.410859026818284e-06, "loss": 0.5051, "step": 13776 }, { "epoch": 0.4059042764757008, "grad_norm": 1.4077548832526339, "learning_rate": 7.410408500733861e-06, "loss": 0.4207, "step": 13777 }, { "epoch": 0.4059337389331644, "grad_norm": 1.4918972193111883, "learning_rate": 7.409957949152872e-06, "loss": 0.4618, "step": 13778 }, { "epoch": 0.40596320139062797, "grad_norm": 1.3637829173814802, "learning_rate": 7.409507372080083e-06, "loss": 0.4556, "step": 13779 }, { "epoch": 0.4059926638480916, "grad_norm": 1.6006103202632773, "learning_rate": 7.409056769520259e-06, "loss": 0.5133, "step": 13780 }, { "epoch": 0.4060221263055551, "grad_norm": 1.501328710013489, "learning_rate": 7.408606141478168e-06, "loss": 0.4654, "step": 13781 }, { "epoch": 0.4060515887630187, "grad_norm": 1.5509344542469594, "learning_rate": 7.408155487958574e-06, "loss": 0.4526, "step": 13782 }, { "epoch": 0.4060810512204823, "grad_norm": 1.4541563877573234, "learning_rate": 7.407704808966244e-06, "loss": 0.4892, "step": 13783 }, { "epoch": 0.4061105136779459, "grad_norm": 1.585829663846783, "learning_rate": 7.407254104505948e-06, "loss": 0.5447, "step": 13784 }, { "epoch": 0.4061399761354095, "grad_norm": 1.6857320471741886, "learning_rate": 7.406803374582451e-06, "loss": 0.6025, "step": 13785 }, { "epoch": 0.406169438592873, "grad_norm": 1.4903019575610374, "learning_rate": 7.406352619200522e-06, "loss": 0.3919, "step": 13786 }, { "epoch": 0.4061989010503366, "grad_norm": 1.7348752283049556, "learning_rate": 7.405901838364927e-06, "loss": 0.6465, "step": 13787 }, { "epoch": 0.40622836350780017, "grad_norm": 1.6499421241709564, "learning_rate": 7.4054510320804375e-06, "loss": 0.6393, "step": 13788 }, { "epoch": 0.4062578259652638, "grad_norm": 1.533406836953357, "learning_rate": 7.405000200351819e-06, "loss": 0.3622, "step": 13789 }, { "epoch": 0.4062872884227273, "grad_norm": 1.5285781827894478, "learning_rate": 7.404549343183841e-06, "loss": 0.4312, "step": 13790 }, { "epoch": 0.4063167508801909, "grad_norm": 1.5280069157823437, "learning_rate": 7.4040984605812746e-06, "loss": 0.4657, "step": 13791 }, { "epoch": 0.40634621333765447, "grad_norm": 1.570306202764222, "learning_rate": 7.403647552548885e-06, "loss": 0.4819, "step": 13792 }, { "epoch": 0.4063756757951181, "grad_norm": 1.766511431961259, "learning_rate": 7.403196619091445e-06, "loss": 0.4608, "step": 13793 }, { "epoch": 0.4064051382525816, "grad_norm": 1.4043329191861071, "learning_rate": 7.4027456602137245e-06, "loss": 0.4654, "step": 13794 }, { "epoch": 0.4064346007100452, "grad_norm": 1.6982975008100365, "learning_rate": 7.4022946759204916e-06, "loss": 0.4683, "step": 13795 }, { "epoch": 0.4064640631675088, "grad_norm": 1.3419973713427884, "learning_rate": 7.401843666216518e-06, "loss": 0.4401, "step": 13796 }, { "epoch": 0.4064935256249724, "grad_norm": 1.7840452850991848, "learning_rate": 7.401392631106575e-06, "loss": 0.5498, "step": 13797 }, { "epoch": 0.406522988082436, "grad_norm": 1.2846255260172619, "learning_rate": 7.4009415705954315e-06, "loss": 0.3534, "step": 13798 }, { "epoch": 0.4065524505398995, "grad_norm": 1.5619049879243847, "learning_rate": 7.400490484687859e-06, "loss": 0.4136, "step": 13799 }, { "epoch": 0.4065819129973631, "grad_norm": 1.3684354004326826, "learning_rate": 7.400039373388632e-06, "loss": 0.4387, "step": 13800 }, { "epoch": 0.4066113754548267, "grad_norm": 1.5440853863011, "learning_rate": 7.399588236702519e-06, "loss": 0.4185, "step": 13801 }, { "epoch": 0.4066408379122903, "grad_norm": 1.789476487801004, "learning_rate": 7.399137074634293e-06, "loss": 0.5798, "step": 13802 }, { "epoch": 0.4066703003697538, "grad_norm": 1.923927210489882, "learning_rate": 7.398685887188726e-06, "loss": 0.6613, "step": 13803 }, { "epoch": 0.4066997628272174, "grad_norm": 1.5408361243261666, "learning_rate": 7.398234674370589e-06, "loss": 0.4492, "step": 13804 }, { "epoch": 0.406729225284681, "grad_norm": 1.4924987732392878, "learning_rate": 7.397783436184658e-06, "loss": 0.4872, "step": 13805 }, { "epoch": 0.4067586877421446, "grad_norm": 1.4384971049528406, "learning_rate": 7.397332172635706e-06, "loss": 0.4541, "step": 13806 }, { "epoch": 0.4067881501996081, "grad_norm": 1.6341604479959213, "learning_rate": 7.396880883728503e-06, "loss": 0.4926, "step": 13807 }, { "epoch": 0.4068176126570717, "grad_norm": 1.4100482188734056, "learning_rate": 7.396429569467825e-06, "loss": 0.4103, "step": 13808 }, { "epoch": 0.40684707511453533, "grad_norm": 1.5617146308789724, "learning_rate": 7.395978229858445e-06, "loss": 0.521, "step": 13809 }, { "epoch": 0.4068765375719989, "grad_norm": 1.4147987431276585, "learning_rate": 7.395526864905138e-06, "loss": 0.5117, "step": 13810 }, { "epoch": 0.4069060000294625, "grad_norm": 1.4187206210688736, "learning_rate": 7.395075474612677e-06, "loss": 0.4224, "step": 13811 }, { "epoch": 0.406935462486926, "grad_norm": 1.5765275825021663, "learning_rate": 7.39462405898584e-06, "loss": 0.5512, "step": 13812 }, { "epoch": 0.4069649249443896, "grad_norm": 1.3942832585273284, "learning_rate": 7.394172618029397e-06, "loss": 0.3142, "step": 13813 }, { "epoch": 0.4069943874018532, "grad_norm": 1.6651311674080103, "learning_rate": 7.393721151748126e-06, "loss": 0.3992, "step": 13814 }, { "epoch": 0.4070238498593168, "grad_norm": 1.5086120416857611, "learning_rate": 7.393269660146802e-06, "loss": 0.4029, "step": 13815 }, { "epoch": 0.4070533123167803, "grad_norm": 1.4685508646367205, "learning_rate": 7.392818143230202e-06, "loss": 0.5164, "step": 13816 }, { "epoch": 0.4070827747742439, "grad_norm": 1.455244159591536, "learning_rate": 7.3923666010031e-06, "loss": 0.4582, "step": 13817 }, { "epoch": 0.4071122372317075, "grad_norm": 1.4956690547539764, "learning_rate": 7.391915033470275e-06, "loss": 0.4408, "step": 13818 }, { "epoch": 0.4071416996891711, "grad_norm": 1.5208323620045563, "learning_rate": 7.391463440636499e-06, "loss": 0.5632, "step": 13819 }, { "epoch": 0.4071711621466346, "grad_norm": 1.4854514533074623, "learning_rate": 7.391011822506553e-06, "loss": 0.3954, "step": 13820 }, { "epoch": 0.4072006246040982, "grad_norm": 1.4330846003331121, "learning_rate": 7.390560179085213e-06, "loss": 0.3914, "step": 13821 }, { "epoch": 0.40723008706156183, "grad_norm": 1.7033628480908987, "learning_rate": 7.390108510377256e-06, "loss": 0.5091, "step": 13822 }, { "epoch": 0.4072595495190254, "grad_norm": 1.635106473311255, "learning_rate": 7.389656816387458e-06, "loss": 0.5079, "step": 13823 }, { "epoch": 0.407289011976489, "grad_norm": 1.3427546579684795, "learning_rate": 7.389205097120601e-06, "loss": 0.4732, "step": 13824 }, { "epoch": 0.4073184744339525, "grad_norm": 1.7285450045441304, "learning_rate": 7.388753352581458e-06, "loss": 0.4793, "step": 13825 }, { "epoch": 0.40734793689141613, "grad_norm": 1.5895074161304084, "learning_rate": 7.388301582774812e-06, "loss": 0.5225, "step": 13826 }, { "epoch": 0.4073773993488797, "grad_norm": 1.5084124529390703, "learning_rate": 7.387849787705439e-06, "loss": 0.3928, "step": 13827 }, { "epoch": 0.4074068618063433, "grad_norm": 1.4657183541860483, "learning_rate": 7.387397967378119e-06, "loss": 0.3918, "step": 13828 }, { "epoch": 0.4074363242638068, "grad_norm": 1.5689606603133188, "learning_rate": 7.386946121797629e-06, "loss": 0.6036, "step": 13829 }, { "epoch": 0.40746578672127043, "grad_norm": 1.8644911122990748, "learning_rate": 7.386494250968752e-06, "loss": 0.3267, "step": 13830 }, { "epoch": 0.407495249178734, "grad_norm": 1.4906662733483294, "learning_rate": 7.386042354896267e-06, "loss": 0.4023, "step": 13831 }, { "epoch": 0.4075247116361976, "grad_norm": 1.6500154607081103, "learning_rate": 7.385590433584951e-06, "loss": 0.5711, "step": 13832 }, { "epoch": 0.4075541740936611, "grad_norm": 1.5166799438782568, "learning_rate": 7.385138487039589e-06, "loss": 0.5442, "step": 13833 }, { "epoch": 0.40758363655112473, "grad_norm": 1.5596747533353534, "learning_rate": 7.384686515264956e-06, "loss": 0.4412, "step": 13834 }, { "epoch": 0.40761309900858833, "grad_norm": 1.3080304291818645, "learning_rate": 7.384234518265837e-06, "loss": 0.3309, "step": 13835 }, { "epoch": 0.4076425614660519, "grad_norm": 1.5553180435432958, "learning_rate": 7.383782496047012e-06, "loss": 0.4398, "step": 13836 }, { "epoch": 0.4076720239235155, "grad_norm": 1.695380481819743, "learning_rate": 7.383330448613264e-06, "loss": 0.5106, "step": 13837 }, { "epoch": 0.40770148638097903, "grad_norm": 1.709263546956562, "learning_rate": 7.382878375969371e-06, "loss": 0.596, "step": 13838 }, { "epoch": 0.40773094883844263, "grad_norm": 1.7561899833149537, "learning_rate": 7.382426278120117e-06, "loss": 0.5605, "step": 13839 }, { "epoch": 0.4077604112959062, "grad_norm": 1.4539359082863492, "learning_rate": 7.3819741550702836e-06, "loss": 0.4857, "step": 13840 }, { "epoch": 0.4077898737533698, "grad_norm": 1.6036926705596497, "learning_rate": 7.381522006824654e-06, "loss": 0.5011, "step": 13841 }, { "epoch": 0.4078193362108333, "grad_norm": 1.4170922996849384, "learning_rate": 7.38106983338801e-06, "loss": 0.4029, "step": 13842 }, { "epoch": 0.40784879866829693, "grad_norm": 1.4783549535589438, "learning_rate": 7.380617634765136e-06, "loss": 0.3863, "step": 13843 }, { "epoch": 0.4078782611257605, "grad_norm": 1.546957053775723, "learning_rate": 7.380165410960812e-06, "loss": 0.4494, "step": 13844 }, { "epoch": 0.4079077235832241, "grad_norm": 1.4125579919313533, "learning_rate": 7.379713161979827e-06, "loss": 0.3863, "step": 13845 }, { "epoch": 0.4079371860406876, "grad_norm": 1.5158852287281712, "learning_rate": 7.379260887826959e-06, "loss": 0.3784, "step": 13846 }, { "epoch": 0.40796664849815123, "grad_norm": 1.302145991783082, "learning_rate": 7.378808588506995e-06, "loss": 0.3394, "step": 13847 }, { "epoch": 0.40799611095561483, "grad_norm": 1.6170716431196632, "learning_rate": 7.378356264024718e-06, "loss": 0.5062, "step": 13848 }, { "epoch": 0.4080255734130784, "grad_norm": 1.475674166891452, "learning_rate": 7.377903914384914e-06, "loss": 0.3977, "step": 13849 }, { "epoch": 0.408055035870542, "grad_norm": 1.78858404761619, "learning_rate": 7.377451539592366e-06, "loss": 0.4863, "step": 13850 }, { "epoch": 0.40808449832800553, "grad_norm": 1.4164917391587835, "learning_rate": 7.3769991396518615e-06, "loss": 0.4655, "step": 13851 }, { "epoch": 0.40811396078546913, "grad_norm": 1.4696669011529735, "learning_rate": 7.376546714568185e-06, "loss": 0.4369, "step": 13852 }, { "epoch": 0.4081434232429327, "grad_norm": 1.4212738149422555, "learning_rate": 7.3760942643461205e-06, "loss": 0.4152, "step": 13853 }, { "epoch": 0.4081728857003963, "grad_norm": 1.8055294710509617, "learning_rate": 7.375641788990456e-06, "loss": 0.4543, "step": 13854 }, { "epoch": 0.40820234815785983, "grad_norm": 1.4787733156975875, "learning_rate": 7.375189288505975e-06, "loss": 0.3179, "step": 13855 }, { "epoch": 0.40823181061532343, "grad_norm": 1.5323637890847956, "learning_rate": 7.374736762897468e-06, "loss": 0.4297, "step": 13856 }, { "epoch": 0.408261273072787, "grad_norm": 1.4859842629041036, "learning_rate": 7.3742842121697155e-06, "loss": 0.3876, "step": 13857 }, { "epoch": 0.4082907355302506, "grad_norm": 1.384229805998333, "learning_rate": 7.373831636327512e-06, "loss": 0.2612, "step": 13858 }, { "epoch": 0.40832019798771413, "grad_norm": 1.3414815923467551, "learning_rate": 7.373379035375639e-06, "loss": 0.4025, "step": 13859 }, { "epoch": 0.40834966044517773, "grad_norm": 1.632829254731113, "learning_rate": 7.372926409318886e-06, "loss": 0.526, "step": 13860 }, { "epoch": 0.40837912290264133, "grad_norm": 1.7703574304442276, "learning_rate": 7.372473758162042e-06, "loss": 0.6333, "step": 13861 }, { "epoch": 0.4084085853601049, "grad_norm": 1.5795110832327228, "learning_rate": 7.372021081909892e-06, "loss": 0.474, "step": 13862 }, { "epoch": 0.4084380478175685, "grad_norm": 1.6063172920265683, "learning_rate": 7.371568380567226e-06, "loss": 0.5464, "step": 13863 }, { "epoch": 0.40846751027503203, "grad_norm": 1.520884450925802, "learning_rate": 7.371115654138832e-06, "loss": 0.5577, "step": 13864 }, { "epoch": 0.40849697273249563, "grad_norm": 1.5678847303309884, "learning_rate": 7.3706629026294995e-06, "loss": 0.4947, "step": 13865 }, { "epoch": 0.4085264351899592, "grad_norm": 1.5631547247510402, "learning_rate": 7.3702101260440185e-06, "loss": 0.4466, "step": 13866 }, { "epoch": 0.4085558976474228, "grad_norm": 1.4567718105819243, "learning_rate": 7.369757324387177e-06, "loss": 0.4691, "step": 13867 }, { "epoch": 0.40858536010488633, "grad_norm": 1.4667942780097702, "learning_rate": 7.369304497663764e-06, "loss": 0.4683, "step": 13868 }, { "epoch": 0.40861482256234993, "grad_norm": 1.4393114291117792, "learning_rate": 7.368851645878571e-06, "loss": 0.419, "step": 13869 }, { "epoch": 0.4086442850198135, "grad_norm": 1.5140561721076666, "learning_rate": 7.368398769036386e-06, "loss": 0.4234, "step": 13870 }, { "epoch": 0.4086737474772771, "grad_norm": 1.5454038846061127, "learning_rate": 7.3679458671420015e-06, "loss": 0.4298, "step": 13871 }, { "epoch": 0.40870320993474063, "grad_norm": 1.5616700145568314, "learning_rate": 7.367492940200206e-06, "loss": 0.4176, "step": 13872 }, { "epoch": 0.40873267239220423, "grad_norm": 1.6147178522143981, "learning_rate": 7.367039988215792e-06, "loss": 0.4932, "step": 13873 }, { "epoch": 0.40876213484966784, "grad_norm": 1.5010034108233916, "learning_rate": 7.366587011193551e-06, "loss": 0.5357, "step": 13874 }, { "epoch": 0.4087915973071314, "grad_norm": 1.6211837214649767, "learning_rate": 7.366134009138275e-06, "loss": 0.5908, "step": 13875 }, { "epoch": 0.408821059764595, "grad_norm": 1.4532845253124884, "learning_rate": 7.365680982054753e-06, "loss": 0.3629, "step": 13876 }, { "epoch": 0.40885052222205853, "grad_norm": 1.5760500302507008, "learning_rate": 7.365227929947778e-06, "loss": 0.4061, "step": 13877 }, { "epoch": 0.40887998467952213, "grad_norm": 1.5146760278118947, "learning_rate": 7.364774852822144e-06, "loss": 0.4033, "step": 13878 }, { "epoch": 0.4089094471369857, "grad_norm": 1.5322099285513722, "learning_rate": 7.364321750682643e-06, "loss": 0.4668, "step": 13879 }, { "epoch": 0.4089389095944493, "grad_norm": 1.6264815264804628, "learning_rate": 7.363868623534065e-06, "loss": 0.5792, "step": 13880 }, { "epoch": 0.40896837205191283, "grad_norm": 1.7476006324957634, "learning_rate": 7.363415471381207e-06, "loss": 0.6737, "step": 13881 }, { "epoch": 0.40899783450937643, "grad_norm": 1.7480802028419564, "learning_rate": 7.362962294228859e-06, "loss": 0.5829, "step": 13882 }, { "epoch": 0.40902729696684, "grad_norm": 1.3653273726284214, "learning_rate": 7.362509092081816e-06, "loss": 0.4061, "step": 13883 }, { "epoch": 0.4090567594243036, "grad_norm": 1.5364568584645468, "learning_rate": 7.362055864944872e-06, "loss": 0.5533, "step": 13884 }, { "epoch": 0.40908622188176713, "grad_norm": 1.3948250971089058, "learning_rate": 7.361602612822823e-06, "loss": 0.4194, "step": 13885 }, { "epoch": 0.40911568433923073, "grad_norm": 1.4689825665770684, "learning_rate": 7.361149335720459e-06, "loss": 0.55, "step": 13886 }, { "epoch": 0.40914514679669434, "grad_norm": 1.4691313269584325, "learning_rate": 7.360696033642577e-06, "loss": 0.4879, "step": 13887 }, { "epoch": 0.4091746092541579, "grad_norm": 1.3152063727436403, "learning_rate": 7.360242706593973e-06, "loss": 0.2833, "step": 13888 }, { "epoch": 0.4092040717116215, "grad_norm": 1.5532517629395648, "learning_rate": 7.3597893545794395e-06, "loss": 0.5303, "step": 13889 }, { "epoch": 0.40923353416908503, "grad_norm": 1.3274547398557432, "learning_rate": 7.359335977603774e-06, "loss": 0.4497, "step": 13890 }, { "epoch": 0.40926299662654864, "grad_norm": 1.4092522259530207, "learning_rate": 7.358882575671773e-06, "loss": 0.3663, "step": 13891 }, { "epoch": 0.4092924590840122, "grad_norm": 1.5747734662378596, "learning_rate": 7.3584291487882285e-06, "loss": 0.4925, "step": 13892 }, { "epoch": 0.4093219215414758, "grad_norm": 1.48529885654929, "learning_rate": 7.35797569695794e-06, "loss": 0.5067, "step": 13893 }, { "epoch": 0.40935138399893933, "grad_norm": 1.657188897373579, "learning_rate": 7.357522220185704e-06, "loss": 0.4679, "step": 13894 }, { "epoch": 0.40938084645640294, "grad_norm": 2.0767489325257724, "learning_rate": 7.357068718476316e-06, "loss": 0.6771, "step": 13895 }, { "epoch": 0.4094103089138665, "grad_norm": 1.4011159126904424, "learning_rate": 7.356615191834573e-06, "loss": 0.3628, "step": 13896 }, { "epoch": 0.4094397713713301, "grad_norm": 1.6066648012307814, "learning_rate": 7.3561616402652735e-06, "loss": 0.502, "step": 13897 }, { "epoch": 0.40946923382879363, "grad_norm": 1.550733990415742, "learning_rate": 7.355708063773215e-06, "loss": 0.5023, "step": 13898 }, { "epoch": 0.40949869628625724, "grad_norm": 1.421427499760384, "learning_rate": 7.3552544623631915e-06, "loss": 0.44, "step": 13899 }, { "epoch": 0.40952815874372084, "grad_norm": 1.6240259079138535, "learning_rate": 7.3548008360400066e-06, "loss": 0.6458, "step": 13900 }, { "epoch": 0.4095576212011844, "grad_norm": 1.4519639947297311, "learning_rate": 7.354347184808457e-06, "loss": 0.3651, "step": 13901 }, { "epoch": 0.409587083658648, "grad_norm": 1.4921049496688321, "learning_rate": 7.353893508673338e-06, "loss": 0.4966, "step": 13902 }, { "epoch": 0.40961654611611154, "grad_norm": 1.3961625031347331, "learning_rate": 7.353439807639452e-06, "loss": 0.4514, "step": 13903 }, { "epoch": 0.40964600857357514, "grad_norm": 1.6613977105537665, "learning_rate": 7.352986081711596e-06, "loss": 0.6135, "step": 13904 }, { "epoch": 0.4096754710310387, "grad_norm": 1.5076851471819843, "learning_rate": 7.352532330894571e-06, "loss": 0.437, "step": 13905 }, { "epoch": 0.4097049334885023, "grad_norm": 1.4163289579051268, "learning_rate": 7.352078555193177e-06, "loss": 0.4529, "step": 13906 }, { "epoch": 0.40973439594596583, "grad_norm": 1.6145549099043837, "learning_rate": 7.351624754612211e-06, "loss": 0.562, "step": 13907 }, { "epoch": 0.40976385840342944, "grad_norm": 1.4693975349535437, "learning_rate": 7.351170929156476e-06, "loss": 0.3854, "step": 13908 }, { "epoch": 0.409793320860893, "grad_norm": 1.583704693811202, "learning_rate": 7.350717078830772e-06, "loss": 0.5091, "step": 13909 }, { "epoch": 0.4098227833183566, "grad_norm": 1.9656792866544974, "learning_rate": 7.350263203639898e-06, "loss": 0.4549, "step": 13910 }, { "epoch": 0.40985224577582013, "grad_norm": 1.2674662115441648, "learning_rate": 7.3498093035886564e-06, "loss": 0.3107, "step": 13911 }, { "epoch": 0.40988170823328374, "grad_norm": 1.5404670755590353, "learning_rate": 7.349355378681848e-06, "loss": 0.5131, "step": 13912 }, { "epoch": 0.40991117069074734, "grad_norm": 1.711584209049683, "learning_rate": 7.348901428924275e-06, "loss": 0.6091, "step": 13913 }, { "epoch": 0.4099406331482109, "grad_norm": 1.5792267210218014, "learning_rate": 7.348447454320738e-06, "loss": 0.5201, "step": 13914 }, { "epoch": 0.4099700956056745, "grad_norm": 1.5593207643732319, "learning_rate": 7.34799345487604e-06, "loss": 0.5627, "step": 13915 }, { "epoch": 0.40999955806313804, "grad_norm": 1.6067818249246533, "learning_rate": 7.3475394305949835e-06, "loss": 0.5343, "step": 13916 }, { "epoch": 0.41002902052060164, "grad_norm": 1.4079903130555391, "learning_rate": 7.347085381482368e-06, "loss": 0.5414, "step": 13917 }, { "epoch": 0.4100584829780652, "grad_norm": 1.6095062738868604, "learning_rate": 7.346631307543001e-06, "loss": 0.2822, "step": 13918 }, { "epoch": 0.4100879454355288, "grad_norm": 1.6008441302652217, "learning_rate": 7.346177208781682e-06, "loss": 0.5189, "step": 13919 }, { "epoch": 0.41011740789299234, "grad_norm": 1.4115873250949327, "learning_rate": 7.345723085203215e-06, "loss": 0.4333, "step": 13920 }, { "epoch": 0.41014687035045594, "grad_norm": 1.4702785713117343, "learning_rate": 7.345268936812406e-06, "loss": 0.4452, "step": 13921 }, { "epoch": 0.4101763328079195, "grad_norm": 1.5781355413202816, "learning_rate": 7.344814763614055e-06, "loss": 0.4972, "step": 13922 }, { "epoch": 0.4102057952653831, "grad_norm": 1.5178235636267043, "learning_rate": 7.344360565612968e-06, "loss": 0.4414, "step": 13923 }, { "epoch": 0.41023525772284664, "grad_norm": 1.603363208797533, "learning_rate": 7.343906342813951e-06, "loss": 0.4661, "step": 13924 }, { "epoch": 0.41026472018031024, "grad_norm": 1.4407522401917174, "learning_rate": 7.343452095221806e-06, "loss": 0.4696, "step": 13925 }, { "epoch": 0.41029418263777384, "grad_norm": 1.4765361654460107, "learning_rate": 7.342997822841339e-06, "loss": 0.543, "step": 13926 }, { "epoch": 0.4103236450952374, "grad_norm": 1.5222493935075803, "learning_rate": 7.342543525677356e-06, "loss": 0.3734, "step": 13927 }, { "epoch": 0.410353107552701, "grad_norm": 1.488483599214437, "learning_rate": 7.34208920373466e-06, "loss": 0.4818, "step": 13928 }, { "epoch": 0.41038257001016454, "grad_norm": 1.5748933492007389, "learning_rate": 7.341634857018059e-06, "loss": 0.5167, "step": 13929 }, { "epoch": 0.41041203246762814, "grad_norm": 1.541252445102172, "learning_rate": 7.341180485532358e-06, "loss": 0.5515, "step": 13930 }, { "epoch": 0.4104414949250917, "grad_norm": 1.477128317567131, "learning_rate": 7.340726089282364e-06, "loss": 0.4659, "step": 13931 }, { "epoch": 0.4104709573825553, "grad_norm": 1.528651132192487, "learning_rate": 7.340271668272882e-06, "loss": 0.5386, "step": 13932 }, { "epoch": 0.41050041984001884, "grad_norm": 1.7803891827852392, "learning_rate": 7.339817222508719e-06, "loss": 0.6171, "step": 13933 }, { "epoch": 0.41052988229748244, "grad_norm": 1.6302908372705234, "learning_rate": 7.339362751994683e-06, "loss": 0.5408, "step": 13934 }, { "epoch": 0.410559344754946, "grad_norm": 1.502598870885553, "learning_rate": 7.338908256735581e-06, "loss": 0.4238, "step": 13935 }, { "epoch": 0.4105888072124096, "grad_norm": 1.4786283070107635, "learning_rate": 7.338453736736221e-06, "loss": 0.4957, "step": 13936 }, { "epoch": 0.41061826966987314, "grad_norm": 1.7409320652231923, "learning_rate": 7.337999192001408e-06, "loss": 0.5103, "step": 13937 }, { "epoch": 0.41064773212733674, "grad_norm": 1.7070541265416497, "learning_rate": 7.337544622535954e-06, "loss": 0.3812, "step": 13938 }, { "epoch": 0.41067719458480034, "grad_norm": 1.6919704154814823, "learning_rate": 7.337090028344665e-06, "loss": 0.4915, "step": 13939 }, { "epoch": 0.4107066570422639, "grad_norm": 1.3949057516527388, "learning_rate": 7.336635409432349e-06, "loss": 0.3767, "step": 13940 }, { "epoch": 0.4107361194997275, "grad_norm": 1.5154121767117308, "learning_rate": 7.336180765803816e-06, "loss": 0.5184, "step": 13941 }, { "epoch": 0.41076558195719104, "grad_norm": 1.441479469033358, "learning_rate": 7.335726097463876e-06, "loss": 0.43, "step": 13942 }, { "epoch": 0.41079504441465464, "grad_norm": 1.4337925299711924, "learning_rate": 7.335271404417336e-06, "loss": 0.4405, "step": 13943 }, { "epoch": 0.4108245068721182, "grad_norm": 1.630068195228979, "learning_rate": 7.334816686669006e-06, "loss": 0.6107, "step": 13944 }, { "epoch": 0.4108539693295818, "grad_norm": 1.4987552015710683, "learning_rate": 7.334361944223698e-06, "loss": 0.4448, "step": 13945 }, { "epoch": 0.41088343178704534, "grad_norm": 1.3888800765224345, "learning_rate": 7.3339071770862215e-06, "loss": 0.3556, "step": 13946 }, { "epoch": 0.41091289424450894, "grad_norm": 1.5156475227349349, "learning_rate": 7.333452385261385e-06, "loss": 0.4358, "step": 13947 }, { "epoch": 0.4109423567019725, "grad_norm": 1.5193036861541, "learning_rate": 7.332997568754001e-06, "loss": 0.4366, "step": 13948 }, { "epoch": 0.4109718191594361, "grad_norm": 1.4175283616932302, "learning_rate": 7.3325427275688785e-06, "loss": 0.4691, "step": 13949 }, { "epoch": 0.41100128161689964, "grad_norm": 1.4148167762757295, "learning_rate": 7.3320878617108304e-06, "loss": 0.4576, "step": 13950 }, { "epoch": 0.41103074407436324, "grad_norm": 1.79541973800138, "learning_rate": 7.331632971184668e-06, "loss": 0.4645, "step": 13951 }, { "epoch": 0.41106020653182684, "grad_norm": 1.4854686079149408, "learning_rate": 7.331178055995203e-06, "loss": 0.5494, "step": 13952 }, { "epoch": 0.4110896689892904, "grad_norm": 1.470062084076679, "learning_rate": 7.330723116147246e-06, "loss": 0.4986, "step": 13953 }, { "epoch": 0.411119131446754, "grad_norm": 1.7209839870581358, "learning_rate": 7.330268151645611e-06, "loss": 0.4509, "step": 13954 }, { "epoch": 0.41114859390421754, "grad_norm": 1.6343772504886072, "learning_rate": 7.329813162495108e-06, "loss": 0.5816, "step": 13955 }, { "epoch": 0.41117805636168114, "grad_norm": 1.5895582009886433, "learning_rate": 7.329358148700554e-06, "loss": 0.4269, "step": 13956 }, { "epoch": 0.4112075188191447, "grad_norm": 1.4953981421738494, "learning_rate": 7.3289031102667585e-06, "loss": 0.4776, "step": 13957 }, { "epoch": 0.4112369812766083, "grad_norm": 1.4904160157939939, "learning_rate": 7.328448047198534e-06, "loss": 0.4427, "step": 13958 }, { "epoch": 0.41126644373407184, "grad_norm": 1.592483920875677, "learning_rate": 7.327992959500697e-06, "loss": 0.5183, "step": 13959 }, { "epoch": 0.41129590619153544, "grad_norm": 1.5863902807371955, "learning_rate": 7.32753784717806e-06, "loss": 0.4122, "step": 13960 }, { "epoch": 0.411325368648999, "grad_norm": 1.4534069524438873, "learning_rate": 7.327082710235438e-06, "loss": 0.3719, "step": 13961 }, { "epoch": 0.4113548311064626, "grad_norm": 1.4103053835720716, "learning_rate": 7.326627548677643e-06, "loss": 0.4861, "step": 13962 }, { "epoch": 0.41138429356392614, "grad_norm": 1.5133598348206225, "learning_rate": 7.326172362509491e-06, "loss": 0.6692, "step": 13963 }, { "epoch": 0.41141375602138974, "grad_norm": 1.6051315233225205, "learning_rate": 7.325717151735796e-06, "loss": 0.5581, "step": 13964 }, { "epoch": 0.41144321847885335, "grad_norm": 1.439488143728453, "learning_rate": 7.325261916361375e-06, "loss": 0.4901, "step": 13965 }, { "epoch": 0.4114726809363169, "grad_norm": 1.4321968378127803, "learning_rate": 7.324806656391041e-06, "loss": 0.4799, "step": 13966 }, { "epoch": 0.4115021433937805, "grad_norm": 1.3932022832482684, "learning_rate": 7.3243513718296125e-06, "loss": 0.4629, "step": 13967 }, { "epoch": 0.41153160585124404, "grad_norm": 1.4525735205846269, "learning_rate": 7.323896062681901e-06, "loss": 0.51, "step": 13968 }, { "epoch": 0.41156106830870764, "grad_norm": 1.4130786364174985, "learning_rate": 7.323440728952726e-06, "loss": 0.4928, "step": 13969 }, { "epoch": 0.4115905307661712, "grad_norm": 1.5194682515117646, "learning_rate": 7.3229853706469046e-06, "loss": 0.5063, "step": 13970 }, { "epoch": 0.4116199932236348, "grad_norm": 1.5377882568357983, "learning_rate": 7.32252998776925e-06, "loss": 0.2697, "step": 13971 }, { "epoch": 0.41164945568109834, "grad_norm": 1.4360182711433485, "learning_rate": 7.322074580324582e-06, "loss": 0.4951, "step": 13972 }, { "epoch": 0.41167891813856194, "grad_norm": 1.5083248210686007, "learning_rate": 7.321619148317717e-06, "loss": 0.5062, "step": 13973 }, { "epoch": 0.4117083805960255, "grad_norm": 1.7310650278092756, "learning_rate": 7.321163691753471e-06, "loss": 0.4356, "step": 13974 }, { "epoch": 0.4117378430534891, "grad_norm": 1.4556426811319831, "learning_rate": 7.320708210636665e-06, "loss": 0.3878, "step": 13975 }, { "epoch": 0.41176730551095264, "grad_norm": 1.4144090639302223, "learning_rate": 7.320252704972113e-06, "loss": 0.4419, "step": 13976 }, { "epoch": 0.41179676796841624, "grad_norm": 1.756083971588036, "learning_rate": 7.319797174764636e-06, "loss": 0.4313, "step": 13977 }, { "epoch": 0.41182623042587985, "grad_norm": 1.5061743430406185, "learning_rate": 7.31934162001905e-06, "loss": 0.5325, "step": 13978 }, { "epoch": 0.4118556928833434, "grad_norm": 1.7460388440704957, "learning_rate": 7.318886040740176e-06, "loss": 0.5144, "step": 13979 }, { "epoch": 0.411885155340807, "grad_norm": 1.6125965682862338, "learning_rate": 7.318430436932833e-06, "loss": 0.6314, "step": 13980 }, { "epoch": 0.41191461779827054, "grad_norm": 1.4358215608183162, "learning_rate": 7.317974808601837e-06, "loss": 0.3257, "step": 13981 }, { "epoch": 0.41194408025573415, "grad_norm": 1.3946028233923642, "learning_rate": 7.317519155752013e-06, "loss": 0.4611, "step": 13982 }, { "epoch": 0.4119735427131977, "grad_norm": 1.4592925207884766, "learning_rate": 7.317063478388175e-06, "loss": 0.4543, "step": 13983 }, { "epoch": 0.4120030051706613, "grad_norm": 1.4727507926974683, "learning_rate": 7.3166077765151474e-06, "loss": 0.4599, "step": 13984 }, { "epoch": 0.41203246762812484, "grad_norm": 1.6765741819385267, "learning_rate": 7.3161520501377494e-06, "loss": 0.6406, "step": 13985 }, { "epoch": 0.41206193008558845, "grad_norm": 1.8069832308828147, "learning_rate": 7.315696299260799e-06, "loss": 0.5252, "step": 13986 }, { "epoch": 0.412091392543052, "grad_norm": 1.4862353074046313, "learning_rate": 7.31524052388912e-06, "loss": 0.505, "step": 13987 }, { "epoch": 0.4121208550005156, "grad_norm": 1.6080509289957405, "learning_rate": 7.314784724027532e-06, "loss": 0.445, "step": 13988 }, { "epoch": 0.41215031745797914, "grad_norm": 1.4933077634205276, "learning_rate": 7.314328899680856e-06, "loss": 0.5094, "step": 13989 }, { "epoch": 0.41217977991544275, "grad_norm": 1.5686944366305071, "learning_rate": 7.313873050853915e-06, "loss": 0.5038, "step": 13990 }, { "epoch": 0.41220924237290635, "grad_norm": 1.641361303974986, "learning_rate": 7.31341717755153e-06, "loss": 0.4863, "step": 13991 }, { "epoch": 0.4122387048303699, "grad_norm": 1.4136079604728233, "learning_rate": 7.312961279778523e-06, "loss": 0.428, "step": 13992 }, { "epoch": 0.4122681672878335, "grad_norm": 1.758808217384615, "learning_rate": 7.312505357539716e-06, "loss": 0.5076, "step": 13993 }, { "epoch": 0.41229762974529705, "grad_norm": 1.4521470154922957, "learning_rate": 7.312049410839933e-06, "loss": 0.4499, "step": 13994 }, { "epoch": 0.41232709220276065, "grad_norm": 1.446684696606589, "learning_rate": 7.311593439683996e-06, "loss": 0.4385, "step": 13995 }, { "epoch": 0.4123565546602242, "grad_norm": 1.578205963176684, "learning_rate": 7.311137444076727e-06, "loss": 0.3196, "step": 13996 }, { "epoch": 0.4123860171176878, "grad_norm": 1.5209323196796758, "learning_rate": 7.310681424022952e-06, "loss": 0.3779, "step": 13997 }, { "epoch": 0.41241547957515134, "grad_norm": 1.4899702885090742, "learning_rate": 7.310225379527491e-06, "loss": 0.4764, "step": 13998 }, { "epoch": 0.41244494203261495, "grad_norm": 1.5398202316622553, "learning_rate": 7.309769310595171e-06, "loss": 0.5072, "step": 13999 }, { "epoch": 0.4124744044900785, "grad_norm": 1.5106177741594324, "learning_rate": 7.309313217230817e-06, "loss": 0.3871, "step": 14000 }, { "epoch": 0.4125038669475421, "grad_norm": 1.7860398263376718, "learning_rate": 7.308857099439248e-06, "loss": 0.5953, "step": 14001 }, { "epoch": 0.41253332940500564, "grad_norm": 1.868573980206421, "learning_rate": 7.308400957225295e-06, "loss": 0.6963, "step": 14002 }, { "epoch": 0.41256279186246925, "grad_norm": 1.557200208797563, "learning_rate": 7.307944790593779e-06, "loss": 0.4119, "step": 14003 }, { "epoch": 0.41259225431993285, "grad_norm": 1.7062841177112436, "learning_rate": 7.307488599549526e-06, "loss": 0.4413, "step": 14004 }, { "epoch": 0.4126217167773964, "grad_norm": 1.555092050999793, "learning_rate": 7.307032384097363e-06, "loss": 0.4857, "step": 14005 }, { "epoch": 0.41265117923486, "grad_norm": 1.3838088316686257, "learning_rate": 7.3065761442421145e-06, "loss": 0.4071, "step": 14006 }, { "epoch": 0.41268064169232355, "grad_norm": 1.475636065738771, "learning_rate": 7.306119879988606e-06, "loss": 0.3739, "step": 14007 }, { "epoch": 0.41271010414978715, "grad_norm": 1.67537068122635, "learning_rate": 7.3056635913416625e-06, "loss": 0.5043, "step": 14008 }, { "epoch": 0.4127395666072507, "grad_norm": 1.535378353795436, "learning_rate": 7.3052072783061155e-06, "loss": 0.459, "step": 14009 }, { "epoch": 0.4127690290647143, "grad_norm": 1.5717287208714383, "learning_rate": 7.304750940886787e-06, "loss": 0.4631, "step": 14010 }, { "epoch": 0.41279849152217785, "grad_norm": 1.6007139840847049, "learning_rate": 7.304294579088504e-06, "loss": 0.435, "step": 14011 }, { "epoch": 0.41282795397964145, "grad_norm": 1.5185448038310818, "learning_rate": 7.303838192916098e-06, "loss": 0.3918, "step": 14012 }, { "epoch": 0.412857416437105, "grad_norm": 1.4897661844856795, "learning_rate": 7.303381782374392e-06, "loss": 0.4761, "step": 14013 }, { "epoch": 0.4128868788945686, "grad_norm": 1.5355604656824504, "learning_rate": 7.302925347468215e-06, "loss": 0.4221, "step": 14014 }, { "epoch": 0.41291634135203215, "grad_norm": 1.9758464604887787, "learning_rate": 7.3024688882023974e-06, "loss": 0.6672, "step": 14015 }, { "epoch": 0.41294580380949575, "grad_norm": 1.5465039414270256, "learning_rate": 7.3020124045817645e-06, "loss": 0.4467, "step": 14016 }, { "epoch": 0.41297526626695935, "grad_norm": 1.668493730119949, "learning_rate": 7.301555896611146e-06, "loss": 0.4346, "step": 14017 }, { "epoch": 0.4130047287244229, "grad_norm": 1.6326255653085993, "learning_rate": 7.30109936429537e-06, "loss": 0.5614, "step": 14018 }, { "epoch": 0.4130341911818865, "grad_norm": 1.5597858341790516, "learning_rate": 7.300642807639267e-06, "loss": 0.5687, "step": 14019 }, { "epoch": 0.41306365363935005, "grad_norm": 1.389602007947689, "learning_rate": 7.300186226647665e-06, "loss": 0.5489, "step": 14020 }, { "epoch": 0.41309311609681365, "grad_norm": 1.4070854122398302, "learning_rate": 7.299729621325396e-06, "loss": 0.342, "step": 14021 }, { "epoch": 0.4131225785542772, "grad_norm": 1.5840364968463838, "learning_rate": 7.299272991677284e-06, "loss": 0.478, "step": 14022 }, { "epoch": 0.4131520410117408, "grad_norm": 1.6322937977067187, "learning_rate": 7.298816337708166e-06, "loss": 0.4426, "step": 14023 }, { "epoch": 0.41318150346920435, "grad_norm": 1.5184067973903665, "learning_rate": 7.298359659422869e-06, "loss": 0.4871, "step": 14024 }, { "epoch": 0.41321096592666795, "grad_norm": 1.4452324624594035, "learning_rate": 7.297902956826224e-06, "loss": 0.5384, "step": 14025 }, { "epoch": 0.4132404283841315, "grad_norm": 1.6411827800770964, "learning_rate": 7.297446229923061e-06, "loss": 0.38, "step": 14026 }, { "epoch": 0.4132698908415951, "grad_norm": 1.5029436949855322, "learning_rate": 7.296989478718213e-06, "loss": 0.3778, "step": 14027 }, { "epoch": 0.41329935329905865, "grad_norm": 1.5493285057147976, "learning_rate": 7.296532703216509e-06, "loss": 0.3878, "step": 14028 }, { "epoch": 0.41332881575652225, "grad_norm": 1.3372081947613002, "learning_rate": 7.296075903422783e-06, "loss": 0.4238, "step": 14029 }, { "epoch": 0.41335827821398585, "grad_norm": 1.5470858893830528, "learning_rate": 7.295619079341867e-06, "loss": 0.4043, "step": 14030 }, { "epoch": 0.4133877406714494, "grad_norm": 1.35609179573816, "learning_rate": 7.2951622309785895e-06, "loss": 0.3994, "step": 14031 }, { "epoch": 0.413417203128913, "grad_norm": 1.353568640683473, "learning_rate": 7.294705358337786e-06, "loss": 0.3102, "step": 14032 }, { "epoch": 0.41344666558637655, "grad_norm": 1.3898774847033268, "learning_rate": 7.294248461424289e-06, "loss": 0.2445, "step": 14033 }, { "epoch": 0.41347612804384015, "grad_norm": 1.5473881976270671, "learning_rate": 7.2937915402429305e-06, "loss": 0.5046, "step": 14034 }, { "epoch": 0.4135055905013037, "grad_norm": 1.641811165749875, "learning_rate": 7.2933345947985454e-06, "loss": 0.4199, "step": 14035 }, { "epoch": 0.4135350529587673, "grad_norm": 1.5271413963911535, "learning_rate": 7.292877625095965e-06, "loss": 0.3787, "step": 14036 }, { "epoch": 0.41356451541623085, "grad_norm": 1.6086719588696567, "learning_rate": 7.292420631140024e-06, "loss": 0.5029, "step": 14037 }, { "epoch": 0.41359397787369445, "grad_norm": 1.426036355662397, "learning_rate": 7.291963612935555e-06, "loss": 0.3986, "step": 14038 }, { "epoch": 0.413623440331158, "grad_norm": 1.3999573262352085, "learning_rate": 7.291506570487396e-06, "loss": 0.3361, "step": 14039 }, { "epoch": 0.4136529027886216, "grad_norm": 1.4852809813635042, "learning_rate": 7.291049503800378e-06, "loss": 0.4416, "step": 14040 }, { "epoch": 0.41368236524608515, "grad_norm": 1.4709498486679304, "learning_rate": 7.290592412879335e-06, "loss": 0.4563, "step": 14041 }, { "epoch": 0.41371182770354875, "grad_norm": 1.4854027710415356, "learning_rate": 7.290135297729105e-06, "loss": 0.4912, "step": 14042 }, { "epoch": 0.41374129016101235, "grad_norm": 1.720726587538383, "learning_rate": 7.289678158354521e-06, "loss": 0.5774, "step": 14043 }, { "epoch": 0.4137707526184759, "grad_norm": 1.5993589516573303, "learning_rate": 7.289220994760419e-06, "loss": 0.4833, "step": 14044 }, { "epoch": 0.4138002150759395, "grad_norm": 1.6255783463534712, "learning_rate": 7.288763806951636e-06, "loss": 0.5278, "step": 14045 }, { "epoch": 0.41382967753340305, "grad_norm": 1.5520064188297051, "learning_rate": 7.288306594933007e-06, "loss": 0.5692, "step": 14046 }, { "epoch": 0.41385913999086665, "grad_norm": 1.5551456890007087, "learning_rate": 7.287849358709367e-06, "loss": 0.3333, "step": 14047 }, { "epoch": 0.4138886024483302, "grad_norm": 1.4001458319047402, "learning_rate": 7.287392098285555e-06, "loss": 0.3827, "step": 14048 }, { "epoch": 0.4139180649057938, "grad_norm": 1.509496576411065, "learning_rate": 7.286934813666405e-06, "loss": 0.4192, "step": 14049 }, { "epoch": 0.41394752736325735, "grad_norm": 1.7156558738136614, "learning_rate": 7.286477504856757e-06, "loss": 0.5166, "step": 14050 }, { "epoch": 0.41397698982072095, "grad_norm": 1.4673072947510273, "learning_rate": 7.286020171861447e-06, "loss": 0.4635, "step": 14051 }, { "epoch": 0.4140064522781845, "grad_norm": 1.7505585336677676, "learning_rate": 7.285562814685311e-06, "loss": 0.4926, "step": 14052 }, { "epoch": 0.4140359147356481, "grad_norm": 1.51035512594823, "learning_rate": 7.2851054333331885e-06, "loss": 0.4024, "step": 14053 }, { "epoch": 0.41406537719311165, "grad_norm": 1.5686712625828487, "learning_rate": 7.284648027809917e-06, "loss": 0.4387, "step": 14054 }, { "epoch": 0.41409483965057525, "grad_norm": 1.468610168544306, "learning_rate": 7.284190598120335e-06, "loss": 0.5314, "step": 14055 }, { "epoch": 0.41412430210803886, "grad_norm": 1.62418446439187, "learning_rate": 7.283733144269281e-06, "loss": 0.4163, "step": 14056 }, { "epoch": 0.4141537645655024, "grad_norm": 1.5223131058711867, "learning_rate": 7.283275666261594e-06, "loss": 0.5224, "step": 14057 }, { "epoch": 0.414183227022966, "grad_norm": 1.4681137432172584, "learning_rate": 7.2828181641021126e-06, "loss": 0.4602, "step": 14058 }, { "epoch": 0.41421268948042955, "grad_norm": 1.4446763665630824, "learning_rate": 7.282360637795677e-06, "loss": 0.4641, "step": 14059 }, { "epoch": 0.41424215193789315, "grad_norm": 1.5499381300869202, "learning_rate": 7.2819030873471255e-06, "loss": 0.4697, "step": 14060 }, { "epoch": 0.4142716143953567, "grad_norm": 1.5593838073199233, "learning_rate": 7.281445512761299e-06, "loss": 0.4314, "step": 14061 }, { "epoch": 0.4143010768528203, "grad_norm": 1.4458703383118143, "learning_rate": 7.280987914043036e-06, "loss": 0.3873, "step": 14062 }, { "epoch": 0.41433053931028385, "grad_norm": 1.9726979720206614, "learning_rate": 7.280530291197181e-06, "loss": 0.5383, "step": 14063 }, { "epoch": 0.41436000176774745, "grad_norm": 1.675959438921193, "learning_rate": 7.280072644228569e-06, "loss": 0.4345, "step": 14064 }, { "epoch": 0.414389464225211, "grad_norm": 1.9496706808923174, "learning_rate": 7.279614973142044e-06, "loss": 0.5646, "step": 14065 }, { "epoch": 0.4144189266826746, "grad_norm": 1.3987335619178112, "learning_rate": 7.279157277942447e-06, "loss": 0.4085, "step": 14066 }, { "epoch": 0.41444838914013815, "grad_norm": 1.32360703040919, "learning_rate": 7.278699558634618e-06, "loss": 0.3948, "step": 14067 }, { "epoch": 0.41447785159760175, "grad_norm": 1.453786661721633, "learning_rate": 7.278241815223399e-06, "loss": 0.4728, "step": 14068 }, { "epoch": 0.41450731405506536, "grad_norm": 1.6886151737615147, "learning_rate": 7.277784047713634e-06, "loss": 0.5885, "step": 14069 }, { "epoch": 0.4145367765125289, "grad_norm": 1.4452233478796912, "learning_rate": 7.2773262561101636e-06, "loss": 0.4985, "step": 14070 }, { "epoch": 0.4145662389699925, "grad_norm": 1.445267237081696, "learning_rate": 7.276868440417828e-06, "loss": 0.4426, "step": 14071 }, { "epoch": 0.41459570142745605, "grad_norm": 1.4769536341106415, "learning_rate": 7.276410600641475e-06, "loss": 0.5436, "step": 14072 }, { "epoch": 0.41462516388491966, "grad_norm": 1.5487967166650647, "learning_rate": 7.2759527367859415e-06, "loss": 0.4107, "step": 14073 }, { "epoch": 0.4146546263423832, "grad_norm": 1.6676715136958917, "learning_rate": 7.275494848856075e-06, "loss": 0.6327, "step": 14074 }, { "epoch": 0.4146840887998468, "grad_norm": 1.6378860446453019, "learning_rate": 7.275036936856718e-06, "loss": 0.4142, "step": 14075 }, { "epoch": 0.41471355125731035, "grad_norm": 1.502379283402144, "learning_rate": 7.274579000792712e-06, "loss": 0.5023, "step": 14076 }, { "epoch": 0.41474301371477396, "grad_norm": 1.5098379838888603, "learning_rate": 7.274121040668903e-06, "loss": 0.3505, "step": 14077 }, { "epoch": 0.4147724761722375, "grad_norm": 1.4728462526939121, "learning_rate": 7.273663056490136e-06, "loss": 0.3854, "step": 14078 }, { "epoch": 0.4148019386297011, "grad_norm": 1.6207098613137363, "learning_rate": 7.273205048261253e-06, "loss": 0.518, "step": 14079 }, { "epoch": 0.41483140108716465, "grad_norm": 1.571268236124726, "learning_rate": 7.272747015987101e-06, "loss": 0.3965, "step": 14080 }, { "epoch": 0.41486086354462826, "grad_norm": 1.6458922161865783, "learning_rate": 7.272288959672521e-06, "loss": 0.5593, "step": 14081 }, { "epoch": 0.41489032600209186, "grad_norm": 1.496941222660124, "learning_rate": 7.271830879322363e-06, "loss": 0.5436, "step": 14082 }, { "epoch": 0.4149197884595554, "grad_norm": 1.3696668688564728, "learning_rate": 7.27137277494147e-06, "loss": 0.3374, "step": 14083 }, { "epoch": 0.414949250917019, "grad_norm": 1.4969712951752978, "learning_rate": 7.270914646534688e-06, "loss": 0.4201, "step": 14084 }, { "epoch": 0.41497871337448256, "grad_norm": 1.5773109903700735, "learning_rate": 7.270456494106863e-06, "loss": 0.5504, "step": 14085 }, { "epoch": 0.41500817583194616, "grad_norm": 1.438398326201168, "learning_rate": 7.269998317662841e-06, "loss": 0.3368, "step": 14086 }, { "epoch": 0.4150376382894097, "grad_norm": 1.4906587720798379, "learning_rate": 7.269540117207469e-06, "loss": 0.3853, "step": 14087 }, { "epoch": 0.4150671007468733, "grad_norm": 1.5000394696843544, "learning_rate": 7.269081892745593e-06, "loss": 0.3837, "step": 14088 }, { "epoch": 0.41509656320433685, "grad_norm": 1.628766652151584, "learning_rate": 7.26862364428206e-06, "loss": 0.4765, "step": 14089 }, { "epoch": 0.41512602566180046, "grad_norm": 1.4661782921902335, "learning_rate": 7.2681653718217185e-06, "loss": 0.4067, "step": 14090 }, { "epoch": 0.415155488119264, "grad_norm": 1.6795134059522738, "learning_rate": 7.267707075369413e-06, "loss": 0.6734, "step": 14091 }, { "epoch": 0.4151849505767276, "grad_norm": 1.7321588382014608, "learning_rate": 7.267248754929994e-06, "loss": 0.4531, "step": 14092 }, { "epoch": 0.41521441303419115, "grad_norm": 1.5646445916367753, "learning_rate": 7.26679041050831e-06, "loss": 0.4549, "step": 14093 }, { "epoch": 0.41524387549165476, "grad_norm": 1.746333710813954, "learning_rate": 7.2663320421092064e-06, "loss": 0.4689, "step": 14094 }, { "epoch": 0.41527333794911836, "grad_norm": 1.7153120287571042, "learning_rate": 7.265873649737533e-06, "loss": 0.558, "step": 14095 }, { "epoch": 0.4153028004065819, "grad_norm": 1.3908325925071363, "learning_rate": 7.26541523339814e-06, "loss": 0.4092, "step": 14096 }, { "epoch": 0.4153322628640455, "grad_norm": 1.5113703100397378, "learning_rate": 7.264956793095874e-06, "loss": 0.4431, "step": 14097 }, { "epoch": 0.41536172532150906, "grad_norm": 1.3309347641654772, "learning_rate": 7.264498328835586e-06, "loss": 0.3893, "step": 14098 }, { "epoch": 0.41539118777897266, "grad_norm": 1.3383799579141946, "learning_rate": 7.2640398406221244e-06, "loss": 0.4314, "step": 14099 }, { "epoch": 0.4154206502364362, "grad_norm": 1.5362974268943541, "learning_rate": 7.26358132846034e-06, "loss": 0.3514, "step": 14100 }, { "epoch": 0.4154501126938998, "grad_norm": 1.607323420656539, "learning_rate": 7.263122792355081e-06, "loss": 0.4042, "step": 14101 }, { "epoch": 0.41547957515136336, "grad_norm": 1.354812912331603, "learning_rate": 7.262664232311199e-06, "loss": 0.4272, "step": 14102 }, { "epoch": 0.41550903760882696, "grad_norm": 1.5512346106455976, "learning_rate": 7.2622056483335445e-06, "loss": 0.5481, "step": 14103 }, { "epoch": 0.4155385000662905, "grad_norm": 1.38866246694414, "learning_rate": 7.261747040426967e-06, "loss": 0.4095, "step": 14104 }, { "epoch": 0.4155679625237541, "grad_norm": 1.503441872599804, "learning_rate": 7.261288408596321e-06, "loss": 0.5347, "step": 14105 }, { "epoch": 0.41559742498121766, "grad_norm": 1.4061913790771257, "learning_rate": 7.260829752846455e-06, "loss": 0.4703, "step": 14106 }, { "epoch": 0.41562688743868126, "grad_norm": 1.354317316513038, "learning_rate": 7.26037107318222e-06, "loss": 0.3994, "step": 14107 }, { "epoch": 0.41565634989614486, "grad_norm": 1.4330133497826631, "learning_rate": 7.259912369608467e-06, "loss": 0.3492, "step": 14108 }, { "epoch": 0.4156858123536084, "grad_norm": 1.6110339321408287, "learning_rate": 7.2594536421300525e-06, "loss": 0.4797, "step": 14109 }, { "epoch": 0.415715274811072, "grad_norm": 1.61979914926075, "learning_rate": 7.258994890751825e-06, "loss": 0.4873, "step": 14110 }, { "epoch": 0.41574473726853556, "grad_norm": 1.6341730369100629, "learning_rate": 7.258536115478638e-06, "loss": 0.5239, "step": 14111 }, { "epoch": 0.41577419972599916, "grad_norm": 1.4787608630171227, "learning_rate": 7.258077316315344e-06, "loss": 0.4599, "step": 14112 }, { "epoch": 0.4158036621834627, "grad_norm": 1.4396522767864526, "learning_rate": 7.257618493266796e-06, "loss": 0.3198, "step": 14113 }, { "epoch": 0.4158331246409263, "grad_norm": 1.4284567949013285, "learning_rate": 7.257159646337849e-06, "loss": 0.4251, "step": 14114 }, { "epoch": 0.41586258709838986, "grad_norm": 1.614715162934449, "learning_rate": 7.256700775533354e-06, "loss": 0.4198, "step": 14115 }, { "epoch": 0.41589204955585346, "grad_norm": 1.4326390464540053, "learning_rate": 7.256241880858166e-06, "loss": 0.373, "step": 14116 }, { "epoch": 0.415921512013317, "grad_norm": 1.4381748530953469, "learning_rate": 7.255782962317139e-06, "loss": 0.4666, "step": 14117 }, { "epoch": 0.4159509744707806, "grad_norm": 1.5340807557112568, "learning_rate": 7.255324019915127e-06, "loss": 0.5203, "step": 14118 }, { "epoch": 0.41598043692824416, "grad_norm": 1.4315615554798982, "learning_rate": 7.254865053656985e-06, "loss": 0.4889, "step": 14119 }, { "epoch": 0.41600989938570776, "grad_norm": 1.583388037675793, "learning_rate": 7.254406063547569e-06, "loss": 0.4023, "step": 14120 }, { "epoch": 0.41603936184317136, "grad_norm": 1.4452586121523858, "learning_rate": 7.253947049591732e-06, "loss": 0.3533, "step": 14121 }, { "epoch": 0.4160688243006349, "grad_norm": 1.5029416149483732, "learning_rate": 7.25348801179433e-06, "loss": 0.4248, "step": 14122 }, { "epoch": 0.4160982867580985, "grad_norm": 1.4204279365599874, "learning_rate": 7.253028950160217e-06, "loss": 0.3795, "step": 14123 }, { "epoch": 0.41612774921556206, "grad_norm": 1.4418923646447195, "learning_rate": 7.2525698646942525e-06, "loss": 0.4208, "step": 14124 }, { "epoch": 0.41615721167302566, "grad_norm": 1.5602817825268689, "learning_rate": 7.25211075540129e-06, "loss": 0.5412, "step": 14125 }, { "epoch": 0.4161866741304892, "grad_norm": 1.4529127129393442, "learning_rate": 7.251651622286186e-06, "loss": 0.484, "step": 14126 }, { "epoch": 0.4162161365879528, "grad_norm": 1.4501965467578335, "learning_rate": 7.251192465353798e-06, "loss": 0.4575, "step": 14127 }, { "epoch": 0.41624559904541636, "grad_norm": 1.3446913497169721, "learning_rate": 7.250733284608981e-06, "loss": 0.3917, "step": 14128 }, { "epoch": 0.41627506150287996, "grad_norm": 1.5877590886582629, "learning_rate": 7.250274080056592e-06, "loss": 0.5125, "step": 14129 }, { "epoch": 0.4163045239603435, "grad_norm": 1.7868442870921601, "learning_rate": 7.2498148517014934e-06, "loss": 0.6078, "step": 14130 }, { "epoch": 0.4163339864178071, "grad_norm": 1.4749993880528567, "learning_rate": 7.249355599548537e-06, "loss": 0.4635, "step": 14131 }, { "epoch": 0.41636344887527066, "grad_norm": 1.4647503979504701, "learning_rate": 7.2488963236025814e-06, "loss": 0.3595, "step": 14132 }, { "epoch": 0.41639291133273426, "grad_norm": 1.4505291011661765, "learning_rate": 7.248437023868487e-06, "loss": 0.5413, "step": 14133 }, { "epoch": 0.41642237379019786, "grad_norm": 1.5537009366260488, "learning_rate": 7.2479777003511094e-06, "loss": 0.4635, "step": 14134 }, { "epoch": 0.4164518362476614, "grad_norm": 1.4141339276082425, "learning_rate": 7.247518353055312e-06, "loss": 0.4938, "step": 14135 }, { "epoch": 0.416481298705125, "grad_norm": 1.468167694593562, "learning_rate": 7.247058981985949e-06, "loss": 0.3472, "step": 14136 }, { "epoch": 0.41651076116258856, "grad_norm": 1.5808200931173415, "learning_rate": 7.24659958714788e-06, "loss": 0.6366, "step": 14137 }, { "epoch": 0.41654022362005216, "grad_norm": 1.8673407345609385, "learning_rate": 7.246140168545965e-06, "loss": 0.5042, "step": 14138 }, { "epoch": 0.4165696860775157, "grad_norm": 1.3810679590956225, "learning_rate": 7.2456807261850645e-06, "loss": 0.358, "step": 14139 }, { "epoch": 0.4165991485349793, "grad_norm": 1.5315947195682795, "learning_rate": 7.245221260070039e-06, "loss": 0.4589, "step": 14140 }, { "epoch": 0.41662861099244286, "grad_norm": 1.5383122730052061, "learning_rate": 7.244761770205746e-06, "loss": 0.4376, "step": 14141 }, { "epoch": 0.41665807344990646, "grad_norm": 1.5113959721362, "learning_rate": 7.244302256597048e-06, "loss": 0.4357, "step": 14142 }, { "epoch": 0.41668753590737, "grad_norm": 1.4136580710862563, "learning_rate": 7.243842719248803e-06, "loss": 0.3873, "step": 14143 }, { "epoch": 0.4167169983648336, "grad_norm": 1.515549985894858, "learning_rate": 7.243383158165873e-06, "loss": 0.3816, "step": 14144 }, { "epoch": 0.4167464608222972, "grad_norm": 1.5329795382031406, "learning_rate": 7.242923573353123e-06, "loss": 0.5403, "step": 14145 }, { "epoch": 0.41677592327976076, "grad_norm": 1.4927710248656871, "learning_rate": 7.24246396481541e-06, "loss": 0.4831, "step": 14146 }, { "epoch": 0.41680538573722437, "grad_norm": 1.5835893036259192, "learning_rate": 7.242004332557594e-06, "loss": 0.4749, "step": 14147 }, { "epoch": 0.4168348481946879, "grad_norm": 1.584826318692179, "learning_rate": 7.241544676584542e-06, "loss": 0.632, "step": 14148 }, { "epoch": 0.4168643106521515, "grad_norm": 1.6234686044872058, "learning_rate": 7.241084996901112e-06, "loss": 0.5986, "step": 14149 }, { "epoch": 0.41689377310961506, "grad_norm": 1.5285511926792132, "learning_rate": 7.240625293512169e-06, "loss": 0.4836, "step": 14150 }, { "epoch": 0.41692323556707866, "grad_norm": 1.4877857434912036, "learning_rate": 7.240165566422574e-06, "loss": 0.4412, "step": 14151 }, { "epoch": 0.4169526980245422, "grad_norm": 1.4524790633678388, "learning_rate": 7.239705815637191e-06, "loss": 0.4742, "step": 14152 }, { "epoch": 0.4169821604820058, "grad_norm": 1.6456967603259383, "learning_rate": 7.239246041160881e-06, "loss": 0.5355, "step": 14153 }, { "epoch": 0.41701162293946936, "grad_norm": 1.4330285895889006, "learning_rate": 7.23878624299851e-06, "loss": 0.499, "step": 14154 }, { "epoch": 0.41704108539693296, "grad_norm": 1.5680654910000091, "learning_rate": 7.23832642115494e-06, "loss": 0.6142, "step": 14155 }, { "epoch": 0.4170705478543965, "grad_norm": 1.7280856467375207, "learning_rate": 7.237866575635035e-06, "loss": 0.4699, "step": 14156 }, { "epoch": 0.4171000103118601, "grad_norm": 1.759674393729591, "learning_rate": 7.2374067064436605e-06, "loss": 0.385, "step": 14157 }, { "epoch": 0.4171294727693237, "grad_norm": 1.6275185558339684, "learning_rate": 7.236946813585679e-06, "loss": 0.4461, "step": 14158 }, { "epoch": 0.41715893522678726, "grad_norm": 1.4202669647415869, "learning_rate": 7.236486897065955e-06, "loss": 0.4467, "step": 14159 }, { "epoch": 0.41718839768425087, "grad_norm": 1.6855237034020523, "learning_rate": 7.236026956889356e-06, "loss": 0.6231, "step": 14160 }, { "epoch": 0.4172178601417144, "grad_norm": 1.386620900691488, "learning_rate": 7.235566993060744e-06, "loss": 0.5051, "step": 14161 }, { "epoch": 0.417247322599178, "grad_norm": 1.7136429497461354, "learning_rate": 7.235107005584985e-06, "loss": 0.4726, "step": 14162 }, { "epoch": 0.41727678505664156, "grad_norm": 1.513687934976301, "learning_rate": 7.234646994466947e-06, "loss": 0.5353, "step": 14163 }, { "epoch": 0.41730624751410517, "grad_norm": 1.6135579021053044, "learning_rate": 7.234186959711493e-06, "loss": 0.5425, "step": 14164 }, { "epoch": 0.4173357099715687, "grad_norm": 1.4649955674354633, "learning_rate": 7.233726901323491e-06, "loss": 0.448, "step": 14165 }, { "epoch": 0.4173651724290323, "grad_norm": 1.5670105771173586, "learning_rate": 7.233266819307807e-06, "loss": 0.4636, "step": 14166 }, { "epoch": 0.41739463488649586, "grad_norm": 1.8621846297849893, "learning_rate": 7.232806713669306e-06, "loss": 0.5999, "step": 14167 }, { "epoch": 0.41742409734395947, "grad_norm": 1.5753429081133108, "learning_rate": 7.232346584412855e-06, "loss": 0.4588, "step": 14168 }, { "epoch": 0.417453559801423, "grad_norm": 1.4736337332610474, "learning_rate": 7.231886431543325e-06, "loss": 0.4702, "step": 14169 }, { "epoch": 0.4174830222588866, "grad_norm": 1.3277039309940093, "learning_rate": 7.231426255065579e-06, "loss": 0.44, "step": 14170 }, { "epoch": 0.4175124847163502, "grad_norm": 1.5317655941374344, "learning_rate": 7.230966054984485e-06, "loss": 0.5228, "step": 14171 }, { "epoch": 0.41754194717381377, "grad_norm": 1.3432406431071509, "learning_rate": 7.230505831304915e-06, "loss": 0.2854, "step": 14172 }, { "epoch": 0.41757140963127737, "grad_norm": 1.5985655434725075, "learning_rate": 7.230045584031731e-06, "loss": 0.4803, "step": 14173 }, { "epoch": 0.4176008720887409, "grad_norm": 1.585933572780814, "learning_rate": 7.229585313169805e-06, "loss": 0.4655, "step": 14174 }, { "epoch": 0.4176303345462045, "grad_norm": 1.4078393928546769, "learning_rate": 7.229125018724007e-06, "loss": 0.3856, "step": 14175 }, { "epoch": 0.41765979700366807, "grad_norm": 1.4499606995578438, "learning_rate": 7.228664700699203e-06, "loss": 0.4125, "step": 14176 }, { "epoch": 0.41768925946113167, "grad_norm": 1.4009693159473524, "learning_rate": 7.2282043591002615e-06, "loss": 0.3657, "step": 14177 }, { "epoch": 0.4177187219185952, "grad_norm": 1.5457070016280006, "learning_rate": 7.227743993932055e-06, "loss": 0.4439, "step": 14178 }, { "epoch": 0.4177481843760588, "grad_norm": 1.3229931012635276, "learning_rate": 7.227283605199451e-06, "loss": 0.3723, "step": 14179 }, { "epoch": 0.41777764683352236, "grad_norm": 1.3675796290247624, "learning_rate": 7.22682319290732e-06, "loss": 0.4082, "step": 14180 }, { "epoch": 0.41780710929098597, "grad_norm": 1.5727129260035546, "learning_rate": 7.226362757060532e-06, "loss": 0.5564, "step": 14181 }, { "epoch": 0.4178365717484495, "grad_norm": 1.5212769443591019, "learning_rate": 7.225902297663957e-06, "loss": 0.451, "step": 14182 }, { "epoch": 0.4178660342059131, "grad_norm": 1.3272791238281958, "learning_rate": 7.225441814722465e-06, "loss": 0.3764, "step": 14183 }, { "epoch": 0.4178954966633767, "grad_norm": 1.2892435284810986, "learning_rate": 7.224981308240929e-06, "loss": 0.3529, "step": 14184 }, { "epoch": 0.41792495912084027, "grad_norm": 1.3010786564231833, "learning_rate": 7.224520778224219e-06, "loss": 0.3662, "step": 14185 }, { "epoch": 0.41795442157830387, "grad_norm": 1.5610581225653195, "learning_rate": 7.224060224677205e-06, "loss": 0.4515, "step": 14186 }, { "epoch": 0.4179838840357674, "grad_norm": 1.7233654725714564, "learning_rate": 7.223599647604761e-06, "loss": 0.4792, "step": 14187 }, { "epoch": 0.418013346493231, "grad_norm": 1.313641770729678, "learning_rate": 7.223139047011755e-06, "loss": 0.3432, "step": 14188 }, { "epoch": 0.41804280895069457, "grad_norm": 1.538880182703993, "learning_rate": 7.222678422903064e-06, "loss": 0.5829, "step": 14189 }, { "epoch": 0.41807227140815817, "grad_norm": 1.515549450276201, "learning_rate": 7.222217775283558e-06, "loss": 0.4858, "step": 14190 }, { "epoch": 0.4181017338656217, "grad_norm": 1.418970763559927, "learning_rate": 7.221757104158109e-06, "loss": 0.5355, "step": 14191 }, { "epoch": 0.4181311963230853, "grad_norm": 1.5248446880439879, "learning_rate": 7.221296409531591e-06, "loss": 0.4844, "step": 14192 }, { "epoch": 0.41816065878054887, "grad_norm": 1.5670567054478013, "learning_rate": 7.220835691408876e-06, "loss": 0.4771, "step": 14193 }, { "epoch": 0.41819012123801247, "grad_norm": 1.6456498747593895, "learning_rate": 7.220374949794837e-06, "loss": 0.4165, "step": 14194 }, { "epoch": 0.418219583695476, "grad_norm": 1.5843278768888187, "learning_rate": 7.219914184694351e-06, "loss": 0.5496, "step": 14195 }, { "epoch": 0.4182490461529396, "grad_norm": 1.496900725477087, "learning_rate": 7.219453396112287e-06, "loss": 0.5358, "step": 14196 }, { "epoch": 0.4182785086104032, "grad_norm": 1.6573262033979228, "learning_rate": 7.218992584053522e-06, "loss": 0.3721, "step": 14197 }, { "epoch": 0.41830797106786677, "grad_norm": 1.5506373402971063, "learning_rate": 7.21853174852293e-06, "loss": 0.447, "step": 14198 }, { "epoch": 0.41833743352533037, "grad_norm": 1.4538299708616633, "learning_rate": 7.218070889525386e-06, "loss": 0.4336, "step": 14199 }, { "epoch": 0.4183668959827939, "grad_norm": 1.4394480994804, "learning_rate": 7.2176100070657625e-06, "loss": 0.4485, "step": 14200 }, { "epoch": 0.4183963584402575, "grad_norm": 1.4068925494854945, "learning_rate": 7.217149101148937e-06, "loss": 0.3793, "step": 14201 }, { "epoch": 0.41842582089772107, "grad_norm": 1.3455666992827044, "learning_rate": 7.216688171779785e-06, "loss": 0.2998, "step": 14202 }, { "epoch": 0.41845528335518467, "grad_norm": 1.451820477209432, "learning_rate": 7.216227218963179e-06, "loss": 0.4511, "step": 14203 }, { "epoch": 0.4184847458126482, "grad_norm": 1.5569380608398127, "learning_rate": 7.215766242703998e-06, "loss": 0.5273, "step": 14204 }, { "epoch": 0.4185142082701118, "grad_norm": 1.2319426635115684, "learning_rate": 7.215305243007117e-06, "loss": 0.3038, "step": 14205 }, { "epoch": 0.41854367072757537, "grad_norm": 1.5208102270696064, "learning_rate": 7.214844219877413e-06, "loss": 0.4882, "step": 14206 }, { "epoch": 0.41857313318503897, "grad_norm": 1.9411611129636206, "learning_rate": 7.21438317331976e-06, "loss": 0.5021, "step": 14207 }, { "epoch": 0.4186025956425025, "grad_norm": 1.5248037150099696, "learning_rate": 7.213922103339038e-06, "loss": 0.3914, "step": 14208 }, { "epoch": 0.4186320580999661, "grad_norm": 1.31497368481844, "learning_rate": 7.213461009940123e-06, "loss": 0.3729, "step": 14209 }, { "epoch": 0.4186615205574297, "grad_norm": 1.634050051764577, "learning_rate": 7.212999893127891e-06, "loss": 0.5383, "step": 14210 }, { "epoch": 0.41869098301489327, "grad_norm": 1.5236222532421067, "learning_rate": 7.212538752907221e-06, "loss": 0.4203, "step": 14211 }, { "epoch": 0.4187204454723569, "grad_norm": 1.4653107974559514, "learning_rate": 7.212077589282991e-06, "loss": 0.4089, "step": 14212 }, { "epoch": 0.4187499079298204, "grad_norm": 1.4389521705666852, "learning_rate": 7.211616402260077e-06, "loss": 0.416, "step": 14213 }, { "epoch": 0.418779370387284, "grad_norm": 1.3223020760122386, "learning_rate": 7.21115519184336e-06, "loss": 0.3937, "step": 14214 }, { "epoch": 0.41880883284474757, "grad_norm": 1.613755456432502, "learning_rate": 7.210693958037716e-06, "loss": 0.5577, "step": 14215 }, { "epoch": 0.41883829530221117, "grad_norm": 1.3834616125668542, "learning_rate": 7.210232700848025e-06, "loss": 0.4143, "step": 14216 }, { "epoch": 0.4188677577596747, "grad_norm": 1.5174416674527902, "learning_rate": 7.209771420279167e-06, "loss": 0.4084, "step": 14217 }, { "epoch": 0.4188972202171383, "grad_norm": 1.4191193591112128, "learning_rate": 7.209310116336021e-06, "loss": 0.4207, "step": 14218 }, { "epoch": 0.41892668267460187, "grad_norm": 1.6286874228453636, "learning_rate": 7.208848789023464e-06, "loss": 0.5944, "step": 14219 }, { "epoch": 0.41895614513206547, "grad_norm": 1.7028669884188887, "learning_rate": 7.2083874383463796e-06, "loss": 0.5382, "step": 14220 }, { "epoch": 0.418985607589529, "grad_norm": 1.6984034484297896, "learning_rate": 7.207926064309645e-06, "loss": 0.5962, "step": 14221 }, { "epoch": 0.4190150700469926, "grad_norm": 1.4671800632652054, "learning_rate": 7.207464666918142e-06, "loss": 0.4642, "step": 14222 }, { "epoch": 0.4190445325044562, "grad_norm": 1.3897828539134083, "learning_rate": 7.207003246176749e-06, "loss": 0.3653, "step": 14223 }, { "epoch": 0.41907399496191977, "grad_norm": 1.5862432498450438, "learning_rate": 7.20654180209035e-06, "loss": 0.402, "step": 14224 }, { "epoch": 0.4191034574193834, "grad_norm": 1.42967977140866, "learning_rate": 7.2060803346638235e-06, "loss": 0.4708, "step": 14225 }, { "epoch": 0.4191329198768469, "grad_norm": 1.3800200020973716, "learning_rate": 7.205618843902051e-06, "loss": 0.4089, "step": 14226 }, { "epoch": 0.4191623823343105, "grad_norm": 1.590661316999009, "learning_rate": 7.205157329809916e-06, "loss": 0.4262, "step": 14227 }, { "epoch": 0.41919184479177407, "grad_norm": 1.670635695284934, "learning_rate": 7.204695792392296e-06, "loss": 0.4739, "step": 14228 }, { "epoch": 0.4192213072492377, "grad_norm": 1.5122368097024388, "learning_rate": 7.2042342316540794e-06, "loss": 0.429, "step": 14229 }, { "epoch": 0.4192507697067012, "grad_norm": 1.6417684144908737, "learning_rate": 7.203772647600142e-06, "loss": 0.4635, "step": 14230 }, { "epoch": 0.4192802321641648, "grad_norm": 1.495571298134483, "learning_rate": 7.203311040235371e-06, "loss": 0.4591, "step": 14231 }, { "epoch": 0.41930969462162837, "grad_norm": 1.4869296788490927, "learning_rate": 7.2028494095646464e-06, "loss": 0.4306, "step": 14232 }, { "epoch": 0.419339157079092, "grad_norm": 1.611870904111766, "learning_rate": 7.202387755592852e-06, "loss": 0.4499, "step": 14233 }, { "epoch": 0.4193686195365555, "grad_norm": 1.533648477428772, "learning_rate": 7.201926078324871e-06, "loss": 0.3388, "step": 14234 }, { "epoch": 0.4193980819940191, "grad_norm": 1.598280143531657, "learning_rate": 7.201464377765588e-06, "loss": 0.4219, "step": 14235 }, { "epoch": 0.4194275444514827, "grad_norm": 1.5696321256059935, "learning_rate": 7.201002653919887e-06, "loss": 0.3968, "step": 14236 }, { "epoch": 0.4194570069089463, "grad_norm": 1.5251893615131626, "learning_rate": 7.2005409067926465e-06, "loss": 0.4472, "step": 14237 }, { "epoch": 0.4194864693664099, "grad_norm": 1.5323030840336074, "learning_rate": 7.200079136388758e-06, "loss": 0.361, "step": 14238 }, { "epoch": 0.4195159318238734, "grad_norm": 1.7266184001243408, "learning_rate": 7.199617342713102e-06, "loss": 0.539, "step": 14239 }, { "epoch": 0.419545394281337, "grad_norm": 1.326420838104971, "learning_rate": 7.1991555257705645e-06, "loss": 0.3708, "step": 14240 }, { "epoch": 0.4195748567388006, "grad_norm": 1.6544958085917798, "learning_rate": 7.19869368556603e-06, "loss": 0.5942, "step": 14241 }, { "epoch": 0.4196043191962642, "grad_norm": 1.861794253093361, "learning_rate": 7.198231822104384e-06, "loss": 0.5686, "step": 14242 }, { "epoch": 0.4196337816537277, "grad_norm": 1.5458774904959145, "learning_rate": 7.197769935390511e-06, "loss": 0.5048, "step": 14243 }, { "epoch": 0.4196632441111913, "grad_norm": 1.4803153787346286, "learning_rate": 7.197308025429297e-06, "loss": 0.5185, "step": 14244 }, { "epoch": 0.41969270656865487, "grad_norm": 1.5274514837047806, "learning_rate": 7.19684609222563e-06, "loss": 0.4207, "step": 14245 }, { "epoch": 0.4197221690261185, "grad_norm": 1.5574064344254304, "learning_rate": 7.196384135784393e-06, "loss": 0.4319, "step": 14246 }, { "epoch": 0.419751631483582, "grad_norm": 1.5671892198501478, "learning_rate": 7.195922156110475e-06, "loss": 0.4667, "step": 14247 }, { "epoch": 0.4197810939410456, "grad_norm": 1.6580440965261705, "learning_rate": 7.195460153208761e-06, "loss": 0.5302, "step": 14248 }, { "epoch": 0.4198105563985092, "grad_norm": 1.447596124319258, "learning_rate": 7.1949981270841386e-06, "loss": 0.3824, "step": 14249 }, { "epoch": 0.4198400188559728, "grad_norm": 1.4298285213074649, "learning_rate": 7.194536077741495e-06, "loss": 0.4515, "step": 14250 }, { "epoch": 0.4198694813134364, "grad_norm": 1.5888174069144916, "learning_rate": 7.194074005185719e-06, "loss": 0.448, "step": 14251 }, { "epoch": 0.4198989437708999, "grad_norm": 1.4019674048256878, "learning_rate": 7.193611909421695e-06, "loss": 0.4394, "step": 14252 }, { "epoch": 0.4199284062283635, "grad_norm": 1.6118266842370748, "learning_rate": 7.193149790454312e-06, "loss": 0.584, "step": 14253 }, { "epoch": 0.4199578686858271, "grad_norm": 1.606189950190657, "learning_rate": 7.192687648288461e-06, "loss": 0.47, "step": 14254 }, { "epoch": 0.4199873311432907, "grad_norm": 1.5355595193371918, "learning_rate": 7.1922254829290285e-06, "loss": 0.5039, "step": 14255 }, { "epoch": 0.4200167936007542, "grad_norm": 1.4184939738656859, "learning_rate": 7.191763294380901e-06, "loss": 0.4444, "step": 14256 }, { "epoch": 0.4200462560582178, "grad_norm": 1.5681663336716647, "learning_rate": 7.191301082648972e-06, "loss": 0.5192, "step": 14257 }, { "epoch": 0.4200757185156814, "grad_norm": 1.4140206345580097, "learning_rate": 7.1908388477381255e-06, "loss": 0.3861, "step": 14258 }, { "epoch": 0.420105180973145, "grad_norm": 1.4786513522915565, "learning_rate": 7.190376589653254e-06, "loss": 0.4841, "step": 14259 }, { "epoch": 0.4201346434306085, "grad_norm": 1.4655955140320267, "learning_rate": 7.1899143083992475e-06, "loss": 0.4787, "step": 14260 }, { "epoch": 0.4201641058880721, "grad_norm": 1.5600255730469972, "learning_rate": 7.189452003980994e-06, "loss": 0.4908, "step": 14261 }, { "epoch": 0.42019356834553573, "grad_norm": 1.6552911961664862, "learning_rate": 7.188989676403384e-06, "loss": 0.5946, "step": 14262 }, { "epoch": 0.4202230308029993, "grad_norm": 1.6937999293487682, "learning_rate": 7.188527325671308e-06, "loss": 0.453, "step": 14263 }, { "epoch": 0.4202524932604629, "grad_norm": 1.7727263317190378, "learning_rate": 7.188064951789657e-06, "loss": 0.4549, "step": 14264 }, { "epoch": 0.4202819557179264, "grad_norm": 1.5747507926564037, "learning_rate": 7.187602554763324e-06, "loss": 0.4831, "step": 14265 }, { "epoch": 0.42031141817539003, "grad_norm": 1.462424603396336, "learning_rate": 7.187140134597197e-06, "loss": 0.3277, "step": 14266 }, { "epoch": 0.4203408806328536, "grad_norm": 1.4209428203766115, "learning_rate": 7.1866776912961666e-06, "loss": 0.4581, "step": 14267 }, { "epoch": 0.4203703430903172, "grad_norm": 1.4682478477878893, "learning_rate": 7.1862152248651265e-06, "loss": 0.3964, "step": 14268 }, { "epoch": 0.4203998055477807, "grad_norm": 1.4852217540519603, "learning_rate": 7.185752735308968e-06, "loss": 0.4802, "step": 14269 }, { "epoch": 0.4204292680052443, "grad_norm": 1.4256885467221232, "learning_rate": 7.185290222632585e-06, "loss": 0.3448, "step": 14270 }, { "epoch": 0.4204587304627079, "grad_norm": 1.5870284359440827, "learning_rate": 7.184827686840866e-06, "loss": 0.539, "step": 14271 }, { "epoch": 0.4204881929201715, "grad_norm": 1.5656853782404105, "learning_rate": 7.184365127938707e-06, "loss": 0.5637, "step": 14272 }, { "epoch": 0.420517655377635, "grad_norm": 1.5799135528125108, "learning_rate": 7.183902545930999e-06, "loss": 0.6068, "step": 14273 }, { "epoch": 0.4205471178350986, "grad_norm": 1.6764050100711523, "learning_rate": 7.183439940822634e-06, "loss": 0.5018, "step": 14274 }, { "epoch": 0.42057658029256223, "grad_norm": 1.6235197404018253, "learning_rate": 7.182977312618509e-06, "loss": 0.5723, "step": 14275 }, { "epoch": 0.4206060427500258, "grad_norm": 1.468892906765832, "learning_rate": 7.182514661323513e-06, "loss": 0.425, "step": 14276 }, { "epoch": 0.4206355052074894, "grad_norm": 1.6667569530554933, "learning_rate": 7.182051986942543e-06, "loss": 0.6502, "step": 14277 }, { "epoch": 0.4206649676649529, "grad_norm": 1.5857285572323108, "learning_rate": 7.1815892894804926e-06, "loss": 0.6289, "step": 14278 }, { "epoch": 0.42069443012241653, "grad_norm": 1.5248319113024276, "learning_rate": 7.181126568942255e-06, "loss": 0.4968, "step": 14279 }, { "epoch": 0.4207238925798801, "grad_norm": 1.541035477658729, "learning_rate": 7.180663825332726e-06, "loss": 0.5181, "step": 14280 }, { "epoch": 0.4207533550373437, "grad_norm": 1.3749554813445313, "learning_rate": 7.180201058656799e-06, "loss": 0.4711, "step": 14281 }, { "epoch": 0.4207828174948072, "grad_norm": 1.5920757912083077, "learning_rate": 7.17973826891937e-06, "loss": 0.4591, "step": 14282 }, { "epoch": 0.42081227995227083, "grad_norm": 1.4397819184892118, "learning_rate": 7.179275456125332e-06, "loss": 0.5676, "step": 14283 }, { "epoch": 0.4208417424097344, "grad_norm": 1.5789786909699899, "learning_rate": 7.178812620279585e-06, "loss": 0.4563, "step": 14284 }, { "epoch": 0.420871204867198, "grad_norm": 1.5877171088841988, "learning_rate": 7.178349761387021e-06, "loss": 0.4761, "step": 14285 }, { "epoch": 0.4209006673246615, "grad_norm": 1.4852518559119021, "learning_rate": 7.1778868794525365e-06, "loss": 0.394, "step": 14286 }, { "epoch": 0.42093012978212513, "grad_norm": 1.3892195413499906, "learning_rate": 7.17742397448103e-06, "loss": 0.3764, "step": 14287 }, { "epoch": 0.42095959223958873, "grad_norm": 1.4390899029671596, "learning_rate": 7.176961046477395e-06, "loss": 0.4798, "step": 14288 }, { "epoch": 0.4209890546970523, "grad_norm": 1.3876070419696482, "learning_rate": 7.1764980954465306e-06, "loss": 0.3325, "step": 14289 }, { "epoch": 0.4210185171545159, "grad_norm": 1.4899954986439834, "learning_rate": 7.176035121393332e-06, "loss": 0.4319, "step": 14290 }, { "epoch": 0.42104797961197943, "grad_norm": 1.4672829161290175, "learning_rate": 7.175572124322696e-06, "loss": 0.4691, "step": 14291 }, { "epoch": 0.42107744206944303, "grad_norm": 1.7321442855288778, "learning_rate": 7.1751091042395216e-06, "loss": 0.6324, "step": 14292 }, { "epoch": 0.4211069045269066, "grad_norm": 1.4158007673192423, "learning_rate": 7.174646061148707e-06, "loss": 0.4087, "step": 14293 }, { "epoch": 0.4211363669843702, "grad_norm": 1.4407312940804344, "learning_rate": 7.174182995055149e-06, "loss": 0.4738, "step": 14294 }, { "epoch": 0.42116582944183373, "grad_norm": 1.5416733800028832, "learning_rate": 7.173719905963745e-06, "loss": 0.4957, "step": 14295 }, { "epoch": 0.42119529189929733, "grad_norm": 1.5158622546948624, "learning_rate": 7.173256793879395e-06, "loss": 0.4466, "step": 14296 }, { "epoch": 0.4212247543567609, "grad_norm": 1.4895780852749543, "learning_rate": 7.172793658806996e-06, "loss": 0.4791, "step": 14297 }, { "epoch": 0.4212542168142245, "grad_norm": 1.4310878047798663, "learning_rate": 7.172330500751449e-06, "loss": 0.4577, "step": 14298 }, { "epoch": 0.421283679271688, "grad_norm": 1.7274538593560356, "learning_rate": 7.171867319717652e-06, "loss": 0.478, "step": 14299 }, { "epoch": 0.42131314172915163, "grad_norm": 1.4714581771547253, "learning_rate": 7.171404115710504e-06, "loss": 0.3585, "step": 14300 }, { "epoch": 0.42134260418661523, "grad_norm": 1.5174126105524033, "learning_rate": 7.170940888734904e-06, "loss": 0.5158, "step": 14301 }, { "epoch": 0.4213720666440788, "grad_norm": 1.3847998241188062, "learning_rate": 7.170477638795754e-06, "loss": 0.4781, "step": 14302 }, { "epoch": 0.4214015291015424, "grad_norm": 1.721673112375321, "learning_rate": 7.170014365897953e-06, "loss": 0.4908, "step": 14303 }, { "epoch": 0.42143099155900593, "grad_norm": 1.5897323269861083, "learning_rate": 7.169551070046401e-06, "loss": 0.5288, "step": 14304 }, { "epoch": 0.42146045401646953, "grad_norm": 1.5036887714020657, "learning_rate": 7.1690877512459985e-06, "loss": 0.5155, "step": 14305 }, { "epoch": 0.4214899164739331, "grad_norm": 1.5156993194511075, "learning_rate": 7.168624409501649e-06, "loss": 0.4487, "step": 14306 }, { "epoch": 0.4215193789313967, "grad_norm": 1.4538523427158265, "learning_rate": 7.168161044818249e-06, "loss": 0.4203, "step": 14307 }, { "epoch": 0.42154884138886023, "grad_norm": 1.5255145913526782, "learning_rate": 7.167697657200703e-06, "loss": 0.4524, "step": 14308 }, { "epoch": 0.42157830384632383, "grad_norm": 1.5400180647493624, "learning_rate": 7.167234246653913e-06, "loss": 0.6294, "step": 14309 }, { "epoch": 0.4216077663037874, "grad_norm": 1.4340037430641475, "learning_rate": 7.1667708131827775e-06, "loss": 0.3952, "step": 14310 }, { "epoch": 0.421637228761251, "grad_norm": 1.50983636449805, "learning_rate": 7.1663073567922014e-06, "loss": 0.5136, "step": 14311 }, { "epoch": 0.42166669121871453, "grad_norm": 1.412474806421532, "learning_rate": 7.165843877487088e-06, "loss": 0.4635, "step": 14312 }, { "epoch": 0.42169615367617813, "grad_norm": 1.7401299268655832, "learning_rate": 7.165380375272335e-06, "loss": 0.4233, "step": 14313 }, { "epoch": 0.42172561613364173, "grad_norm": 1.489637291792537, "learning_rate": 7.164916850152851e-06, "loss": 0.5868, "step": 14314 }, { "epoch": 0.4217550785911053, "grad_norm": 1.3603898193118027, "learning_rate": 7.164453302133536e-06, "loss": 0.3327, "step": 14315 }, { "epoch": 0.4217845410485689, "grad_norm": 1.4435805282661733, "learning_rate": 7.163989731219292e-06, "loss": 0.4297, "step": 14316 }, { "epoch": 0.42181400350603243, "grad_norm": 1.4019379722781113, "learning_rate": 7.163526137415027e-06, "loss": 0.5234, "step": 14317 }, { "epoch": 0.42184346596349603, "grad_norm": 1.6685532079044456, "learning_rate": 7.163062520725638e-06, "loss": 0.6969, "step": 14318 }, { "epoch": 0.4218729284209596, "grad_norm": 1.5004105988764262, "learning_rate": 7.162598881156036e-06, "loss": 0.4509, "step": 14319 }, { "epoch": 0.4219023908784232, "grad_norm": 1.549952307358861, "learning_rate": 7.162135218711121e-06, "loss": 0.3765, "step": 14320 }, { "epoch": 0.42193185333588673, "grad_norm": 1.44252570366663, "learning_rate": 7.1616715333957985e-06, "loss": 0.3381, "step": 14321 }, { "epoch": 0.42196131579335033, "grad_norm": 1.3132093913564287, "learning_rate": 7.161207825214973e-06, "loss": 0.3398, "step": 14322 }, { "epoch": 0.4219907782508139, "grad_norm": 1.4832289459661754, "learning_rate": 7.16074409417355e-06, "loss": 0.3569, "step": 14323 }, { "epoch": 0.4220202407082775, "grad_norm": 1.536252623710691, "learning_rate": 7.160280340276435e-06, "loss": 0.5414, "step": 14324 }, { "epoch": 0.42204970316574103, "grad_norm": 1.8063462331860485, "learning_rate": 7.159816563528532e-06, "loss": 0.4127, "step": 14325 }, { "epoch": 0.42207916562320463, "grad_norm": 1.3993770825358918, "learning_rate": 7.159352763934749e-06, "loss": 0.4325, "step": 14326 }, { "epoch": 0.42210862808066824, "grad_norm": 1.7107235791936408, "learning_rate": 7.15888894149999e-06, "loss": 0.5078, "step": 14327 }, { "epoch": 0.4221380905381318, "grad_norm": 1.6855148395708444, "learning_rate": 7.15842509622916e-06, "loss": 0.6072, "step": 14328 }, { "epoch": 0.4221675529955954, "grad_norm": 1.3524732433530693, "learning_rate": 7.15796122812717e-06, "loss": 0.3563, "step": 14329 }, { "epoch": 0.42219701545305893, "grad_norm": 1.3936159350697006, "learning_rate": 7.157497337198922e-06, "loss": 0.3639, "step": 14330 }, { "epoch": 0.42222647791052254, "grad_norm": 1.3860437428411831, "learning_rate": 7.157033423449325e-06, "loss": 0.3311, "step": 14331 }, { "epoch": 0.4222559403679861, "grad_norm": 1.3827895293085712, "learning_rate": 7.156569486883285e-06, "loss": 0.3818, "step": 14332 }, { "epoch": 0.4222854028254497, "grad_norm": 1.4168822921997628, "learning_rate": 7.156105527505712e-06, "loss": 0.431, "step": 14333 }, { "epoch": 0.42231486528291323, "grad_norm": 1.508444165391125, "learning_rate": 7.155641545321511e-06, "loss": 0.4413, "step": 14334 }, { "epoch": 0.42234432774037683, "grad_norm": 1.4012974858106733, "learning_rate": 7.15517754033559e-06, "loss": 0.4008, "step": 14335 }, { "epoch": 0.4223737901978404, "grad_norm": 1.495478943887546, "learning_rate": 7.154713512552859e-06, "loss": 0.3227, "step": 14336 }, { "epoch": 0.422403252655304, "grad_norm": 1.7646005333017065, "learning_rate": 7.154249461978225e-06, "loss": 0.4816, "step": 14337 }, { "epoch": 0.42243271511276753, "grad_norm": 1.5535421404434568, "learning_rate": 7.153785388616594e-06, "loss": 0.4033, "step": 14338 }, { "epoch": 0.42246217757023113, "grad_norm": 1.5696780583016399, "learning_rate": 7.153321292472882e-06, "loss": 0.5028, "step": 14339 }, { "epoch": 0.42249164002769474, "grad_norm": 1.5149124745802136, "learning_rate": 7.15285717355199e-06, "loss": 0.4935, "step": 14340 }, { "epoch": 0.4225211024851583, "grad_norm": 1.6309450840188886, "learning_rate": 7.152393031858833e-06, "loss": 0.5542, "step": 14341 }, { "epoch": 0.4225505649426219, "grad_norm": 1.5454756256718027, "learning_rate": 7.151928867398318e-06, "loss": 0.3676, "step": 14342 }, { "epoch": 0.42258002740008543, "grad_norm": 1.5684255194474708, "learning_rate": 7.151464680175355e-06, "loss": 0.4626, "step": 14343 }, { "epoch": 0.42260948985754904, "grad_norm": 1.5433513970071484, "learning_rate": 7.151000470194854e-06, "loss": 0.5216, "step": 14344 }, { "epoch": 0.4226389523150126, "grad_norm": 1.2845044078103247, "learning_rate": 7.1505362374617274e-06, "loss": 0.3684, "step": 14345 }, { "epoch": 0.4226684147724762, "grad_norm": 1.4784839479749818, "learning_rate": 7.150071981980881e-06, "loss": 0.5418, "step": 14346 }, { "epoch": 0.42269787722993973, "grad_norm": 1.502627397499712, "learning_rate": 7.1496077037572295e-06, "loss": 0.4219, "step": 14347 }, { "epoch": 0.42272733968740334, "grad_norm": 1.570828112436545, "learning_rate": 7.149143402795684e-06, "loss": 0.5099, "step": 14348 }, { "epoch": 0.4227568021448669, "grad_norm": 1.4830400955927308, "learning_rate": 7.148679079101154e-06, "loss": 0.4478, "step": 14349 }, { "epoch": 0.4227862646023305, "grad_norm": 1.5592609919569793, "learning_rate": 7.148214732678551e-06, "loss": 0.4191, "step": 14350 }, { "epoch": 0.42281572705979403, "grad_norm": 1.7902160060550165, "learning_rate": 7.147750363532787e-06, "loss": 0.5784, "step": 14351 }, { "epoch": 0.42284518951725764, "grad_norm": 1.3529208756870261, "learning_rate": 7.147285971668775e-06, "loss": 0.4327, "step": 14352 }, { "epoch": 0.42287465197472124, "grad_norm": 1.441707930489096, "learning_rate": 7.1468215570914254e-06, "loss": 0.4754, "step": 14353 }, { "epoch": 0.4229041144321848, "grad_norm": 1.8230707921002114, "learning_rate": 7.1463571198056536e-06, "loss": 0.6916, "step": 14354 }, { "epoch": 0.4229335768896484, "grad_norm": 1.5889182143221645, "learning_rate": 7.145892659816369e-06, "loss": 0.533, "step": 14355 }, { "epoch": 0.42296303934711194, "grad_norm": 1.5417808598872453, "learning_rate": 7.145428177128485e-06, "loss": 0.4145, "step": 14356 }, { "epoch": 0.42299250180457554, "grad_norm": 1.600819745318043, "learning_rate": 7.1449636717469164e-06, "loss": 0.4714, "step": 14357 }, { "epoch": 0.4230219642620391, "grad_norm": 1.5590183563852573, "learning_rate": 7.144499143676575e-06, "loss": 0.4731, "step": 14358 }, { "epoch": 0.4230514267195027, "grad_norm": 1.5656778820752317, "learning_rate": 7.144034592922377e-06, "loss": 0.4419, "step": 14359 }, { "epoch": 0.42308088917696623, "grad_norm": 1.5596937141990745, "learning_rate": 7.143570019489235e-06, "loss": 0.4212, "step": 14360 }, { "epoch": 0.42311035163442984, "grad_norm": 1.5232333465886785, "learning_rate": 7.1431054233820594e-06, "loss": 0.5841, "step": 14361 }, { "epoch": 0.4231398140918934, "grad_norm": 1.616150339497027, "learning_rate": 7.142640804605769e-06, "loss": 0.4936, "step": 14362 }, { "epoch": 0.423169276549357, "grad_norm": 1.422105736297443, "learning_rate": 7.142176163165279e-06, "loss": 0.4997, "step": 14363 }, { "epoch": 0.42319873900682053, "grad_norm": 1.7570986103605248, "learning_rate": 7.141711499065501e-06, "loss": 0.4741, "step": 14364 }, { "epoch": 0.42322820146428414, "grad_norm": 1.7487986095930461, "learning_rate": 7.141246812311351e-06, "loss": 0.5149, "step": 14365 }, { "epoch": 0.42325766392174774, "grad_norm": 1.6470420545558693, "learning_rate": 7.1407821029077465e-06, "loss": 0.5319, "step": 14366 }, { "epoch": 0.4232871263792113, "grad_norm": 1.6931441269407725, "learning_rate": 7.140317370859599e-06, "loss": 0.5643, "step": 14367 }, { "epoch": 0.4233165888366749, "grad_norm": 1.7180260047102958, "learning_rate": 7.1398526161718275e-06, "loss": 0.5098, "step": 14368 }, { "epoch": 0.42334605129413844, "grad_norm": 1.6177710532982348, "learning_rate": 7.139387838849349e-06, "loss": 0.5679, "step": 14369 }, { "epoch": 0.42337551375160204, "grad_norm": 1.7066267491017224, "learning_rate": 7.138923038897075e-06, "loss": 0.5225, "step": 14370 }, { "epoch": 0.4234049762090656, "grad_norm": 1.4528796964258488, "learning_rate": 7.138458216319926e-06, "loss": 0.5324, "step": 14371 }, { "epoch": 0.4234344386665292, "grad_norm": 1.4248244208559886, "learning_rate": 7.137993371122819e-06, "loss": 0.4417, "step": 14372 }, { "epoch": 0.42346390112399274, "grad_norm": 1.4978180383581792, "learning_rate": 7.1375285033106676e-06, "loss": 0.4613, "step": 14373 }, { "epoch": 0.42349336358145634, "grad_norm": 1.6869752320095928, "learning_rate": 7.137063612888393e-06, "loss": 0.572, "step": 14374 }, { "epoch": 0.4235228260389199, "grad_norm": 1.3781063585680617, "learning_rate": 7.136598699860911e-06, "loss": 0.52, "step": 14375 }, { "epoch": 0.4235522884963835, "grad_norm": 1.4448971271438718, "learning_rate": 7.136133764233138e-06, "loss": 0.4355, "step": 14376 }, { "epoch": 0.42358175095384704, "grad_norm": 1.3421619700849206, "learning_rate": 7.135668806009992e-06, "loss": 0.3365, "step": 14377 }, { "epoch": 0.42361121341131064, "grad_norm": 1.430352690468124, "learning_rate": 7.135203825196394e-06, "loss": 0.4901, "step": 14378 }, { "epoch": 0.42364067586877424, "grad_norm": 1.567581673331265, "learning_rate": 7.13473882179726e-06, "loss": 0.4279, "step": 14379 }, { "epoch": 0.4236701383262378, "grad_norm": 1.5544500097888472, "learning_rate": 7.13427379581751e-06, "loss": 0.577, "step": 14380 }, { "epoch": 0.4236996007837014, "grad_norm": 1.4951262995412824, "learning_rate": 7.133808747262061e-06, "loss": 0.3553, "step": 14381 }, { "epoch": 0.42372906324116494, "grad_norm": 1.5333557717418387, "learning_rate": 7.133343676135834e-06, "loss": 0.4731, "step": 14382 }, { "epoch": 0.42375852569862854, "grad_norm": 1.7043187500859729, "learning_rate": 7.132878582443747e-06, "loss": 0.402, "step": 14383 }, { "epoch": 0.4237879881560921, "grad_norm": 1.609992433152114, "learning_rate": 7.132413466190723e-06, "loss": 0.5509, "step": 14384 }, { "epoch": 0.4238174506135557, "grad_norm": 1.6358661507500039, "learning_rate": 7.131948327381677e-06, "loss": 0.5113, "step": 14385 }, { "epoch": 0.42384691307101924, "grad_norm": 1.652906415627864, "learning_rate": 7.131483166021531e-06, "loss": 0.5061, "step": 14386 }, { "epoch": 0.42387637552848284, "grad_norm": 1.4611668358245775, "learning_rate": 7.131017982115207e-06, "loss": 0.4541, "step": 14387 }, { "epoch": 0.4239058379859464, "grad_norm": 1.5738390089443492, "learning_rate": 7.130552775667623e-06, "loss": 0.4827, "step": 14388 }, { "epoch": 0.42393530044341, "grad_norm": 1.496615446242689, "learning_rate": 7.130087546683701e-06, "loss": 0.5042, "step": 14389 }, { "epoch": 0.42396476290087354, "grad_norm": 1.6324525010237163, "learning_rate": 7.129622295168364e-06, "loss": 0.3992, "step": 14390 }, { "epoch": 0.42399422535833714, "grad_norm": 1.3723606898461136, "learning_rate": 7.129157021126529e-06, "loss": 0.3793, "step": 14391 }, { "epoch": 0.42402368781580074, "grad_norm": 1.3015271887668434, "learning_rate": 7.128691724563121e-06, "loss": 0.4206, "step": 14392 }, { "epoch": 0.4240531502732643, "grad_norm": 1.5186234422773233, "learning_rate": 7.128226405483062e-06, "loss": 0.4922, "step": 14393 }, { "epoch": 0.4240826127307279, "grad_norm": 1.3478062974349436, "learning_rate": 7.127761063891272e-06, "loss": 0.354, "step": 14394 }, { "epoch": 0.42411207518819144, "grad_norm": 1.457957397996869, "learning_rate": 7.127295699792672e-06, "loss": 0.3321, "step": 14395 }, { "epoch": 0.42414153764565504, "grad_norm": 1.531378262093911, "learning_rate": 7.126830313192188e-06, "loss": 0.2607, "step": 14396 }, { "epoch": 0.4241710001031186, "grad_norm": 1.5941018681720465, "learning_rate": 7.1263649040947405e-06, "loss": 0.5555, "step": 14397 }, { "epoch": 0.4242004625605822, "grad_norm": 1.750476422980042, "learning_rate": 7.125899472505253e-06, "loss": 0.5797, "step": 14398 }, { "epoch": 0.42422992501804574, "grad_norm": 1.5590007863294268, "learning_rate": 7.1254340184286494e-06, "loss": 0.4266, "step": 14399 }, { "epoch": 0.42425938747550934, "grad_norm": 1.5721417611859958, "learning_rate": 7.124968541869853e-06, "loss": 0.3898, "step": 14400 }, { "epoch": 0.4242888499329729, "grad_norm": 1.495104197415685, "learning_rate": 7.124503042833786e-06, "loss": 0.3612, "step": 14401 }, { "epoch": 0.4243183123904365, "grad_norm": 1.641518514896944, "learning_rate": 7.124037521325374e-06, "loss": 0.5229, "step": 14402 }, { "epoch": 0.42434777484790004, "grad_norm": 1.4581117276061875, "learning_rate": 7.123571977349539e-06, "loss": 0.4561, "step": 14403 }, { "epoch": 0.42437723730536364, "grad_norm": 1.4493457910892409, "learning_rate": 7.123106410911209e-06, "loss": 0.4853, "step": 14404 }, { "epoch": 0.42440669976282724, "grad_norm": 1.5135964422717398, "learning_rate": 7.122640822015305e-06, "loss": 0.4227, "step": 14405 }, { "epoch": 0.4244361622202908, "grad_norm": 1.5515351747513835, "learning_rate": 7.122175210666753e-06, "loss": 0.4195, "step": 14406 }, { "epoch": 0.4244656246777544, "grad_norm": 1.492844023633781, "learning_rate": 7.121709576870479e-06, "loss": 0.4859, "step": 14407 }, { "epoch": 0.42449508713521794, "grad_norm": 1.4904690131633491, "learning_rate": 7.121243920631408e-06, "loss": 0.4386, "step": 14408 }, { "epoch": 0.42452454959268154, "grad_norm": 1.7619978370524765, "learning_rate": 7.1207782419544645e-06, "loss": 0.4371, "step": 14409 }, { "epoch": 0.4245540120501451, "grad_norm": 1.503979405370692, "learning_rate": 7.120312540844575e-06, "loss": 0.3959, "step": 14410 }, { "epoch": 0.4245834745076087, "grad_norm": 1.4743628723180453, "learning_rate": 7.119846817306667e-06, "loss": 0.4421, "step": 14411 }, { "epoch": 0.42461293696507224, "grad_norm": 1.4081287549176036, "learning_rate": 7.119381071345664e-06, "loss": 0.4097, "step": 14412 }, { "epoch": 0.42464239942253584, "grad_norm": 1.5787061495452215, "learning_rate": 7.118915302966494e-06, "loss": 0.5287, "step": 14413 }, { "epoch": 0.4246718618799994, "grad_norm": 1.4318792139579446, "learning_rate": 7.118449512174083e-06, "loss": 0.5082, "step": 14414 }, { "epoch": 0.424701324337463, "grad_norm": 1.4588585407092307, "learning_rate": 7.1179836989733605e-06, "loss": 0.4518, "step": 14415 }, { "epoch": 0.42473078679492654, "grad_norm": 1.4089399797658033, "learning_rate": 7.117517863369252e-06, "loss": 0.5517, "step": 14416 }, { "epoch": 0.42476024925239014, "grad_norm": 1.4932112317707067, "learning_rate": 7.117052005366683e-06, "loss": 0.4024, "step": 14417 }, { "epoch": 0.42478971170985375, "grad_norm": 1.6220399572024562, "learning_rate": 7.116586124970584e-06, "loss": 0.492, "step": 14418 }, { "epoch": 0.4248191741673173, "grad_norm": 1.5572198995997932, "learning_rate": 7.1161202221858825e-06, "loss": 0.4994, "step": 14419 }, { "epoch": 0.4248486366247809, "grad_norm": 1.6794886442531796, "learning_rate": 7.115654297017506e-06, "loss": 0.5778, "step": 14420 }, { "epoch": 0.42487809908224444, "grad_norm": 1.370009109600376, "learning_rate": 7.115188349470383e-06, "loss": 0.3743, "step": 14421 }, { "epoch": 0.42490756153970805, "grad_norm": 1.706168909580945, "learning_rate": 7.114722379549442e-06, "loss": 0.5011, "step": 14422 }, { "epoch": 0.4249370239971716, "grad_norm": 1.5809588418595426, "learning_rate": 7.114256387259612e-06, "loss": 0.6038, "step": 14423 }, { "epoch": 0.4249664864546352, "grad_norm": 1.5153527421641588, "learning_rate": 7.113790372605824e-06, "loss": 0.4669, "step": 14424 }, { "epoch": 0.42499594891209874, "grad_norm": 1.4812237138955633, "learning_rate": 7.1133243355930035e-06, "loss": 0.4495, "step": 14425 }, { "epoch": 0.42502541136956234, "grad_norm": 1.4438455680076154, "learning_rate": 7.112858276226082e-06, "loss": 0.4818, "step": 14426 }, { "epoch": 0.4250548738270259, "grad_norm": 1.5019593392288975, "learning_rate": 7.112392194509992e-06, "loss": 0.422, "step": 14427 }, { "epoch": 0.4250843362844895, "grad_norm": 1.4449770280950307, "learning_rate": 7.111926090449659e-06, "loss": 0.5646, "step": 14428 }, { "epoch": 0.42511379874195304, "grad_norm": 1.3874653967953754, "learning_rate": 7.111459964050016e-06, "loss": 0.4656, "step": 14429 }, { "epoch": 0.42514326119941664, "grad_norm": 1.2974738417568041, "learning_rate": 7.110993815315994e-06, "loss": 0.3857, "step": 14430 }, { "epoch": 0.42517272365688025, "grad_norm": 1.421344758980772, "learning_rate": 7.1105276442525215e-06, "loss": 0.4062, "step": 14431 }, { "epoch": 0.4252021861143438, "grad_norm": 1.3308682954233295, "learning_rate": 7.110061450864531e-06, "loss": 0.3873, "step": 14432 }, { "epoch": 0.4252316485718074, "grad_norm": 1.4445968273171026, "learning_rate": 7.109595235156954e-06, "loss": 0.4959, "step": 14433 }, { "epoch": 0.42526111102927094, "grad_norm": 1.6867066368337402, "learning_rate": 7.109128997134722e-06, "loss": 0.6032, "step": 14434 }, { "epoch": 0.42529057348673455, "grad_norm": 1.3118003731827772, "learning_rate": 7.108662736802765e-06, "loss": 0.4457, "step": 14435 }, { "epoch": 0.4253200359441981, "grad_norm": 1.4296054966154206, "learning_rate": 7.108196454166017e-06, "loss": 0.485, "step": 14436 }, { "epoch": 0.4253494984016617, "grad_norm": 1.7556238740471504, "learning_rate": 7.1077301492294084e-06, "loss": 0.5072, "step": 14437 }, { "epoch": 0.42537896085912524, "grad_norm": 1.3594232712227388, "learning_rate": 7.107263821997874e-06, "loss": 0.4573, "step": 14438 }, { "epoch": 0.42540842331658885, "grad_norm": 1.4545008420758518, "learning_rate": 7.106797472476345e-06, "loss": 0.3877, "step": 14439 }, { "epoch": 0.4254378857740524, "grad_norm": 1.776130523908483, "learning_rate": 7.106331100669753e-06, "loss": 0.5061, "step": 14440 }, { "epoch": 0.425467348231516, "grad_norm": 1.8646077945671342, "learning_rate": 7.105864706583034e-06, "loss": 0.4699, "step": 14441 }, { "epoch": 0.42549681068897954, "grad_norm": 1.4516442434246677, "learning_rate": 7.10539829022112e-06, "loss": 0.3649, "step": 14442 }, { "epoch": 0.42552627314644315, "grad_norm": 1.5624138267722099, "learning_rate": 7.104931851588942e-06, "loss": 0.361, "step": 14443 }, { "epoch": 0.42555573560390675, "grad_norm": 1.4850413163016765, "learning_rate": 7.1044653906914384e-06, "loss": 0.4414, "step": 14444 }, { "epoch": 0.4255851980613703, "grad_norm": 1.5271243315996454, "learning_rate": 7.103998907533542e-06, "loss": 0.3972, "step": 14445 }, { "epoch": 0.4256146605188339, "grad_norm": 1.5922365460668737, "learning_rate": 7.103532402120185e-06, "loss": 0.3566, "step": 14446 }, { "epoch": 0.42564412297629745, "grad_norm": 1.4732992315357598, "learning_rate": 7.103065874456303e-06, "loss": 0.425, "step": 14447 }, { "epoch": 0.42567358543376105, "grad_norm": 1.7799361467716943, "learning_rate": 7.102599324546833e-06, "loss": 0.4585, "step": 14448 }, { "epoch": 0.4257030478912246, "grad_norm": 1.5565186211207105, "learning_rate": 7.1021327523967066e-06, "loss": 0.5411, "step": 14449 }, { "epoch": 0.4257325103486882, "grad_norm": 1.4016880267354128, "learning_rate": 7.10166615801086e-06, "loss": 0.4323, "step": 14450 }, { "epoch": 0.42576197280615175, "grad_norm": 1.4878502374941243, "learning_rate": 7.101199541394232e-06, "loss": 0.418, "step": 14451 }, { "epoch": 0.42579143526361535, "grad_norm": 1.4593086897778986, "learning_rate": 7.100732902551753e-06, "loss": 0.4303, "step": 14452 }, { "epoch": 0.4258208977210789, "grad_norm": 1.508144062751396, "learning_rate": 7.100266241488363e-06, "loss": 0.6068, "step": 14453 }, { "epoch": 0.4258503601785425, "grad_norm": 1.6394137381854463, "learning_rate": 7.0997995582089966e-06, "loss": 0.4015, "step": 14454 }, { "epoch": 0.42587982263600604, "grad_norm": 1.661961871714157, "learning_rate": 7.099332852718589e-06, "loss": 0.4618, "step": 14455 }, { "epoch": 0.42590928509346965, "grad_norm": 1.4539749199890686, "learning_rate": 7.098866125022079e-06, "loss": 0.4067, "step": 14456 }, { "epoch": 0.42593874755093325, "grad_norm": 1.482665550749871, "learning_rate": 7.0983993751244025e-06, "loss": 0.567, "step": 14457 }, { "epoch": 0.4259682100083968, "grad_norm": 1.3505716246621713, "learning_rate": 7.0979326030304975e-06, "loss": 0.3472, "step": 14458 }, { "epoch": 0.4259976724658604, "grad_norm": 1.4291390152505552, "learning_rate": 7.097465808745302e-06, "loss": 0.4549, "step": 14459 }, { "epoch": 0.42602713492332395, "grad_norm": 1.6479657214291272, "learning_rate": 7.0969989922737514e-06, "loss": 0.4994, "step": 14460 }, { "epoch": 0.42605659738078755, "grad_norm": 1.3733721678796658, "learning_rate": 7.096532153620784e-06, "loss": 0.3878, "step": 14461 }, { "epoch": 0.4260860598382511, "grad_norm": 1.4331532672720915, "learning_rate": 7.096065292791338e-06, "loss": 0.5454, "step": 14462 }, { "epoch": 0.4261155222957147, "grad_norm": 1.4979966670591414, "learning_rate": 7.095598409790354e-06, "loss": 0.5203, "step": 14463 }, { "epoch": 0.42614498475317825, "grad_norm": 1.4373675792271676, "learning_rate": 7.095131504622767e-06, "loss": 0.6054, "step": 14464 }, { "epoch": 0.42617444721064185, "grad_norm": 1.5591195288353217, "learning_rate": 7.094664577293518e-06, "loss": 0.4428, "step": 14465 }, { "epoch": 0.4262039096681054, "grad_norm": 1.398923680948957, "learning_rate": 7.0941976278075466e-06, "loss": 0.4724, "step": 14466 }, { "epoch": 0.426233372125569, "grad_norm": 1.7586978070797645, "learning_rate": 7.093730656169788e-06, "loss": 0.5331, "step": 14467 }, { "epoch": 0.42626283458303255, "grad_norm": 1.6686792599797107, "learning_rate": 7.0932636623851866e-06, "loss": 0.5849, "step": 14468 }, { "epoch": 0.42629229704049615, "grad_norm": 1.599462146839458, "learning_rate": 7.092796646458682e-06, "loss": 0.5935, "step": 14469 }, { "epoch": 0.42632175949795975, "grad_norm": 1.577599720496795, "learning_rate": 7.092329608395208e-06, "loss": 0.4682, "step": 14470 }, { "epoch": 0.4263512219554233, "grad_norm": 1.4983220932928811, "learning_rate": 7.091862548199712e-06, "loss": 0.4761, "step": 14471 }, { "epoch": 0.4263806844128869, "grad_norm": 1.7275042943524546, "learning_rate": 7.091395465877131e-06, "loss": 0.4395, "step": 14472 }, { "epoch": 0.42641014687035045, "grad_norm": 1.6717588398883243, "learning_rate": 7.090928361432405e-06, "loss": 0.4885, "step": 14473 }, { "epoch": 0.42643960932781405, "grad_norm": 1.5247218583211866, "learning_rate": 7.090461234870478e-06, "loss": 0.351, "step": 14474 }, { "epoch": 0.4264690717852776, "grad_norm": 1.7331484924763583, "learning_rate": 7.089994086196288e-06, "loss": 0.3871, "step": 14475 }, { "epoch": 0.4264985342427412, "grad_norm": 1.3829539050619775, "learning_rate": 7.089526915414778e-06, "loss": 0.4519, "step": 14476 }, { "epoch": 0.42652799670020475, "grad_norm": 1.381609668732052, "learning_rate": 7.089059722530888e-06, "loss": 0.4522, "step": 14477 }, { "epoch": 0.42655745915766835, "grad_norm": 1.5538449034566262, "learning_rate": 7.088592507549561e-06, "loss": 0.3527, "step": 14478 }, { "epoch": 0.4265869216151319, "grad_norm": 1.6282655966595854, "learning_rate": 7.088125270475741e-06, "loss": 0.4945, "step": 14479 }, { "epoch": 0.4266163840725955, "grad_norm": 1.75127595453566, "learning_rate": 7.0876580113143654e-06, "loss": 0.4625, "step": 14480 }, { "epoch": 0.42664584653005905, "grad_norm": 1.415650303121633, "learning_rate": 7.087190730070381e-06, "loss": 0.3734, "step": 14481 }, { "epoch": 0.42667530898752265, "grad_norm": 1.419043016122698, "learning_rate": 7.086723426748729e-06, "loss": 0.4361, "step": 14482 }, { "epoch": 0.42670477144498625, "grad_norm": 1.5575853474603916, "learning_rate": 7.086256101354353e-06, "loss": 0.3653, "step": 14483 }, { "epoch": 0.4267342339024498, "grad_norm": 1.3859212544389001, "learning_rate": 7.085788753892197e-06, "loss": 0.3619, "step": 14484 }, { "epoch": 0.4267636963599134, "grad_norm": 1.40582343351101, "learning_rate": 7.085321384367201e-06, "loss": 0.4136, "step": 14485 }, { "epoch": 0.42679315881737695, "grad_norm": 1.4180183049969168, "learning_rate": 7.084853992784311e-06, "loss": 0.5047, "step": 14486 }, { "epoch": 0.42682262127484055, "grad_norm": 1.6679602937805418, "learning_rate": 7.084386579148472e-06, "loss": 0.4952, "step": 14487 }, { "epoch": 0.4268520837323041, "grad_norm": 1.4380129319617223, "learning_rate": 7.083919143464626e-06, "loss": 0.3835, "step": 14488 }, { "epoch": 0.4268815461897677, "grad_norm": 1.6485733909339437, "learning_rate": 7.083451685737719e-06, "loss": 0.5427, "step": 14489 }, { "epoch": 0.42691100864723125, "grad_norm": 1.4232778228687302, "learning_rate": 7.082984205972695e-06, "loss": 0.4055, "step": 14490 }, { "epoch": 0.42694047110469485, "grad_norm": 1.4099068256913947, "learning_rate": 7.082516704174498e-06, "loss": 0.4194, "step": 14491 }, { "epoch": 0.4269699335621584, "grad_norm": 1.6870675256377874, "learning_rate": 7.082049180348075e-06, "loss": 0.5223, "step": 14492 }, { "epoch": 0.426999396019622, "grad_norm": 1.5863335630516675, "learning_rate": 7.08158163449837e-06, "loss": 0.469, "step": 14493 }, { "epoch": 0.42702885847708555, "grad_norm": 1.4406851119059605, "learning_rate": 7.08111406663033e-06, "loss": 0.5078, "step": 14494 }, { "epoch": 0.42705832093454915, "grad_norm": 1.5785277949106495, "learning_rate": 7.080646476748898e-06, "loss": 0.5188, "step": 14495 }, { "epoch": 0.42708778339201275, "grad_norm": 1.622150835569991, "learning_rate": 7.080178864859022e-06, "loss": 0.4879, "step": 14496 }, { "epoch": 0.4271172458494763, "grad_norm": 1.6892754806488124, "learning_rate": 7.079711230965648e-06, "loss": 0.3876, "step": 14497 }, { "epoch": 0.4271467083069399, "grad_norm": 1.4552000991750738, "learning_rate": 7.0792435750737225e-06, "loss": 0.3988, "step": 14498 }, { "epoch": 0.42717617076440345, "grad_norm": 1.6307590186996983, "learning_rate": 7.078775897188193e-06, "loss": 0.4214, "step": 14499 }, { "epoch": 0.42720563322186705, "grad_norm": 1.6653893153372434, "learning_rate": 7.078308197314004e-06, "loss": 0.503, "step": 14500 }, { "epoch": 0.4272350956793306, "grad_norm": 1.5514789493488546, "learning_rate": 7.077840475456105e-06, "loss": 0.4543, "step": 14501 }, { "epoch": 0.4272645581367942, "grad_norm": 1.5046204596985213, "learning_rate": 7.077372731619443e-06, "loss": 0.4987, "step": 14502 }, { "epoch": 0.42729402059425775, "grad_norm": 1.4235970425525586, "learning_rate": 7.076904965808964e-06, "loss": 0.3518, "step": 14503 }, { "epoch": 0.42732348305172135, "grad_norm": 1.6020368286596278, "learning_rate": 7.0764371780296196e-06, "loss": 0.5679, "step": 14504 }, { "epoch": 0.4273529455091849, "grad_norm": 1.4509664114021883, "learning_rate": 7.075969368286354e-06, "loss": 0.4655, "step": 14505 }, { "epoch": 0.4273824079666485, "grad_norm": 1.4623430523690955, "learning_rate": 7.075501536584117e-06, "loss": 0.4179, "step": 14506 }, { "epoch": 0.42741187042411205, "grad_norm": 1.7119109815996072, "learning_rate": 7.075033682927857e-06, "loss": 0.5312, "step": 14507 }, { "epoch": 0.42744133288157565, "grad_norm": 1.376777842011053, "learning_rate": 7.074565807322524e-06, "loss": 0.3853, "step": 14508 }, { "epoch": 0.42747079533903926, "grad_norm": 1.585594228493043, "learning_rate": 7.074097909773066e-06, "loss": 0.5269, "step": 14509 }, { "epoch": 0.4275002577965028, "grad_norm": 1.3777647764807959, "learning_rate": 7.073629990284432e-06, "loss": 0.3407, "step": 14510 }, { "epoch": 0.4275297202539664, "grad_norm": 1.416375044976593, "learning_rate": 7.073162048861573e-06, "loss": 0.4862, "step": 14511 }, { "epoch": 0.42755918271142995, "grad_norm": 1.4289046480309202, "learning_rate": 7.072694085509436e-06, "loss": 0.3178, "step": 14512 }, { "epoch": 0.42758864516889356, "grad_norm": 1.4925949653673878, "learning_rate": 7.072226100232973e-06, "loss": 0.3324, "step": 14513 }, { "epoch": 0.4276181076263571, "grad_norm": 1.732846880110616, "learning_rate": 7.071758093037134e-06, "loss": 0.3918, "step": 14514 }, { "epoch": 0.4276475700838207, "grad_norm": 1.3082637360648381, "learning_rate": 7.07129006392687e-06, "loss": 0.4101, "step": 14515 }, { "epoch": 0.42767703254128425, "grad_norm": 1.4582397726571208, "learning_rate": 7.070822012907131e-06, "loss": 0.4259, "step": 14516 }, { "epoch": 0.42770649499874785, "grad_norm": 1.5069023949130098, "learning_rate": 7.070353939982868e-06, "loss": 0.4941, "step": 14517 }, { "epoch": 0.4277359574562114, "grad_norm": 1.5284091977012926, "learning_rate": 7.0698858451590305e-06, "loss": 0.4902, "step": 14518 }, { "epoch": 0.427765419913675, "grad_norm": 1.6564087944558874, "learning_rate": 7.069417728440573e-06, "loss": 0.4706, "step": 14519 }, { "epoch": 0.42779488237113855, "grad_norm": 1.6244506672372518, "learning_rate": 7.068949589832445e-06, "loss": 0.5402, "step": 14520 }, { "epoch": 0.42782434482860215, "grad_norm": 1.385867130314223, "learning_rate": 7.068481429339598e-06, "loss": 0.4006, "step": 14521 }, { "epoch": 0.42785380728606576, "grad_norm": 1.5257285713341915, "learning_rate": 7.068013246966984e-06, "loss": 0.6206, "step": 14522 }, { "epoch": 0.4278832697435293, "grad_norm": 1.5412099774288892, "learning_rate": 7.067545042719558e-06, "loss": 0.2562, "step": 14523 }, { "epoch": 0.4279127322009929, "grad_norm": 1.4736680462020264, "learning_rate": 7.06707681660227e-06, "loss": 0.3653, "step": 14524 }, { "epoch": 0.42794219465845645, "grad_norm": 1.465504277157401, "learning_rate": 7.066608568620074e-06, "loss": 0.4301, "step": 14525 }, { "epoch": 0.42797165711592006, "grad_norm": 1.4712416524109035, "learning_rate": 7.066140298777921e-06, "loss": 0.5649, "step": 14526 }, { "epoch": 0.4280011195733836, "grad_norm": 1.302198836956887, "learning_rate": 7.065672007080766e-06, "loss": 0.339, "step": 14527 }, { "epoch": 0.4280305820308472, "grad_norm": 1.6051135017817497, "learning_rate": 7.0652036935335624e-06, "loss": 0.5988, "step": 14528 }, { "epoch": 0.42806004448831075, "grad_norm": 1.4231211116404427, "learning_rate": 7.064735358141264e-06, "loss": 0.4638, "step": 14529 }, { "epoch": 0.42808950694577436, "grad_norm": 1.6105421312589128, "learning_rate": 7.064267000908824e-06, "loss": 0.6196, "step": 14530 }, { "epoch": 0.4281189694032379, "grad_norm": 1.7696824102888566, "learning_rate": 7.063798621841196e-06, "loss": 0.382, "step": 14531 }, { "epoch": 0.4281484318607015, "grad_norm": 1.6047982275930714, "learning_rate": 7.063330220943336e-06, "loss": 0.4577, "step": 14532 }, { "epoch": 0.42817789431816505, "grad_norm": 1.4929602302572131, "learning_rate": 7.0628617982201975e-06, "loss": 0.4199, "step": 14533 }, { "epoch": 0.42820735677562866, "grad_norm": 1.6248953456886304, "learning_rate": 7.0623933536767355e-06, "loss": 0.5259, "step": 14534 }, { "epoch": 0.42823681923309226, "grad_norm": 1.406061722323351, "learning_rate": 7.0619248873179044e-06, "loss": 0.3684, "step": 14535 }, { "epoch": 0.4282662816905558, "grad_norm": 1.714620869258376, "learning_rate": 7.061456399148662e-06, "loss": 0.4878, "step": 14536 }, { "epoch": 0.4282957441480194, "grad_norm": 1.451928755027098, "learning_rate": 7.0609878891739605e-06, "loss": 0.3968, "step": 14537 }, { "epoch": 0.42832520660548296, "grad_norm": 1.5723737953325483, "learning_rate": 7.060519357398759e-06, "loss": 0.4778, "step": 14538 }, { "epoch": 0.42835466906294656, "grad_norm": 1.5175388711283773, "learning_rate": 7.06005080382801e-06, "loss": 0.4585, "step": 14539 }, { "epoch": 0.4283841315204101, "grad_norm": 1.4117915971056036, "learning_rate": 7.0595822284666716e-06, "loss": 0.4715, "step": 14540 }, { "epoch": 0.4284135939778737, "grad_norm": 1.728041741424681, "learning_rate": 7.059113631319701e-06, "loss": 0.5292, "step": 14541 }, { "epoch": 0.42844305643533726, "grad_norm": 1.4321649270179768, "learning_rate": 7.0586450123920525e-06, "loss": 0.4668, "step": 14542 }, { "epoch": 0.42847251889280086, "grad_norm": 1.4556874615333737, "learning_rate": 7.058176371688684e-06, "loss": 0.3823, "step": 14543 }, { "epoch": 0.4285019813502644, "grad_norm": 1.9483595823577586, "learning_rate": 7.057707709214555e-06, "loss": 0.4841, "step": 14544 }, { "epoch": 0.428531443807728, "grad_norm": 1.3646158898712384, "learning_rate": 7.057239024974622e-06, "loss": 0.3307, "step": 14545 }, { "epoch": 0.42856090626519155, "grad_norm": 1.2856300290612115, "learning_rate": 7.056770318973838e-06, "loss": 0.3843, "step": 14546 }, { "epoch": 0.42859036872265516, "grad_norm": 1.4172592392446224, "learning_rate": 7.0563015912171674e-06, "loss": 0.481, "step": 14547 }, { "epoch": 0.42861983118011876, "grad_norm": 1.422923297542577, "learning_rate": 7.055832841709563e-06, "loss": 0.5112, "step": 14548 }, { "epoch": 0.4286492936375823, "grad_norm": 1.509622413471682, "learning_rate": 7.055364070455988e-06, "loss": 0.5588, "step": 14549 }, { "epoch": 0.4286787560950459, "grad_norm": 1.3605882509638898, "learning_rate": 7.0548952774613955e-06, "loss": 0.3967, "step": 14550 }, { "epoch": 0.42870821855250946, "grad_norm": 1.5169130650161895, "learning_rate": 7.054426462730748e-06, "loss": 0.5214, "step": 14551 }, { "epoch": 0.42873768100997306, "grad_norm": 1.5005188050476226, "learning_rate": 7.053957626269003e-06, "loss": 0.4102, "step": 14552 }, { "epoch": 0.4287671434674366, "grad_norm": 1.5503944464959456, "learning_rate": 7.053488768081121e-06, "loss": 0.6143, "step": 14553 }, { "epoch": 0.4287966059249002, "grad_norm": 1.5757940233222194, "learning_rate": 7.053019888172061e-06, "loss": 0.6126, "step": 14554 }, { "epoch": 0.42882606838236376, "grad_norm": 1.487955676607095, "learning_rate": 7.0525509865467825e-06, "loss": 0.4868, "step": 14555 }, { "epoch": 0.42885553083982736, "grad_norm": 1.4115200139206683, "learning_rate": 7.052082063210244e-06, "loss": 0.4262, "step": 14556 }, { "epoch": 0.4288849932972909, "grad_norm": 1.556672163551556, "learning_rate": 7.0516131181674075e-06, "loss": 0.5261, "step": 14557 }, { "epoch": 0.4289144557547545, "grad_norm": 1.511101819355442, "learning_rate": 7.051144151423232e-06, "loss": 0.4034, "step": 14558 }, { "epoch": 0.42894391821221806, "grad_norm": 1.4230648397404486, "learning_rate": 7.05067516298268e-06, "loss": 0.363, "step": 14559 }, { "epoch": 0.42897338066968166, "grad_norm": 1.4151742224328046, "learning_rate": 7.05020615285071e-06, "loss": 0.4783, "step": 14560 }, { "epoch": 0.42900284312714526, "grad_norm": 1.6786443584443436, "learning_rate": 7.049737121032284e-06, "loss": 0.4849, "step": 14561 }, { "epoch": 0.4290323055846088, "grad_norm": 1.5697638030736132, "learning_rate": 7.0492680675323635e-06, "loss": 0.5395, "step": 14562 }, { "epoch": 0.4290617680420724, "grad_norm": 1.669740355429385, "learning_rate": 7.0487989923559104e-06, "loss": 0.5397, "step": 14563 }, { "epoch": 0.42909123049953596, "grad_norm": 1.3634966353188136, "learning_rate": 7.048329895507887e-06, "loss": 0.4567, "step": 14564 }, { "epoch": 0.42912069295699956, "grad_norm": 1.4783606038082304, "learning_rate": 7.047860776993253e-06, "loss": 0.5422, "step": 14565 }, { "epoch": 0.4291501554144631, "grad_norm": 1.5741718382078362, "learning_rate": 7.047391636816971e-06, "loss": 0.4737, "step": 14566 }, { "epoch": 0.4291796178719267, "grad_norm": 1.5520521907382543, "learning_rate": 7.046922474984006e-06, "loss": 0.4699, "step": 14567 }, { "epoch": 0.42920908032939026, "grad_norm": 1.5346589908546966, "learning_rate": 7.046453291499318e-06, "loss": 0.5366, "step": 14568 }, { "epoch": 0.42923854278685386, "grad_norm": 1.4300316924435497, "learning_rate": 7.045984086367871e-06, "loss": 0.5678, "step": 14569 }, { "epoch": 0.4292680052443174, "grad_norm": 1.7648018700387338, "learning_rate": 7.045514859594627e-06, "loss": 0.6804, "step": 14570 }, { "epoch": 0.429297467701781, "grad_norm": 1.6362717306991754, "learning_rate": 7.045045611184551e-06, "loss": 0.4999, "step": 14571 }, { "epoch": 0.42932693015924456, "grad_norm": 1.297756981278719, "learning_rate": 7.044576341142606e-06, "loss": 0.3859, "step": 14572 }, { "epoch": 0.42935639261670816, "grad_norm": 1.5848938707913922, "learning_rate": 7.044107049473754e-06, "loss": 0.3834, "step": 14573 }, { "epoch": 0.42938585507417176, "grad_norm": 1.3487800470264144, "learning_rate": 7.043637736182962e-06, "loss": 0.4641, "step": 14574 }, { "epoch": 0.4294153175316353, "grad_norm": 1.4942857185381502, "learning_rate": 7.043168401275193e-06, "loss": 0.4947, "step": 14575 }, { "epoch": 0.4294447799890989, "grad_norm": 1.6621335728977966, "learning_rate": 7.042699044755411e-06, "loss": 0.5283, "step": 14576 }, { "epoch": 0.42947424244656246, "grad_norm": 1.4093709791967122, "learning_rate": 7.0422296666285805e-06, "loss": 0.451, "step": 14577 }, { "epoch": 0.42950370490402606, "grad_norm": 1.526201291648729, "learning_rate": 7.041760266899667e-06, "loss": 0.4635, "step": 14578 }, { "epoch": 0.4295331673614896, "grad_norm": 1.523550030627365, "learning_rate": 7.041290845573637e-06, "loss": 0.3865, "step": 14579 }, { "epoch": 0.4295626298189532, "grad_norm": 1.4730882906650113, "learning_rate": 7.0408214026554535e-06, "loss": 0.4364, "step": 14580 }, { "epoch": 0.42959209227641676, "grad_norm": 1.5225254899064316, "learning_rate": 7.040351938150084e-06, "loss": 0.5527, "step": 14581 }, { "epoch": 0.42962155473388036, "grad_norm": 1.8786755510594009, "learning_rate": 7.039882452062492e-06, "loss": 0.632, "step": 14582 }, { "epoch": 0.4296510171913439, "grad_norm": 1.5174611605002308, "learning_rate": 7.039412944397646e-06, "loss": 0.507, "step": 14583 }, { "epoch": 0.4296804796488075, "grad_norm": 1.516033859610128, "learning_rate": 7.038943415160512e-06, "loss": 0.397, "step": 14584 }, { "epoch": 0.42970994210627106, "grad_norm": 1.4307862422268671, "learning_rate": 7.038473864356054e-06, "loss": 0.5087, "step": 14585 }, { "epoch": 0.42973940456373466, "grad_norm": 1.6265014005587233, "learning_rate": 7.038004291989242e-06, "loss": 0.5936, "step": 14586 }, { "epoch": 0.42976886702119826, "grad_norm": 1.6279506902422007, "learning_rate": 7.037534698065043e-06, "loss": 0.4785, "step": 14587 }, { "epoch": 0.4297983294786618, "grad_norm": 1.5384277044799255, "learning_rate": 7.03706508258842e-06, "loss": 0.5859, "step": 14588 }, { "epoch": 0.4298277919361254, "grad_norm": 1.4119898177524748, "learning_rate": 7.036595445564346e-06, "loss": 0.4123, "step": 14589 }, { "epoch": 0.42985725439358896, "grad_norm": 1.5583092861016719, "learning_rate": 7.036125786997785e-06, "loss": 0.6084, "step": 14590 }, { "epoch": 0.42988671685105256, "grad_norm": 1.450341431753571, "learning_rate": 7.035656106893705e-06, "loss": 0.5168, "step": 14591 }, { "epoch": 0.4299161793085161, "grad_norm": 1.5696029062693782, "learning_rate": 7.035186405257076e-06, "loss": 0.4362, "step": 14592 }, { "epoch": 0.4299456417659797, "grad_norm": 1.640606882151774, "learning_rate": 7.034716682092866e-06, "loss": 0.4201, "step": 14593 }, { "epoch": 0.42997510422344326, "grad_norm": 1.3981892407687468, "learning_rate": 7.034246937406043e-06, "loss": 0.5246, "step": 14594 }, { "epoch": 0.43000456668090686, "grad_norm": 1.550388384891658, "learning_rate": 7.033777171201574e-06, "loss": 0.4313, "step": 14595 }, { "epoch": 0.4300340291383704, "grad_norm": 1.4737496158490544, "learning_rate": 7.033307383484434e-06, "loss": 0.4686, "step": 14596 }, { "epoch": 0.430063491595834, "grad_norm": 1.447808642611763, "learning_rate": 7.032837574259583e-06, "loss": 0.5075, "step": 14597 }, { "epoch": 0.43009295405329756, "grad_norm": 1.4167775273042629, "learning_rate": 7.032367743531998e-06, "loss": 0.485, "step": 14598 }, { "epoch": 0.43012241651076116, "grad_norm": 1.9873896001449625, "learning_rate": 7.031897891306648e-06, "loss": 0.6775, "step": 14599 }, { "epoch": 0.43015187896822477, "grad_norm": 1.461693761899886, "learning_rate": 7.0314280175884995e-06, "loss": 0.3621, "step": 14600 }, { "epoch": 0.4301813414256883, "grad_norm": 1.4273348805643167, "learning_rate": 7.030958122382525e-06, "loss": 0.3351, "step": 14601 }, { "epoch": 0.4302108038831519, "grad_norm": 1.5072069388476026, "learning_rate": 7.030488205693694e-06, "loss": 0.4174, "step": 14602 }, { "epoch": 0.43024026634061546, "grad_norm": 1.6320622214742124, "learning_rate": 7.030018267526978e-06, "loss": 0.3697, "step": 14603 }, { "epoch": 0.43026972879807907, "grad_norm": 1.5915824495003719, "learning_rate": 7.029548307887347e-06, "loss": 0.5716, "step": 14604 }, { "epoch": 0.4302991912555426, "grad_norm": 1.4879045197365919, "learning_rate": 7.029078326779773e-06, "loss": 0.4482, "step": 14605 }, { "epoch": 0.4303286537130062, "grad_norm": 1.4901140467358003, "learning_rate": 7.028608324209227e-06, "loss": 0.4041, "step": 14606 }, { "epoch": 0.43035811617046976, "grad_norm": 1.4413647273862014, "learning_rate": 7.02813830018068e-06, "loss": 0.522, "step": 14607 }, { "epoch": 0.43038757862793336, "grad_norm": 1.532789127787068, "learning_rate": 7.0276682546991045e-06, "loss": 0.3664, "step": 14608 }, { "epoch": 0.4304170410853969, "grad_norm": 1.4411085595452293, "learning_rate": 7.0271981877694715e-06, "loss": 0.4094, "step": 14609 }, { "epoch": 0.4304465035428605, "grad_norm": 1.5780618864557439, "learning_rate": 7.026728099396755e-06, "loss": 0.426, "step": 14610 }, { "epoch": 0.43047596600032406, "grad_norm": 1.4668009709979921, "learning_rate": 7.026257989585925e-06, "loss": 0.4216, "step": 14611 }, { "epoch": 0.43050542845778766, "grad_norm": 1.5035818578544329, "learning_rate": 7.025787858341956e-06, "loss": 0.432, "step": 14612 }, { "epoch": 0.43053489091525127, "grad_norm": 1.5181851297366709, "learning_rate": 7.025317705669821e-06, "loss": 0.4517, "step": 14613 }, { "epoch": 0.4305643533727148, "grad_norm": 1.7004916670400994, "learning_rate": 7.024847531574492e-06, "loss": 0.5746, "step": 14614 }, { "epoch": 0.4305938158301784, "grad_norm": 1.4019934244381516, "learning_rate": 7.024377336060943e-06, "loss": 0.3502, "step": 14615 }, { "epoch": 0.43062327828764196, "grad_norm": 1.4729695153289495, "learning_rate": 7.023907119134147e-06, "loss": 0.4633, "step": 14616 }, { "epoch": 0.43065274074510557, "grad_norm": 1.5936656166790335, "learning_rate": 7.023436880799079e-06, "loss": 0.5289, "step": 14617 }, { "epoch": 0.4306822032025691, "grad_norm": 1.4232476727088967, "learning_rate": 7.022966621060713e-06, "loss": 0.3936, "step": 14618 }, { "epoch": 0.4307116656600327, "grad_norm": 1.6154639832224844, "learning_rate": 7.022496339924021e-06, "loss": 0.425, "step": 14619 }, { "epoch": 0.43074112811749626, "grad_norm": 1.7202433675073507, "learning_rate": 7.02202603739398e-06, "loss": 0.6357, "step": 14620 }, { "epoch": 0.43077059057495987, "grad_norm": 1.3044681581514175, "learning_rate": 7.021555713475563e-06, "loss": 0.4158, "step": 14621 }, { "epoch": 0.4308000530324234, "grad_norm": 1.4673162466248175, "learning_rate": 7.021085368173746e-06, "loss": 0.5737, "step": 14622 }, { "epoch": 0.430829515489887, "grad_norm": 1.5095702359734422, "learning_rate": 7.020615001493505e-06, "loss": 0.4471, "step": 14623 }, { "epoch": 0.43085897794735056, "grad_norm": 1.3551080183977287, "learning_rate": 7.020144613439814e-06, "loss": 0.238, "step": 14624 }, { "epoch": 0.43088844040481417, "grad_norm": 1.5527818388384103, "learning_rate": 7.019674204017648e-06, "loss": 0.3848, "step": 14625 }, { "epoch": 0.43091790286227777, "grad_norm": 1.5674884996914396, "learning_rate": 7.019203773231984e-06, "loss": 0.4338, "step": 14626 }, { "epoch": 0.4309473653197413, "grad_norm": 1.501138100210141, "learning_rate": 7.018733321087799e-06, "loss": 0.5102, "step": 14627 }, { "epoch": 0.4309768277772049, "grad_norm": 1.670358749154271, "learning_rate": 7.0182628475900675e-06, "loss": 0.5273, "step": 14628 }, { "epoch": 0.43100629023466847, "grad_norm": 1.3677354324024862, "learning_rate": 7.017792352743766e-06, "loss": 0.4026, "step": 14629 }, { "epoch": 0.43103575269213207, "grad_norm": 1.9219219254118112, "learning_rate": 7.017321836553872e-06, "loss": 0.6394, "step": 14630 }, { "epoch": 0.4310652151495956, "grad_norm": 1.5153236142616369, "learning_rate": 7.016851299025362e-06, "loss": 0.3811, "step": 14631 }, { "epoch": 0.4310946776070592, "grad_norm": 1.6854198642029519, "learning_rate": 7.016380740163216e-06, "loss": 0.6139, "step": 14632 }, { "epoch": 0.43112414006452277, "grad_norm": 1.36891549014031, "learning_rate": 7.0159101599724075e-06, "loss": 0.4028, "step": 14633 }, { "epoch": 0.43115360252198637, "grad_norm": 1.4924689705896295, "learning_rate": 7.015439558457915e-06, "loss": 0.4295, "step": 14634 }, { "epoch": 0.4311830649794499, "grad_norm": 1.4659015491077199, "learning_rate": 7.014968935624718e-06, "loss": 0.4425, "step": 14635 }, { "epoch": 0.4312125274369135, "grad_norm": 1.7872069771056638, "learning_rate": 7.014498291477794e-06, "loss": 0.652, "step": 14636 }, { "epoch": 0.43124198989437706, "grad_norm": 1.5409371001377854, "learning_rate": 7.0140276260221196e-06, "loss": 0.48, "step": 14637 }, { "epoch": 0.43127145235184067, "grad_norm": 1.5488798488783269, "learning_rate": 7.013556939262676e-06, "loss": 0.494, "step": 14638 }, { "epoch": 0.43130091480930427, "grad_norm": 1.4978536257945727, "learning_rate": 7.01308623120444e-06, "loss": 0.4234, "step": 14639 }, { "epoch": 0.4313303772667678, "grad_norm": 1.6393703796676973, "learning_rate": 7.012615501852392e-06, "loss": 0.5263, "step": 14640 }, { "epoch": 0.4313598397242314, "grad_norm": 1.593353782156959, "learning_rate": 7.0121447512115105e-06, "loss": 0.5105, "step": 14641 }, { "epoch": 0.43138930218169497, "grad_norm": 1.49078178418258, "learning_rate": 7.011673979286775e-06, "loss": 0.4354, "step": 14642 }, { "epoch": 0.43141876463915857, "grad_norm": 1.599172957077431, "learning_rate": 7.011203186083165e-06, "loss": 0.3931, "step": 14643 }, { "epoch": 0.4314482270966221, "grad_norm": 1.6390280776152641, "learning_rate": 7.010732371605659e-06, "loss": 0.3414, "step": 14644 }, { "epoch": 0.4314776895540857, "grad_norm": 1.499231897490284, "learning_rate": 7.010261535859242e-06, "loss": 0.4473, "step": 14645 }, { "epoch": 0.43150715201154927, "grad_norm": 1.517941597206194, "learning_rate": 7.009790678848889e-06, "loss": 0.4555, "step": 14646 }, { "epoch": 0.43153661446901287, "grad_norm": 1.3244913113897183, "learning_rate": 7.009319800579584e-06, "loss": 0.295, "step": 14647 }, { "epoch": 0.4315660769264764, "grad_norm": 1.4329467375462126, "learning_rate": 7.0088489010563065e-06, "loss": 0.4402, "step": 14648 }, { "epoch": 0.43159553938394, "grad_norm": 1.5285955019896353, "learning_rate": 7.008377980284035e-06, "loss": 0.4807, "step": 14649 }, { "epoch": 0.43162500184140357, "grad_norm": 1.621698704926118, "learning_rate": 7.007907038267756e-06, "loss": 0.4687, "step": 14650 }, { "epoch": 0.43165446429886717, "grad_norm": 1.5411309718895534, "learning_rate": 7.0074360750124485e-06, "loss": 0.4775, "step": 14651 }, { "epoch": 0.43168392675633077, "grad_norm": 1.5967658058413436, "learning_rate": 7.006965090523092e-06, "loss": 0.415, "step": 14652 }, { "epoch": 0.4317133892137943, "grad_norm": 1.5096144980634278, "learning_rate": 7.006494084804671e-06, "loss": 0.5517, "step": 14653 }, { "epoch": 0.4317428516712579, "grad_norm": 1.6779259098782275, "learning_rate": 7.006023057862168e-06, "loss": 0.4034, "step": 14654 }, { "epoch": 0.43177231412872147, "grad_norm": 1.4980463451615982, "learning_rate": 7.005552009700563e-06, "loss": 0.4822, "step": 14655 }, { "epoch": 0.43180177658618507, "grad_norm": 1.6263858475805526, "learning_rate": 7.005080940324842e-06, "loss": 0.588, "step": 14656 }, { "epoch": 0.4318312390436486, "grad_norm": 1.5211177561961953, "learning_rate": 7.004609849739985e-06, "loss": 0.3457, "step": 14657 }, { "epoch": 0.4318607015011122, "grad_norm": 1.3504738844914956, "learning_rate": 7.004138737950976e-06, "loss": 0.3738, "step": 14658 }, { "epoch": 0.43189016395857577, "grad_norm": 1.609892852881808, "learning_rate": 7.003667604962797e-06, "loss": 0.4424, "step": 14659 }, { "epoch": 0.43191962641603937, "grad_norm": 1.4139316796421744, "learning_rate": 7.003196450780434e-06, "loss": 0.4484, "step": 14660 }, { "epoch": 0.4319490888735029, "grad_norm": 1.494064191929503, "learning_rate": 7.002725275408868e-06, "loss": 0.4663, "step": 14661 }, { "epoch": 0.4319785513309665, "grad_norm": 1.4018504286854845, "learning_rate": 7.002254078853087e-06, "loss": 0.406, "step": 14662 }, { "epoch": 0.43200801378843007, "grad_norm": 1.7910652951568167, "learning_rate": 7.001782861118071e-06, "loss": 0.4442, "step": 14663 }, { "epoch": 0.43203747624589367, "grad_norm": 1.6553580316525591, "learning_rate": 7.0013116222088066e-06, "loss": 0.4577, "step": 14664 }, { "epoch": 0.4320669387033573, "grad_norm": 1.5737123225033522, "learning_rate": 7.000840362130276e-06, "loss": 0.5879, "step": 14665 }, { "epoch": 0.4320964011608208, "grad_norm": 1.601136798464414, "learning_rate": 7.000369080887467e-06, "loss": 0.4643, "step": 14666 }, { "epoch": 0.4321258636182844, "grad_norm": 1.4898412482474577, "learning_rate": 6.9998977784853625e-06, "loss": 0.3458, "step": 14667 }, { "epoch": 0.43215532607574797, "grad_norm": 1.5359316119595998, "learning_rate": 6.99942645492895e-06, "loss": 0.4474, "step": 14668 }, { "epoch": 0.4321847885332116, "grad_norm": 1.5341763374805102, "learning_rate": 6.998955110223215e-06, "loss": 0.4693, "step": 14669 }, { "epoch": 0.4322142509906751, "grad_norm": 1.5914404293897948, "learning_rate": 6.99848374437314e-06, "loss": 0.5327, "step": 14670 }, { "epoch": 0.4322437134481387, "grad_norm": 1.5437666532226297, "learning_rate": 6.998012357383713e-06, "loss": 0.4415, "step": 14671 }, { "epoch": 0.43227317590560227, "grad_norm": 1.5266113297839932, "learning_rate": 6.997540949259921e-06, "loss": 0.5201, "step": 14672 }, { "epoch": 0.43230263836306587, "grad_norm": 1.3864369676617316, "learning_rate": 6.997069520006751e-06, "loss": 0.4194, "step": 14673 }, { "epoch": 0.4323321008205294, "grad_norm": 1.708840003246226, "learning_rate": 6.996598069629186e-06, "loss": 0.4399, "step": 14674 }, { "epoch": 0.432361563277993, "grad_norm": 1.431032562501547, "learning_rate": 6.996126598132216e-06, "loss": 0.4426, "step": 14675 }, { "epoch": 0.43239102573545657, "grad_norm": 1.7498758602035007, "learning_rate": 6.995655105520827e-06, "loss": 0.4581, "step": 14676 }, { "epoch": 0.43242048819292017, "grad_norm": 1.5210889810460932, "learning_rate": 6.9951835918000055e-06, "loss": 0.4698, "step": 14677 }, { "epoch": 0.4324499506503838, "grad_norm": 1.4527141743495369, "learning_rate": 6.994712056974742e-06, "loss": 0.4403, "step": 14678 }, { "epoch": 0.4324794131078473, "grad_norm": 1.6902367068254394, "learning_rate": 6.99424050105002e-06, "loss": 0.4928, "step": 14679 }, { "epoch": 0.4325088755653109, "grad_norm": 1.464686386823426, "learning_rate": 6.993768924030831e-06, "loss": 0.4374, "step": 14680 }, { "epoch": 0.43253833802277447, "grad_norm": 1.4592735023448895, "learning_rate": 6.993297325922162e-06, "loss": 0.4611, "step": 14681 }, { "epoch": 0.4325678004802381, "grad_norm": 1.4722711319914565, "learning_rate": 6.9928257067290015e-06, "loss": 0.3606, "step": 14682 }, { "epoch": 0.4325972629377016, "grad_norm": 1.3315705919798209, "learning_rate": 6.992354066456338e-06, "loss": 0.3922, "step": 14683 }, { "epoch": 0.4326267253951652, "grad_norm": 1.6094247339913832, "learning_rate": 6.991882405109161e-06, "loss": 0.5535, "step": 14684 }, { "epoch": 0.43265618785262877, "grad_norm": 1.4612009021488621, "learning_rate": 6.991410722692457e-06, "loss": 0.4463, "step": 14685 }, { "epoch": 0.4326856503100924, "grad_norm": 1.6724482693153104, "learning_rate": 6.990939019211218e-06, "loss": 0.4303, "step": 14686 }, { "epoch": 0.4327151127675559, "grad_norm": 1.77539054786622, "learning_rate": 6.990467294670435e-06, "loss": 0.5986, "step": 14687 }, { "epoch": 0.4327445752250195, "grad_norm": 1.4603112066007662, "learning_rate": 6.9899955490750935e-06, "loss": 0.3831, "step": 14688 }, { "epoch": 0.43277403768248307, "grad_norm": 1.493845406180534, "learning_rate": 6.989523782430185e-06, "loss": 0.4711, "step": 14689 }, { "epoch": 0.4328035001399467, "grad_norm": 1.317291417664495, "learning_rate": 6.989051994740702e-06, "loss": 0.3239, "step": 14690 }, { "epoch": 0.4328329625974103, "grad_norm": 1.3610243559230257, "learning_rate": 6.988580186011631e-06, "loss": 0.3562, "step": 14691 }, { "epoch": 0.4328624250548738, "grad_norm": 1.3032298574616263, "learning_rate": 6.988108356247967e-06, "loss": 0.3512, "step": 14692 }, { "epoch": 0.4328918875123374, "grad_norm": 1.7315115227559656, "learning_rate": 6.987636505454698e-06, "loss": 0.425, "step": 14693 }, { "epoch": 0.432921349969801, "grad_norm": 1.6891138451994414, "learning_rate": 6.987164633636816e-06, "loss": 0.5128, "step": 14694 }, { "epoch": 0.4329508124272646, "grad_norm": 1.8529561321961054, "learning_rate": 6.98669274079931e-06, "loss": 0.6139, "step": 14695 }, { "epoch": 0.4329802748847281, "grad_norm": 1.3704900179510577, "learning_rate": 6.986220826947177e-06, "loss": 0.287, "step": 14696 }, { "epoch": 0.4330097373421917, "grad_norm": 1.5747542733035516, "learning_rate": 6.9857488920854036e-06, "loss": 0.5319, "step": 14697 }, { "epoch": 0.43303919979965527, "grad_norm": 1.361102868132645, "learning_rate": 6.9852769362189835e-06, "loss": 0.4594, "step": 14698 }, { "epoch": 0.4330686622571189, "grad_norm": 1.5491230105621294, "learning_rate": 6.98480495935291e-06, "loss": 0.3625, "step": 14699 }, { "epoch": 0.4330981247145824, "grad_norm": 1.7094188496950355, "learning_rate": 6.9843329614921725e-06, "loss": 0.5485, "step": 14700 }, { "epoch": 0.433127587172046, "grad_norm": 1.605065199463076, "learning_rate": 6.983860942641767e-06, "loss": 0.4591, "step": 14701 }, { "epoch": 0.43315704962950957, "grad_norm": 1.5733489949534079, "learning_rate": 6.983388902806686e-06, "loss": 0.5205, "step": 14702 }, { "epoch": 0.4331865120869732, "grad_norm": 1.4289403493039934, "learning_rate": 6.982916841991921e-06, "loss": 0.3837, "step": 14703 }, { "epoch": 0.4332159745444368, "grad_norm": 1.574495047595415, "learning_rate": 6.982444760202465e-06, "loss": 0.4211, "step": 14704 }, { "epoch": 0.4332454370019003, "grad_norm": 1.585281844516049, "learning_rate": 6.981972657443313e-06, "loss": 0.5108, "step": 14705 }, { "epoch": 0.4332748994593639, "grad_norm": 1.503364960945007, "learning_rate": 6.981500533719459e-06, "loss": 0.4846, "step": 14706 }, { "epoch": 0.4333043619168275, "grad_norm": 1.3364319473410309, "learning_rate": 6.981028389035896e-06, "loss": 0.4603, "step": 14707 }, { "epoch": 0.4333338243742911, "grad_norm": 1.3896940859201663, "learning_rate": 6.980556223397619e-06, "loss": 0.4613, "step": 14708 }, { "epoch": 0.4333632868317546, "grad_norm": 1.3723899302708633, "learning_rate": 6.98008403680962e-06, "loss": 0.3457, "step": 14709 }, { "epoch": 0.4333927492892182, "grad_norm": 1.6202689911415522, "learning_rate": 6.979611829276896e-06, "loss": 0.4597, "step": 14710 }, { "epoch": 0.4334222117466818, "grad_norm": 1.7379440902602978, "learning_rate": 6.979139600804442e-06, "loss": 0.4774, "step": 14711 }, { "epoch": 0.4334516742041454, "grad_norm": 1.4830351830892183, "learning_rate": 6.978667351397251e-06, "loss": 0.6707, "step": 14712 }, { "epoch": 0.4334811366616089, "grad_norm": 1.3854719074113846, "learning_rate": 6.978195081060323e-06, "loss": 0.4276, "step": 14713 }, { "epoch": 0.4335105991190725, "grad_norm": 1.6716832657847607, "learning_rate": 6.977722789798648e-06, "loss": 0.3992, "step": 14714 }, { "epoch": 0.4335400615765361, "grad_norm": 1.7892349220783794, "learning_rate": 6.977250477617224e-06, "loss": 0.5033, "step": 14715 }, { "epoch": 0.4335695240339997, "grad_norm": 1.5134399271589436, "learning_rate": 6.9767781445210466e-06, "loss": 0.5563, "step": 14716 }, { "epoch": 0.4335989864914633, "grad_norm": 1.460808188533476, "learning_rate": 6.976305790515114e-06, "loss": 0.3114, "step": 14717 }, { "epoch": 0.4336284489489268, "grad_norm": 1.5682225979512705, "learning_rate": 6.975833415604421e-06, "loss": 0.5552, "step": 14718 }, { "epoch": 0.43365791140639043, "grad_norm": 1.6856751398685277, "learning_rate": 6.975361019793963e-06, "loss": 0.4927, "step": 14719 }, { "epoch": 0.433687373863854, "grad_norm": 1.652525281270332, "learning_rate": 6.97488860308874e-06, "loss": 0.3785, "step": 14720 }, { "epoch": 0.4337168363213176, "grad_norm": 1.6309727081596492, "learning_rate": 6.9744161654937446e-06, "loss": 0.6097, "step": 14721 }, { "epoch": 0.4337462987787811, "grad_norm": 1.589722357264878, "learning_rate": 6.973943707013978e-06, "loss": 0.4539, "step": 14722 }, { "epoch": 0.43377576123624473, "grad_norm": 1.5439370110964459, "learning_rate": 6.973471227654438e-06, "loss": 0.5219, "step": 14723 }, { "epoch": 0.4338052236937083, "grad_norm": 1.6728613178422218, "learning_rate": 6.972998727420119e-06, "loss": 0.5116, "step": 14724 }, { "epoch": 0.4338346861511719, "grad_norm": 1.5003886637064094, "learning_rate": 6.9725262063160205e-06, "loss": 0.4665, "step": 14725 }, { "epoch": 0.4338641486086354, "grad_norm": 1.323000386423473, "learning_rate": 6.972053664347142e-06, "loss": 0.3839, "step": 14726 }, { "epoch": 0.433893611066099, "grad_norm": 1.492531875861106, "learning_rate": 6.97158110151848e-06, "loss": 0.5004, "step": 14727 }, { "epoch": 0.4339230735235626, "grad_norm": 1.5523711407446328, "learning_rate": 6.971108517835034e-06, "loss": 0.4938, "step": 14728 }, { "epoch": 0.4339525359810262, "grad_norm": 1.3590747441810929, "learning_rate": 6.9706359133018046e-06, "loss": 0.4116, "step": 14729 }, { "epoch": 0.4339819984384898, "grad_norm": 1.5447936945210958, "learning_rate": 6.970163287923787e-06, "loss": 0.4954, "step": 14730 }, { "epoch": 0.4340114608959533, "grad_norm": 1.6316307430163008, "learning_rate": 6.969690641705983e-06, "loss": 0.459, "step": 14731 }, { "epoch": 0.43404092335341693, "grad_norm": 1.700177376481086, "learning_rate": 6.969217974653391e-06, "loss": 0.4765, "step": 14732 }, { "epoch": 0.4340703858108805, "grad_norm": 1.67417687852757, "learning_rate": 6.9687452867710135e-06, "loss": 0.5166, "step": 14733 }, { "epoch": 0.4340998482683441, "grad_norm": 1.3982357609696061, "learning_rate": 6.968272578063847e-06, "loss": 0.3794, "step": 14734 }, { "epoch": 0.4341293107258076, "grad_norm": 1.5892349020735517, "learning_rate": 6.967799848536893e-06, "loss": 0.5183, "step": 14735 }, { "epoch": 0.43415877318327123, "grad_norm": 1.8810374493955357, "learning_rate": 6.967327098195152e-06, "loss": 0.6289, "step": 14736 }, { "epoch": 0.4341882356407348, "grad_norm": 1.3752802845802097, "learning_rate": 6.966854327043624e-06, "loss": 0.3901, "step": 14737 }, { "epoch": 0.4342176980981984, "grad_norm": 1.442333656416854, "learning_rate": 6.966381535087312e-06, "loss": 0.3755, "step": 14738 }, { "epoch": 0.4342471605556619, "grad_norm": 1.4577726870954812, "learning_rate": 6.965908722331214e-06, "loss": 0.4297, "step": 14739 }, { "epoch": 0.43427662301312553, "grad_norm": 1.3302714664901998, "learning_rate": 6.965435888780333e-06, "loss": 0.3542, "step": 14740 }, { "epoch": 0.4343060854705891, "grad_norm": 1.377511133023651, "learning_rate": 6.96496303443967e-06, "loss": 0.425, "step": 14741 }, { "epoch": 0.4343355479280527, "grad_norm": 1.3348684935067066, "learning_rate": 6.964490159314227e-06, "loss": 0.382, "step": 14742 }, { "epoch": 0.4343650103855163, "grad_norm": 1.6559469247175305, "learning_rate": 6.964017263409006e-06, "loss": 0.5272, "step": 14743 }, { "epoch": 0.43439447284297983, "grad_norm": 1.6107424934981749, "learning_rate": 6.96354434672901e-06, "loss": 0.4526, "step": 14744 }, { "epoch": 0.43442393530044343, "grad_norm": 1.5875784972206264, "learning_rate": 6.963071409279238e-06, "loss": 0.5293, "step": 14745 }, { "epoch": 0.434453397757907, "grad_norm": 1.5091454215410107, "learning_rate": 6.962598451064695e-06, "loss": 0.4806, "step": 14746 }, { "epoch": 0.4344828602153706, "grad_norm": 1.287178164254776, "learning_rate": 6.962125472090387e-06, "loss": 0.3461, "step": 14747 }, { "epoch": 0.43451232267283413, "grad_norm": 1.5635405812132996, "learning_rate": 6.961652472361311e-06, "loss": 0.3844, "step": 14748 }, { "epoch": 0.43454178513029773, "grad_norm": 1.516229472069212, "learning_rate": 6.9611794518824724e-06, "loss": 0.4202, "step": 14749 }, { "epoch": 0.4345712475877613, "grad_norm": 1.4880634047158612, "learning_rate": 6.9607064106588776e-06, "loss": 0.5548, "step": 14750 }, { "epoch": 0.4346007100452249, "grad_norm": 1.6382098571433035, "learning_rate": 6.960233348695525e-06, "loss": 0.5276, "step": 14751 }, { "epoch": 0.4346301725026884, "grad_norm": 1.513653470227214, "learning_rate": 6.959760265997423e-06, "loss": 0.4714, "step": 14752 }, { "epoch": 0.43465963496015203, "grad_norm": 1.4453559548482326, "learning_rate": 6.959287162569575e-06, "loss": 0.5759, "step": 14753 }, { "epoch": 0.4346890974176156, "grad_norm": 1.5501495762335862, "learning_rate": 6.958814038416983e-06, "loss": 0.444, "step": 14754 }, { "epoch": 0.4347185598750792, "grad_norm": 1.641992353138084, "learning_rate": 6.958340893544654e-06, "loss": 0.4085, "step": 14755 }, { "epoch": 0.4347480223325428, "grad_norm": 1.350144971326427, "learning_rate": 6.957867727957592e-06, "loss": 0.4105, "step": 14756 }, { "epoch": 0.43477748479000633, "grad_norm": 1.473227529143006, "learning_rate": 6.9573945416608e-06, "loss": 0.2804, "step": 14757 }, { "epoch": 0.43480694724746993, "grad_norm": 1.520646907917888, "learning_rate": 6.956921334659288e-06, "loss": 0.3907, "step": 14758 }, { "epoch": 0.4348364097049335, "grad_norm": 1.3448590222921373, "learning_rate": 6.956448106958056e-06, "loss": 0.4049, "step": 14759 }, { "epoch": 0.4348658721623971, "grad_norm": 1.5466583280759958, "learning_rate": 6.955974858562113e-06, "loss": 0.3723, "step": 14760 }, { "epoch": 0.43489533461986063, "grad_norm": 1.5152745015565583, "learning_rate": 6.955501589476464e-06, "loss": 0.4433, "step": 14761 }, { "epoch": 0.43492479707732423, "grad_norm": 1.5721677683880515, "learning_rate": 6.955028299706117e-06, "loss": 0.5086, "step": 14762 }, { "epoch": 0.4349542595347878, "grad_norm": 1.4812462969802391, "learning_rate": 6.9545549892560745e-06, "loss": 0.4535, "step": 14763 }, { "epoch": 0.4349837219922514, "grad_norm": 1.5723939596880256, "learning_rate": 6.954081658131345e-06, "loss": 0.5232, "step": 14764 }, { "epoch": 0.43501318444971493, "grad_norm": 1.5943307299333638, "learning_rate": 6.953608306336935e-06, "loss": 0.4353, "step": 14765 }, { "epoch": 0.43504264690717853, "grad_norm": 1.5311645975377803, "learning_rate": 6.953134933877853e-06, "loss": 0.4745, "step": 14766 }, { "epoch": 0.4350721093646421, "grad_norm": 1.5813465246417764, "learning_rate": 6.9526615407591035e-06, "loss": 0.5355, "step": 14767 }, { "epoch": 0.4351015718221057, "grad_norm": 1.5928224286816401, "learning_rate": 6.952188126985697e-06, "loss": 0.4581, "step": 14768 }, { "epoch": 0.4351310342795693, "grad_norm": 1.4804424968669978, "learning_rate": 6.9517146925626385e-06, "loss": 0.5254, "step": 14769 }, { "epoch": 0.43516049673703283, "grad_norm": 1.7433540440928137, "learning_rate": 6.951241237494937e-06, "loss": 0.588, "step": 14770 }, { "epoch": 0.43518995919449643, "grad_norm": 1.493742370584083, "learning_rate": 6.9507677617876e-06, "loss": 0.4702, "step": 14771 }, { "epoch": 0.43521942165196, "grad_norm": 1.5395017049180397, "learning_rate": 6.950294265445636e-06, "loss": 0.4041, "step": 14772 }, { "epoch": 0.4352488841094236, "grad_norm": 1.8122473791871998, "learning_rate": 6.949820748474054e-06, "loss": 0.5542, "step": 14773 }, { "epoch": 0.43527834656688713, "grad_norm": 1.7680213282911859, "learning_rate": 6.949347210877861e-06, "loss": 0.4928, "step": 14774 }, { "epoch": 0.43530780902435073, "grad_norm": 1.5517832667609306, "learning_rate": 6.948873652662069e-06, "loss": 0.6139, "step": 14775 }, { "epoch": 0.4353372714818143, "grad_norm": 1.5591292253516262, "learning_rate": 6.948400073831685e-06, "loss": 0.4282, "step": 14776 }, { "epoch": 0.4353667339392779, "grad_norm": 1.3803756409115708, "learning_rate": 6.94792647439172e-06, "loss": 0.3813, "step": 14777 }, { "epoch": 0.43539619639674143, "grad_norm": 1.5332973304718247, "learning_rate": 6.947452854347181e-06, "loss": 0.5038, "step": 14778 }, { "epoch": 0.43542565885420503, "grad_norm": 1.4364037559513132, "learning_rate": 6.946979213703079e-06, "loss": 0.5301, "step": 14779 }, { "epoch": 0.4354551213116686, "grad_norm": 1.553052290244163, "learning_rate": 6.946505552464424e-06, "loss": 0.4039, "step": 14780 }, { "epoch": 0.4354845837691322, "grad_norm": 1.5768162267340935, "learning_rate": 6.9460318706362275e-06, "loss": 0.5038, "step": 14781 }, { "epoch": 0.4355140462265958, "grad_norm": 1.571010665226836, "learning_rate": 6.945558168223499e-06, "loss": 0.5734, "step": 14782 }, { "epoch": 0.43554350868405933, "grad_norm": 1.553005628931243, "learning_rate": 6.9450844452312485e-06, "loss": 0.4148, "step": 14783 }, { "epoch": 0.43557297114152294, "grad_norm": 1.4543640503884763, "learning_rate": 6.944610701664488e-06, "loss": 0.4534, "step": 14784 }, { "epoch": 0.4356024335989865, "grad_norm": 1.3415733924586717, "learning_rate": 6.944136937528227e-06, "loss": 0.3607, "step": 14785 }, { "epoch": 0.4356318960564501, "grad_norm": 1.543730948906789, "learning_rate": 6.943663152827478e-06, "loss": 0.5997, "step": 14786 }, { "epoch": 0.43566135851391363, "grad_norm": 1.4638823849087117, "learning_rate": 6.943189347567254e-06, "loss": 0.5312, "step": 14787 }, { "epoch": 0.43569082097137724, "grad_norm": 1.490418543606286, "learning_rate": 6.942715521752565e-06, "loss": 0.2982, "step": 14788 }, { "epoch": 0.4357202834288408, "grad_norm": 1.3784623740115196, "learning_rate": 6.942241675388423e-06, "loss": 0.3978, "step": 14789 }, { "epoch": 0.4357497458863044, "grad_norm": 1.617812172420893, "learning_rate": 6.94176780847984e-06, "loss": 0.4952, "step": 14790 }, { "epoch": 0.43577920834376793, "grad_norm": 1.5537834811906053, "learning_rate": 6.941293921031828e-06, "loss": 0.3808, "step": 14791 }, { "epoch": 0.43580867080123153, "grad_norm": 1.6562180491264469, "learning_rate": 6.940820013049403e-06, "loss": 0.5323, "step": 14792 }, { "epoch": 0.4358381332586951, "grad_norm": 1.5565472607016981, "learning_rate": 6.940346084537575e-06, "loss": 0.4233, "step": 14793 }, { "epoch": 0.4358675957161587, "grad_norm": 1.3322177435411615, "learning_rate": 6.939872135501357e-06, "loss": 0.4281, "step": 14794 }, { "epoch": 0.4358970581736223, "grad_norm": 1.3985517955789484, "learning_rate": 6.939398165945761e-06, "loss": 0.5616, "step": 14795 }, { "epoch": 0.43592652063108583, "grad_norm": 1.5129800624702787, "learning_rate": 6.938924175875804e-06, "loss": 0.581, "step": 14796 }, { "epoch": 0.43595598308854944, "grad_norm": 1.6583358388180076, "learning_rate": 6.938450165296497e-06, "loss": 0.4552, "step": 14797 }, { "epoch": 0.435985445546013, "grad_norm": 1.4625651357922944, "learning_rate": 6.937976134212856e-06, "loss": 0.4391, "step": 14798 }, { "epoch": 0.4360149080034766, "grad_norm": 1.7111180557048926, "learning_rate": 6.937502082629894e-06, "loss": 0.4811, "step": 14799 }, { "epoch": 0.43604437046094013, "grad_norm": 1.6354186233965347, "learning_rate": 6.937028010552624e-06, "loss": 0.5217, "step": 14800 }, { "epoch": 0.43607383291840374, "grad_norm": 1.3347229558555687, "learning_rate": 6.936553917986063e-06, "loss": 0.4231, "step": 14801 }, { "epoch": 0.4361032953758673, "grad_norm": 1.4881768953409542, "learning_rate": 6.9360798049352256e-06, "loss": 0.5707, "step": 14802 }, { "epoch": 0.4361327578333309, "grad_norm": 1.446142221586037, "learning_rate": 6.935605671405126e-06, "loss": 0.4407, "step": 14803 }, { "epoch": 0.43616222029079443, "grad_norm": 1.416769629476254, "learning_rate": 6.9351315174007785e-06, "loss": 0.2726, "step": 14804 }, { "epoch": 0.43619168274825804, "grad_norm": 1.6214072192606697, "learning_rate": 6.9346573429272e-06, "loss": 0.4563, "step": 14805 }, { "epoch": 0.4362211452057216, "grad_norm": 1.6508098054009, "learning_rate": 6.934183147989405e-06, "loss": 0.4834, "step": 14806 }, { "epoch": 0.4362506076631852, "grad_norm": 1.6359059449976432, "learning_rate": 6.9337089325924115e-06, "loss": 0.5784, "step": 14807 }, { "epoch": 0.4362800701206488, "grad_norm": 1.3697872042017498, "learning_rate": 6.933234696741235e-06, "loss": 0.3408, "step": 14808 }, { "epoch": 0.43630953257811234, "grad_norm": 1.4444363776321958, "learning_rate": 6.932760440440889e-06, "loss": 0.517, "step": 14809 }, { "epoch": 0.43633899503557594, "grad_norm": 1.5896412451025985, "learning_rate": 6.932286163696391e-06, "loss": 0.5632, "step": 14810 }, { "epoch": 0.4363684574930395, "grad_norm": 1.5647999375429535, "learning_rate": 6.931811866512761e-06, "loss": 0.4795, "step": 14811 }, { "epoch": 0.4363979199505031, "grad_norm": 1.5861359503377637, "learning_rate": 6.9313375488950126e-06, "loss": 0.3872, "step": 14812 }, { "epoch": 0.43642738240796664, "grad_norm": 1.5244981976842389, "learning_rate": 6.930863210848166e-06, "loss": 0.5722, "step": 14813 }, { "epoch": 0.43645684486543024, "grad_norm": 1.5785508836967896, "learning_rate": 6.930388852377237e-06, "loss": 0.524, "step": 14814 }, { "epoch": 0.4364863073228938, "grad_norm": 1.6988248198593452, "learning_rate": 6.92991447348724e-06, "loss": 0.5578, "step": 14815 }, { "epoch": 0.4365157697803574, "grad_norm": 1.7303610238245402, "learning_rate": 6.929440074183197e-06, "loss": 0.3875, "step": 14816 }, { "epoch": 0.43654523223782093, "grad_norm": 1.3055858388232173, "learning_rate": 6.9289656544701255e-06, "loss": 0.389, "step": 14817 }, { "epoch": 0.43657469469528454, "grad_norm": 1.4429512412533279, "learning_rate": 6.9284912143530445e-06, "loss": 0.4439, "step": 14818 }, { "epoch": 0.4366041571527481, "grad_norm": 1.4801602215032432, "learning_rate": 6.928016753836969e-06, "loss": 0.373, "step": 14819 }, { "epoch": 0.4366336196102117, "grad_norm": 1.4848443822223052, "learning_rate": 6.927542272926922e-06, "loss": 0.3808, "step": 14820 }, { "epoch": 0.4366630820676753, "grad_norm": 1.331259055860265, "learning_rate": 6.927067771627918e-06, "loss": 0.2907, "step": 14821 }, { "epoch": 0.43669254452513884, "grad_norm": 1.6239841844098424, "learning_rate": 6.926593249944978e-06, "loss": 0.5376, "step": 14822 }, { "epoch": 0.43672200698260244, "grad_norm": 1.461336569903299, "learning_rate": 6.926118707883125e-06, "loss": 0.3611, "step": 14823 }, { "epoch": 0.436751469440066, "grad_norm": 1.6242492714860037, "learning_rate": 6.925644145447373e-06, "loss": 0.5346, "step": 14824 }, { "epoch": 0.4367809318975296, "grad_norm": 1.523054654379583, "learning_rate": 6.925169562642744e-06, "loss": 0.4121, "step": 14825 }, { "epoch": 0.43681039435499314, "grad_norm": 1.5890386051237206, "learning_rate": 6.92469495947426e-06, "loss": 0.5483, "step": 14826 }, { "epoch": 0.43683985681245674, "grad_norm": 1.469495644186876, "learning_rate": 6.9242203359469366e-06, "loss": 0.5021, "step": 14827 }, { "epoch": 0.4368693192699203, "grad_norm": 1.512079625462161, "learning_rate": 6.923745692065799e-06, "loss": 0.3813, "step": 14828 }, { "epoch": 0.4368987817273839, "grad_norm": 1.7035735386193394, "learning_rate": 6.923271027835865e-06, "loss": 0.5548, "step": 14829 }, { "epoch": 0.43692824418484744, "grad_norm": 1.714134526344482, "learning_rate": 6.922796343262156e-06, "loss": 0.559, "step": 14830 }, { "epoch": 0.43695770664231104, "grad_norm": 1.620315357659081, "learning_rate": 6.922321638349693e-06, "loss": 0.4099, "step": 14831 }, { "epoch": 0.4369871690997746, "grad_norm": 1.5424041645631552, "learning_rate": 6.921846913103498e-06, "loss": 0.3686, "step": 14832 }, { "epoch": 0.4370166315572382, "grad_norm": 1.377558004018848, "learning_rate": 6.9213721675285925e-06, "loss": 0.3578, "step": 14833 }, { "epoch": 0.4370460940147018, "grad_norm": 1.5848688914077755, "learning_rate": 6.920897401629997e-06, "loss": 0.581, "step": 14834 }, { "epoch": 0.43707555647216534, "grad_norm": 1.4738513203841332, "learning_rate": 6.920422615412735e-06, "loss": 0.5061, "step": 14835 }, { "epoch": 0.43710501892962894, "grad_norm": 1.3997506274922829, "learning_rate": 6.919947808881828e-06, "loss": 0.4972, "step": 14836 }, { "epoch": 0.4371344813870925, "grad_norm": 1.519630278258873, "learning_rate": 6.919472982042296e-06, "loss": 0.5242, "step": 14837 }, { "epoch": 0.4371639438445561, "grad_norm": 1.3822004755549049, "learning_rate": 6.9189981348991665e-06, "loss": 0.2888, "step": 14838 }, { "epoch": 0.43719340630201964, "grad_norm": 1.4966760721420203, "learning_rate": 6.918523267457458e-06, "loss": 0.4954, "step": 14839 }, { "epoch": 0.43722286875948324, "grad_norm": 1.3945880800497983, "learning_rate": 6.9180483797221955e-06, "loss": 0.4146, "step": 14840 }, { "epoch": 0.4372523312169468, "grad_norm": 1.5692212498138978, "learning_rate": 6.917573471698402e-06, "loss": 0.4268, "step": 14841 }, { "epoch": 0.4372817936744104, "grad_norm": 1.5394400180328847, "learning_rate": 6.917098543391099e-06, "loss": 0.5366, "step": 14842 }, { "epoch": 0.43731125613187394, "grad_norm": 1.438808497911595, "learning_rate": 6.916623594805314e-06, "loss": 0.343, "step": 14843 }, { "epoch": 0.43734071858933754, "grad_norm": 1.5091238820210005, "learning_rate": 6.916148625946068e-06, "loss": 0.4184, "step": 14844 }, { "epoch": 0.4373701810468011, "grad_norm": 1.4182679801705866, "learning_rate": 6.9156736368183865e-06, "loss": 0.356, "step": 14845 }, { "epoch": 0.4373996435042647, "grad_norm": 1.556071528762643, "learning_rate": 6.915198627427292e-06, "loss": 0.4338, "step": 14846 }, { "epoch": 0.4374291059617283, "grad_norm": 1.525897581805253, "learning_rate": 6.91472359777781e-06, "loss": 0.3896, "step": 14847 }, { "epoch": 0.43745856841919184, "grad_norm": 1.454483483201744, "learning_rate": 6.914248547874966e-06, "loss": 0.4755, "step": 14848 }, { "epoch": 0.43748803087665544, "grad_norm": 1.529009505068223, "learning_rate": 6.913773477723784e-06, "loss": 0.3277, "step": 14849 }, { "epoch": 0.437517493334119, "grad_norm": 1.8293662708754015, "learning_rate": 6.9132983873292905e-06, "loss": 0.6215, "step": 14850 }, { "epoch": 0.4375469557915826, "grad_norm": 1.7492608253502318, "learning_rate": 6.912823276696509e-06, "loss": 0.5824, "step": 14851 }, { "epoch": 0.43757641824904614, "grad_norm": 1.6190857855298675, "learning_rate": 6.9123481458304645e-06, "loss": 0.3713, "step": 14852 }, { "epoch": 0.43760588070650974, "grad_norm": 1.7691838114412695, "learning_rate": 6.911872994736186e-06, "loss": 0.5329, "step": 14853 }, { "epoch": 0.4376353431639733, "grad_norm": 1.6124094209127744, "learning_rate": 6.9113978234186976e-06, "loss": 0.5965, "step": 14854 }, { "epoch": 0.4376648056214369, "grad_norm": 1.5370131347855023, "learning_rate": 6.910922631883023e-06, "loss": 0.485, "step": 14855 }, { "epoch": 0.43769426807890044, "grad_norm": 1.5984792347080767, "learning_rate": 6.910447420134194e-06, "loss": 0.4758, "step": 14856 }, { "epoch": 0.43772373053636404, "grad_norm": 1.3219034918646886, "learning_rate": 6.909972188177233e-06, "loss": 0.3669, "step": 14857 }, { "epoch": 0.4377531929938276, "grad_norm": 1.6090285349355251, "learning_rate": 6.909496936017168e-06, "loss": 0.6203, "step": 14858 }, { "epoch": 0.4377826554512912, "grad_norm": 1.5159647255429445, "learning_rate": 6.909021663659028e-06, "loss": 0.4115, "step": 14859 }, { "epoch": 0.4378121179087548, "grad_norm": 1.5209490647166768, "learning_rate": 6.908546371107837e-06, "loss": 0.3498, "step": 14860 }, { "epoch": 0.43784158036621834, "grad_norm": 1.5820669290950795, "learning_rate": 6.908071058368624e-06, "loss": 0.4617, "step": 14861 }, { "epoch": 0.43787104282368194, "grad_norm": 1.446925032635149, "learning_rate": 6.907595725446419e-06, "loss": 0.4325, "step": 14862 }, { "epoch": 0.4379005052811455, "grad_norm": 1.5492012130742576, "learning_rate": 6.907120372346247e-06, "loss": 0.5655, "step": 14863 }, { "epoch": 0.4379299677386091, "grad_norm": 1.5903224240038731, "learning_rate": 6.906644999073136e-06, "loss": 0.5777, "step": 14864 }, { "epoch": 0.43795943019607264, "grad_norm": 1.3080842658268466, "learning_rate": 6.906169605632116e-06, "loss": 0.3877, "step": 14865 }, { "epoch": 0.43798889265353624, "grad_norm": 1.5800345049320437, "learning_rate": 6.905694192028213e-06, "loss": 0.3677, "step": 14866 }, { "epoch": 0.4380183551109998, "grad_norm": 1.6151768774046753, "learning_rate": 6.905218758266459e-06, "loss": 0.4461, "step": 14867 }, { "epoch": 0.4380478175684634, "grad_norm": 1.5275952300246514, "learning_rate": 6.904743304351882e-06, "loss": 0.5006, "step": 14868 }, { "epoch": 0.43807728002592694, "grad_norm": 1.5126053814932419, "learning_rate": 6.9042678302895105e-06, "loss": 0.3656, "step": 14869 }, { "epoch": 0.43810674248339054, "grad_norm": 1.755600133163707, "learning_rate": 6.903792336084374e-06, "loss": 0.5008, "step": 14870 }, { "epoch": 0.4381362049408541, "grad_norm": 1.3456803158248771, "learning_rate": 6.903316821741502e-06, "loss": 0.3974, "step": 14871 }, { "epoch": 0.4381656673983177, "grad_norm": 1.3156606082076923, "learning_rate": 6.902841287265926e-06, "loss": 0.4091, "step": 14872 }, { "epoch": 0.4381951298557813, "grad_norm": 1.4750386810487066, "learning_rate": 6.902365732662674e-06, "loss": 0.3849, "step": 14873 }, { "epoch": 0.43822459231324484, "grad_norm": 1.6247899890019168, "learning_rate": 6.901890157936777e-06, "loss": 0.3399, "step": 14874 }, { "epoch": 0.43825405477070845, "grad_norm": 1.663624209839335, "learning_rate": 6.901414563093265e-06, "loss": 0.4578, "step": 14875 }, { "epoch": 0.438283517228172, "grad_norm": 1.571288693964981, "learning_rate": 6.90093894813717e-06, "loss": 0.4995, "step": 14876 }, { "epoch": 0.4383129796856356, "grad_norm": 1.6667038115550203, "learning_rate": 6.900463313073522e-06, "loss": 0.5471, "step": 14877 }, { "epoch": 0.43834244214309914, "grad_norm": 1.832474952247398, "learning_rate": 6.899987657907353e-06, "loss": 0.3214, "step": 14878 }, { "epoch": 0.43837190460056275, "grad_norm": 1.3159025547005523, "learning_rate": 6.899511982643692e-06, "loss": 0.2522, "step": 14879 }, { "epoch": 0.4384013670580263, "grad_norm": 1.526396580973155, "learning_rate": 6.899036287287574e-06, "loss": 0.4314, "step": 14880 }, { "epoch": 0.4384308295154899, "grad_norm": 1.4188969482183682, "learning_rate": 6.898560571844027e-06, "loss": 0.4262, "step": 14881 }, { "epoch": 0.43846029197295344, "grad_norm": 1.5470578202286598, "learning_rate": 6.898084836318085e-06, "loss": 0.465, "step": 14882 }, { "epoch": 0.43848975443041704, "grad_norm": 1.5020345768355174, "learning_rate": 6.897609080714781e-06, "loss": 0.5083, "step": 14883 }, { "epoch": 0.4385192168878806, "grad_norm": 1.7754249345302673, "learning_rate": 6.897133305039148e-06, "loss": 0.521, "step": 14884 }, { "epoch": 0.4385486793453442, "grad_norm": 1.4633127081599488, "learning_rate": 6.896657509296214e-06, "loss": 0.4081, "step": 14885 }, { "epoch": 0.4385781418028078, "grad_norm": 1.4616793515699045, "learning_rate": 6.896181693491016e-06, "loss": 0.4792, "step": 14886 }, { "epoch": 0.43860760426027134, "grad_norm": 1.5778911056561256, "learning_rate": 6.895705857628586e-06, "loss": 0.5953, "step": 14887 }, { "epoch": 0.43863706671773495, "grad_norm": 1.5325789308136784, "learning_rate": 6.8952300017139585e-06, "loss": 0.4483, "step": 14888 }, { "epoch": 0.4386665291751985, "grad_norm": 1.5182209737619057, "learning_rate": 6.8947541257521635e-06, "loss": 0.4126, "step": 14889 }, { "epoch": 0.4386959916326621, "grad_norm": 1.574621497376316, "learning_rate": 6.894278229748238e-06, "loss": 0.5655, "step": 14890 }, { "epoch": 0.43872545409012564, "grad_norm": 1.3824289984427447, "learning_rate": 6.893802313707214e-06, "loss": 0.5075, "step": 14891 }, { "epoch": 0.43875491654758925, "grad_norm": 1.5183243320914666, "learning_rate": 6.893326377634128e-06, "loss": 0.3171, "step": 14892 }, { "epoch": 0.4387843790050528, "grad_norm": 1.7285994337773665, "learning_rate": 6.892850421534011e-06, "loss": 0.5084, "step": 14893 }, { "epoch": 0.4388138414625164, "grad_norm": 1.6974372740183996, "learning_rate": 6.892374445411899e-06, "loss": 0.5205, "step": 14894 }, { "epoch": 0.43884330391997994, "grad_norm": 1.3827922522170544, "learning_rate": 6.891898449272827e-06, "loss": 0.3661, "step": 14895 }, { "epoch": 0.43887276637744355, "grad_norm": 1.4821142341625315, "learning_rate": 6.891422433121831e-06, "loss": 0.4803, "step": 14896 }, { "epoch": 0.4389022288349071, "grad_norm": 1.6628079385327947, "learning_rate": 6.890946396963943e-06, "loss": 0.4739, "step": 14897 }, { "epoch": 0.4389316912923707, "grad_norm": 1.6434950572704605, "learning_rate": 6.890470340804202e-06, "loss": 0.7026, "step": 14898 }, { "epoch": 0.4389611537498343, "grad_norm": 1.603177742838711, "learning_rate": 6.889994264647641e-06, "loss": 0.5237, "step": 14899 }, { "epoch": 0.43899061620729785, "grad_norm": 1.3823715816769155, "learning_rate": 6.889518168499296e-06, "loss": 0.3686, "step": 14900 }, { "epoch": 0.43902007866476145, "grad_norm": 1.5715966007606943, "learning_rate": 6.889042052364204e-06, "loss": 0.4864, "step": 14901 }, { "epoch": 0.439049541122225, "grad_norm": 1.3349870487631688, "learning_rate": 6.888565916247401e-06, "loss": 0.2852, "step": 14902 }, { "epoch": 0.4390790035796886, "grad_norm": 1.378643081956989, "learning_rate": 6.888089760153926e-06, "loss": 0.3795, "step": 14903 }, { "epoch": 0.43910846603715215, "grad_norm": 1.3915140161667374, "learning_rate": 6.887613584088809e-06, "loss": 0.4248, "step": 14904 }, { "epoch": 0.43913792849461575, "grad_norm": 1.5670097011330326, "learning_rate": 6.887137388057093e-06, "loss": 0.5898, "step": 14905 }, { "epoch": 0.4391673909520793, "grad_norm": 1.5843303419329124, "learning_rate": 6.886661172063811e-06, "loss": 0.519, "step": 14906 }, { "epoch": 0.4391968534095429, "grad_norm": 1.4619087252917782, "learning_rate": 6.8861849361140045e-06, "loss": 0.4137, "step": 14907 }, { "epoch": 0.43922631586700644, "grad_norm": 1.4593473260214742, "learning_rate": 6.885708680212708e-06, "loss": 0.4849, "step": 14908 }, { "epoch": 0.43925577832447005, "grad_norm": 1.5895868716948922, "learning_rate": 6.8852324043649585e-06, "loss": 0.4998, "step": 14909 }, { "epoch": 0.4392852407819336, "grad_norm": 1.4309014955995187, "learning_rate": 6.884756108575795e-06, "loss": 0.4669, "step": 14910 }, { "epoch": 0.4393147032393972, "grad_norm": 1.6643950586468463, "learning_rate": 6.884279792850257e-06, "loss": 0.5313, "step": 14911 }, { "epoch": 0.4393441656968608, "grad_norm": 1.356886918962519, "learning_rate": 6.883803457193381e-06, "loss": 0.4663, "step": 14912 }, { "epoch": 0.43937362815432435, "grad_norm": 1.7623224639767985, "learning_rate": 6.8833271016102065e-06, "loss": 0.5069, "step": 14913 }, { "epoch": 0.43940309061178795, "grad_norm": 1.5158585241348126, "learning_rate": 6.882850726105772e-06, "loss": 0.4549, "step": 14914 }, { "epoch": 0.4394325530692515, "grad_norm": 1.822965650573294, "learning_rate": 6.882374330685115e-06, "loss": 0.4881, "step": 14915 }, { "epoch": 0.4394620155267151, "grad_norm": 1.5381487293636662, "learning_rate": 6.881897915353277e-06, "loss": 0.3565, "step": 14916 }, { "epoch": 0.43949147798417865, "grad_norm": 1.3617059164929985, "learning_rate": 6.881421480115297e-06, "loss": 0.4343, "step": 14917 }, { "epoch": 0.43952094044164225, "grad_norm": 1.6130543533738093, "learning_rate": 6.880945024976213e-06, "loss": 0.5561, "step": 14918 }, { "epoch": 0.4395504028991058, "grad_norm": 1.5049294868400551, "learning_rate": 6.880468549941066e-06, "loss": 0.4926, "step": 14919 }, { "epoch": 0.4395798653565694, "grad_norm": 1.7555840718942468, "learning_rate": 6.879992055014897e-06, "loss": 0.7263, "step": 14920 }, { "epoch": 0.43960932781403295, "grad_norm": 1.4155671224264557, "learning_rate": 6.879515540202743e-06, "loss": 0.526, "step": 14921 }, { "epoch": 0.43963879027149655, "grad_norm": 1.4610413588897693, "learning_rate": 6.879039005509647e-06, "loss": 0.3995, "step": 14922 }, { "epoch": 0.4396682527289601, "grad_norm": 1.5296180041754543, "learning_rate": 6.878562450940651e-06, "loss": 0.4096, "step": 14923 }, { "epoch": 0.4396977151864237, "grad_norm": 1.4985971342425626, "learning_rate": 6.878085876500791e-06, "loss": 0.4874, "step": 14924 }, { "epoch": 0.4397271776438873, "grad_norm": 1.4395707699008258, "learning_rate": 6.877609282195113e-06, "loss": 0.3734, "step": 14925 }, { "epoch": 0.43975664010135085, "grad_norm": 1.621736539616316, "learning_rate": 6.877132668028655e-06, "loss": 0.5094, "step": 14926 }, { "epoch": 0.43978610255881445, "grad_norm": 1.6153199222758274, "learning_rate": 6.87665603400646e-06, "loss": 0.5628, "step": 14927 }, { "epoch": 0.439815565016278, "grad_norm": 1.5243565098390983, "learning_rate": 6.87617938013357e-06, "loss": 0.5154, "step": 14928 }, { "epoch": 0.4398450274737416, "grad_norm": 1.5165660566842807, "learning_rate": 6.875702706415026e-06, "loss": 0.5073, "step": 14929 }, { "epoch": 0.43987448993120515, "grad_norm": 1.4857094950521827, "learning_rate": 6.87522601285587e-06, "loss": 0.4071, "step": 14930 }, { "epoch": 0.43990395238866875, "grad_norm": 1.5121527350299397, "learning_rate": 6.874749299461144e-06, "loss": 0.5649, "step": 14931 }, { "epoch": 0.4399334148461323, "grad_norm": 1.7100356277941955, "learning_rate": 6.874272566235895e-06, "loss": 0.5993, "step": 14932 }, { "epoch": 0.4399628773035959, "grad_norm": 1.5704423907269789, "learning_rate": 6.873795813185158e-06, "loss": 0.4294, "step": 14933 }, { "epoch": 0.43999233976105945, "grad_norm": 1.5204113270696513, "learning_rate": 6.87331904031398e-06, "loss": 0.399, "step": 14934 }, { "epoch": 0.44002180221852305, "grad_norm": 1.6384096626364977, "learning_rate": 6.872842247627407e-06, "loss": 0.4452, "step": 14935 }, { "epoch": 0.4400512646759866, "grad_norm": 1.5270003243969876, "learning_rate": 6.872365435130477e-06, "loss": 0.4217, "step": 14936 }, { "epoch": 0.4400807271334502, "grad_norm": 1.4766693425508985, "learning_rate": 6.8718886028282375e-06, "loss": 0.4759, "step": 14937 }, { "epoch": 0.4401101895909138, "grad_norm": 1.7638461089140767, "learning_rate": 6.87141175072573e-06, "loss": 0.5307, "step": 14938 }, { "epoch": 0.44013965204837735, "grad_norm": 1.5477576526800079, "learning_rate": 6.8709348788279995e-06, "loss": 0.5271, "step": 14939 }, { "epoch": 0.44016911450584095, "grad_norm": 1.518209925491985, "learning_rate": 6.870457987140089e-06, "loss": 0.4244, "step": 14940 }, { "epoch": 0.4401985769633045, "grad_norm": 1.8053280496918969, "learning_rate": 6.869981075667046e-06, "loss": 0.4815, "step": 14941 }, { "epoch": 0.4402280394207681, "grad_norm": 1.4079342913240918, "learning_rate": 6.869504144413912e-06, "loss": 0.4554, "step": 14942 }, { "epoch": 0.44025750187823165, "grad_norm": 1.565826979110857, "learning_rate": 6.8690271933857325e-06, "loss": 0.4801, "step": 14943 }, { "epoch": 0.44028696433569525, "grad_norm": 1.450917964626461, "learning_rate": 6.8685502225875535e-06, "loss": 0.444, "step": 14944 }, { "epoch": 0.4403164267931588, "grad_norm": 1.4597192495022575, "learning_rate": 6.868073232024419e-06, "loss": 0.4285, "step": 14945 }, { "epoch": 0.4403458892506224, "grad_norm": 1.4284461070432575, "learning_rate": 6.867596221701376e-06, "loss": 0.4531, "step": 14946 }, { "epoch": 0.44037535170808595, "grad_norm": 1.356336612082695, "learning_rate": 6.86711919162347e-06, "loss": 0.3835, "step": 14947 }, { "epoch": 0.44040481416554955, "grad_norm": 1.599276956440954, "learning_rate": 6.866642141795745e-06, "loss": 0.5687, "step": 14948 }, { "epoch": 0.4404342766230131, "grad_norm": 1.7707571372315534, "learning_rate": 6.866165072223249e-06, "loss": 0.6136, "step": 14949 }, { "epoch": 0.4404637390804767, "grad_norm": 1.5073872513965059, "learning_rate": 6.865687982911027e-06, "loss": 0.3698, "step": 14950 }, { "epoch": 0.4404932015379403, "grad_norm": 1.3285049571593759, "learning_rate": 6.865210873864125e-06, "loss": 0.4536, "step": 14951 }, { "epoch": 0.44052266399540385, "grad_norm": 1.5184212597740792, "learning_rate": 6.864733745087594e-06, "loss": 0.3961, "step": 14952 }, { "epoch": 0.44055212645286745, "grad_norm": 1.5652251164680402, "learning_rate": 6.864256596586476e-06, "loss": 0.5448, "step": 14953 }, { "epoch": 0.440581588910331, "grad_norm": 1.5035376717837485, "learning_rate": 6.86377942836582e-06, "loss": 0.3373, "step": 14954 }, { "epoch": 0.4406110513677946, "grad_norm": 1.5732514375899362, "learning_rate": 6.863302240430672e-06, "loss": 0.3284, "step": 14955 }, { "epoch": 0.44064051382525815, "grad_norm": 1.462882790569873, "learning_rate": 6.862825032786082e-06, "loss": 0.4557, "step": 14956 }, { "epoch": 0.44066997628272175, "grad_norm": 1.4831525330382256, "learning_rate": 6.862347805437097e-06, "loss": 0.3942, "step": 14957 }, { "epoch": 0.4406994387401853, "grad_norm": 1.457523698884124, "learning_rate": 6.861870558388763e-06, "loss": 0.4762, "step": 14958 }, { "epoch": 0.4407289011976489, "grad_norm": 1.4733676623318193, "learning_rate": 6.8613932916461315e-06, "loss": 0.3182, "step": 14959 }, { "epoch": 0.44075836365511245, "grad_norm": 1.6123434324991084, "learning_rate": 6.860916005214246e-06, "loss": 0.4844, "step": 14960 }, { "epoch": 0.44078782611257605, "grad_norm": 1.5977173427408373, "learning_rate": 6.860438699098161e-06, "loss": 0.422, "step": 14961 }, { "epoch": 0.4408172885700396, "grad_norm": 1.4359526193310703, "learning_rate": 6.859961373302921e-06, "loss": 0.4983, "step": 14962 }, { "epoch": 0.4408467510275032, "grad_norm": 1.753461125534322, "learning_rate": 6.859484027833579e-06, "loss": 0.5535, "step": 14963 }, { "epoch": 0.4408762134849668, "grad_norm": 1.5517377972285626, "learning_rate": 6.859006662695178e-06, "loss": 0.4609, "step": 14964 }, { "epoch": 0.44090567594243035, "grad_norm": 1.5385722889007447, "learning_rate": 6.8585292778927734e-06, "loss": 0.4084, "step": 14965 }, { "epoch": 0.44093513839989396, "grad_norm": 1.8338050103563863, "learning_rate": 6.858051873431411e-06, "loss": 0.584, "step": 14966 }, { "epoch": 0.4409646008573575, "grad_norm": 1.5965350886884144, "learning_rate": 6.857574449316143e-06, "loss": 0.6158, "step": 14967 }, { "epoch": 0.4409940633148211, "grad_norm": 1.4931026650111425, "learning_rate": 6.857097005552019e-06, "loss": 0.5481, "step": 14968 }, { "epoch": 0.44102352577228465, "grad_norm": 1.7590897372267384, "learning_rate": 6.8566195421440884e-06, "loss": 0.6452, "step": 14969 }, { "epoch": 0.44105298822974826, "grad_norm": 1.74165467017143, "learning_rate": 6.856142059097402e-06, "loss": 0.4492, "step": 14970 }, { "epoch": 0.4410824506872118, "grad_norm": 1.4072304170820626, "learning_rate": 6.85566455641701e-06, "loss": 0.4515, "step": 14971 }, { "epoch": 0.4411119131446754, "grad_norm": 1.5141503850377407, "learning_rate": 6.8551870341079656e-06, "loss": 0.4919, "step": 14972 }, { "epoch": 0.44114137560213895, "grad_norm": 1.4336088291232616, "learning_rate": 6.854709492175316e-06, "loss": 0.4937, "step": 14973 }, { "epoch": 0.44117083805960255, "grad_norm": 1.21575300677613, "learning_rate": 6.854231930624117e-06, "loss": 0.3382, "step": 14974 }, { "epoch": 0.4412003005170661, "grad_norm": 1.4368233271968247, "learning_rate": 6.853754349459415e-06, "loss": 0.4002, "step": 14975 }, { "epoch": 0.4412297629745297, "grad_norm": 1.3907419998023882, "learning_rate": 6.853276748686266e-06, "loss": 0.3871, "step": 14976 }, { "epoch": 0.4412592254319933, "grad_norm": 1.5128716878410005, "learning_rate": 6.85279912830972e-06, "loss": 0.5486, "step": 14977 }, { "epoch": 0.44128868788945685, "grad_norm": 1.656903298096749, "learning_rate": 6.85232148833483e-06, "loss": 0.4563, "step": 14978 }, { "epoch": 0.44131815034692046, "grad_norm": 1.5222743725235104, "learning_rate": 6.851843828766648e-06, "loss": 0.4765, "step": 14979 }, { "epoch": 0.441347612804384, "grad_norm": 1.4913227806908258, "learning_rate": 6.8513661496102255e-06, "loss": 0.273, "step": 14980 }, { "epoch": 0.4413770752618476, "grad_norm": 1.3494047855753009, "learning_rate": 6.850888450870616e-06, "loss": 0.3255, "step": 14981 }, { "epoch": 0.44140653771931115, "grad_norm": 1.342130944612433, "learning_rate": 6.850410732552874e-06, "loss": 0.3942, "step": 14982 }, { "epoch": 0.44143600017677476, "grad_norm": 1.4955525725145606, "learning_rate": 6.84993299466205e-06, "loss": 0.539, "step": 14983 }, { "epoch": 0.4414654626342383, "grad_norm": 1.5056780323468106, "learning_rate": 6.8494552372031985e-06, "loss": 0.5064, "step": 14984 }, { "epoch": 0.4414949250917019, "grad_norm": 1.6016462462952834, "learning_rate": 6.848977460181373e-06, "loss": 0.4565, "step": 14985 }, { "epoch": 0.44152438754916545, "grad_norm": 1.6204852272349757, "learning_rate": 6.848499663601628e-06, "loss": 0.5003, "step": 14986 }, { "epoch": 0.44155385000662906, "grad_norm": 1.408839805155662, "learning_rate": 6.848021847469017e-06, "loss": 0.451, "step": 14987 }, { "epoch": 0.4415833124640926, "grad_norm": 1.4968387623054362, "learning_rate": 6.847544011788594e-06, "loss": 0.4149, "step": 14988 }, { "epoch": 0.4416127749215562, "grad_norm": 1.55240809042734, "learning_rate": 6.847066156565412e-06, "loss": 0.4837, "step": 14989 }, { "epoch": 0.4416422373790198, "grad_norm": 1.6064545042153333, "learning_rate": 6.84658828180453e-06, "loss": 0.5832, "step": 14990 }, { "epoch": 0.44167169983648336, "grad_norm": 1.5848088086519883, "learning_rate": 6.846110387510997e-06, "loss": 0.5836, "step": 14991 }, { "epoch": 0.44170116229394696, "grad_norm": 1.8361522271674184, "learning_rate": 6.8456324736898724e-06, "loss": 0.5302, "step": 14992 }, { "epoch": 0.4417306247514105, "grad_norm": 1.5263824418531207, "learning_rate": 6.84515454034621e-06, "loss": 0.493, "step": 14993 }, { "epoch": 0.4417600872088741, "grad_norm": 1.4406671006646736, "learning_rate": 6.844676587485065e-06, "loss": 0.3869, "step": 14994 }, { "epoch": 0.44178954966633766, "grad_norm": 1.56337396961114, "learning_rate": 6.8441986151114935e-06, "loss": 0.6012, "step": 14995 }, { "epoch": 0.44181901212380126, "grad_norm": 1.7529898458181574, "learning_rate": 6.843720623230551e-06, "loss": 0.4968, "step": 14996 }, { "epoch": 0.4418484745812648, "grad_norm": 1.626063101186482, "learning_rate": 6.8432426118472935e-06, "loss": 0.4816, "step": 14997 }, { "epoch": 0.4418779370387284, "grad_norm": 1.5357098610864142, "learning_rate": 6.842764580966778e-06, "loss": 0.404, "step": 14998 }, { "epoch": 0.44190739949619195, "grad_norm": 1.5551053987675685, "learning_rate": 6.84228653059406e-06, "loss": 0.6578, "step": 14999 }, { "epoch": 0.44193686195365556, "grad_norm": 1.4548446325547326, "learning_rate": 6.841808460734195e-06, "loss": 0.4823, "step": 15000 }, { "epoch": 0.4419663244111191, "grad_norm": 1.5738491312822362, "learning_rate": 6.841330371392243e-06, "loss": 0.5912, "step": 15001 }, { "epoch": 0.4419957868685827, "grad_norm": 1.3685332440090106, "learning_rate": 6.84085226257326e-06, "loss": 0.4437, "step": 15002 }, { "epoch": 0.4420252493260463, "grad_norm": 1.4390421481893059, "learning_rate": 6.840374134282302e-06, "loss": 0.4537, "step": 15003 }, { "epoch": 0.44205471178350986, "grad_norm": 1.4010774203765997, "learning_rate": 6.839895986524426e-06, "loss": 0.4126, "step": 15004 }, { "epoch": 0.44208417424097346, "grad_norm": 1.5070403184353443, "learning_rate": 6.8394178193046935e-06, "loss": 0.4947, "step": 15005 }, { "epoch": 0.442113636698437, "grad_norm": 1.5498558356618415, "learning_rate": 6.8389396326281576e-06, "loss": 0.4415, "step": 15006 }, { "epoch": 0.4421430991559006, "grad_norm": 1.6131453080576021, "learning_rate": 6.83846142649988e-06, "loss": 0.4994, "step": 15007 }, { "epoch": 0.44217256161336416, "grad_norm": 1.2984624427272962, "learning_rate": 6.837983200924918e-06, "loss": 0.3303, "step": 15008 }, { "epoch": 0.44220202407082776, "grad_norm": 1.4445603739263924, "learning_rate": 6.83750495590833e-06, "loss": 0.4377, "step": 15009 }, { "epoch": 0.4422314865282913, "grad_norm": 1.4500393409661065, "learning_rate": 6.837026691455172e-06, "loss": 0.4593, "step": 15010 }, { "epoch": 0.4422609489857549, "grad_norm": 1.4869332546026928, "learning_rate": 6.836548407570508e-06, "loss": 0.4876, "step": 15011 }, { "epoch": 0.44229041144321846, "grad_norm": 1.5568147570671826, "learning_rate": 6.836070104259395e-06, "loss": 0.4322, "step": 15012 }, { "epoch": 0.44231987390068206, "grad_norm": 1.563734351506395, "learning_rate": 6.83559178152689e-06, "loss": 0.5355, "step": 15013 }, { "epoch": 0.4423493363581456, "grad_norm": 1.6591979250697493, "learning_rate": 6.835113439378056e-06, "loss": 0.636, "step": 15014 }, { "epoch": 0.4423787988156092, "grad_norm": 1.460775742689641, "learning_rate": 6.834635077817951e-06, "loss": 0.415, "step": 15015 }, { "epoch": 0.4424082612730728, "grad_norm": 1.5573025711591881, "learning_rate": 6.834156696851635e-06, "loss": 0.4377, "step": 15016 }, { "epoch": 0.44243772373053636, "grad_norm": 1.5869919873564535, "learning_rate": 6.8336782964841695e-06, "loss": 0.4843, "step": 15017 }, { "epoch": 0.44246718618799996, "grad_norm": 1.4500681020216235, "learning_rate": 6.833199876720612e-06, "loss": 0.4188, "step": 15018 }, { "epoch": 0.4424966486454635, "grad_norm": 1.284303954267259, "learning_rate": 6.832721437566026e-06, "loss": 0.3006, "step": 15019 }, { "epoch": 0.4425261111029271, "grad_norm": 1.6175853335841952, "learning_rate": 6.832242979025471e-06, "loss": 0.4563, "step": 15020 }, { "epoch": 0.44255557356039066, "grad_norm": 1.625047738562538, "learning_rate": 6.831764501104008e-06, "loss": 0.4564, "step": 15021 }, { "epoch": 0.44258503601785426, "grad_norm": 1.6495411245954834, "learning_rate": 6.831286003806698e-06, "loss": 0.5395, "step": 15022 }, { "epoch": 0.4426144984753178, "grad_norm": 1.35283936752353, "learning_rate": 6.830807487138605e-06, "loss": 0.4109, "step": 15023 }, { "epoch": 0.4426439609327814, "grad_norm": 1.660797969859377, "learning_rate": 6.830328951104786e-06, "loss": 0.5307, "step": 15024 }, { "epoch": 0.44267342339024496, "grad_norm": 1.4912918831049617, "learning_rate": 6.829850395710304e-06, "loss": 0.4386, "step": 15025 }, { "epoch": 0.44270288584770856, "grad_norm": 1.439407748110188, "learning_rate": 6.829371820960225e-06, "loss": 0.4372, "step": 15026 }, { "epoch": 0.4427323483051721, "grad_norm": 1.351453749107252, "learning_rate": 6.828893226859608e-06, "loss": 0.3822, "step": 15027 }, { "epoch": 0.4427618107626357, "grad_norm": 1.5122743492245556, "learning_rate": 6.828414613413514e-06, "loss": 0.5135, "step": 15028 }, { "epoch": 0.4427912732200993, "grad_norm": 1.6869166076794062, "learning_rate": 6.827935980627009e-06, "loss": 0.5543, "step": 15029 }, { "epoch": 0.44282073567756286, "grad_norm": 1.471240583995817, "learning_rate": 6.827457328505154e-06, "loss": 0.4629, "step": 15030 }, { "epoch": 0.44285019813502646, "grad_norm": 1.5014247856291776, "learning_rate": 6.8269786570530116e-06, "loss": 0.4517, "step": 15031 }, { "epoch": 0.44287966059249, "grad_norm": 1.5020475449020696, "learning_rate": 6.826499966275648e-06, "loss": 0.4959, "step": 15032 }, { "epoch": 0.4429091230499536, "grad_norm": 1.4716828295179374, "learning_rate": 6.826021256178122e-06, "loss": 0.5708, "step": 15033 }, { "epoch": 0.44293858550741716, "grad_norm": 1.393134754999615, "learning_rate": 6.8255425267655005e-06, "loss": 0.4216, "step": 15034 }, { "epoch": 0.44296804796488076, "grad_norm": 1.7348884386728654, "learning_rate": 6.825063778042846e-06, "loss": 0.4468, "step": 15035 }, { "epoch": 0.4429975104223443, "grad_norm": 1.523222325587573, "learning_rate": 6.824585010015223e-06, "loss": 0.3823, "step": 15036 }, { "epoch": 0.4430269728798079, "grad_norm": 1.6669333193020806, "learning_rate": 6.824106222687698e-06, "loss": 0.4237, "step": 15037 }, { "epoch": 0.44305643533727146, "grad_norm": 1.5189862066757966, "learning_rate": 6.823627416065331e-06, "loss": 0.399, "step": 15038 }, { "epoch": 0.44308589779473506, "grad_norm": 1.5153329685153445, "learning_rate": 6.823148590153189e-06, "loss": 0.3896, "step": 15039 }, { "epoch": 0.4431153602521986, "grad_norm": 1.5713589800584782, "learning_rate": 6.822669744956337e-06, "loss": 0.5938, "step": 15040 }, { "epoch": 0.4431448227096622, "grad_norm": 1.579752627334605, "learning_rate": 6.822190880479841e-06, "loss": 0.4863, "step": 15041 }, { "epoch": 0.4431742851671258, "grad_norm": 1.3795378082220515, "learning_rate": 6.8217119967287644e-06, "loss": 0.4233, "step": 15042 }, { "epoch": 0.44320374762458936, "grad_norm": 1.5656410788088275, "learning_rate": 6.821233093708174e-06, "loss": 0.545, "step": 15043 }, { "epoch": 0.44323321008205296, "grad_norm": 1.6453389356690564, "learning_rate": 6.820754171423135e-06, "loss": 0.5609, "step": 15044 }, { "epoch": 0.4432626725395165, "grad_norm": 1.7506090603976965, "learning_rate": 6.820275229878712e-06, "loss": 0.3795, "step": 15045 }, { "epoch": 0.4432921349969801, "grad_norm": 1.4044570353460997, "learning_rate": 6.819796269079972e-06, "loss": 0.3817, "step": 15046 }, { "epoch": 0.44332159745444366, "grad_norm": 1.467013163682557, "learning_rate": 6.819317289031986e-06, "loss": 0.4042, "step": 15047 }, { "epoch": 0.44335105991190726, "grad_norm": 1.309661542482076, "learning_rate": 6.818838289739812e-06, "loss": 0.4547, "step": 15048 }, { "epoch": 0.4433805223693708, "grad_norm": 1.3460648323093645, "learning_rate": 6.818359271208521e-06, "loss": 0.3305, "step": 15049 }, { "epoch": 0.4434099848268344, "grad_norm": 1.5161949915605728, "learning_rate": 6.817880233443181e-06, "loss": 0.4842, "step": 15050 }, { "epoch": 0.44343944728429796, "grad_norm": 1.7227908422450036, "learning_rate": 6.8174011764488565e-06, "loss": 0.5822, "step": 15051 }, { "epoch": 0.44346890974176156, "grad_norm": 1.6115615319916892, "learning_rate": 6.816922100230618e-06, "loss": 0.4464, "step": 15052 }, { "epoch": 0.4434983721992251, "grad_norm": 1.423835871845857, "learning_rate": 6.816443004793531e-06, "loss": 0.4781, "step": 15053 }, { "epoch": 0.4435278346566887, "grad_norm": 1.534610094466089, "learning_rate": 6.815963890142663e-06, "loss": 0.5458, "step": 15054 }, { "epoch": 0.4435572971141523, "grad_norm": 1.7677850176979024, "learning_rate": 6.815484756283081e-06, "loss": 0.4323, "step": 15055 }, { "epoch": 0.44358675957161586, "grad_norm": 1.5832224285522833, "learning_rate": 6.815005603219858e-06, "loss": 0.5612, "step": 15056 }, { "epoch": 0.44361622202907947, "grad_norm": 1.7667197348076913, "learning_rate": 6.814526430958057e-06, "loss": 0.6786, "step": 15057 }, { "epoch": 0.443645684486543, "grad_norm": 1.3109136921217466, "learning_rate": 6.814047239502748e-06, "loss": 0.4741, "step": 15058 }, { "epoch": 0.4436751469440066, "grad_norm": 1.4178915474508722, "learning_rate": 6.8135680288590015e-06, "loss": 0.3833, "step": 15059 }, { "epoch": 0.44370460940147016, "grad_norm": 1.5899078836794152, "learning_rate": 6.813088799031883e-06, "loss": 0.4679, "step": 15060 }, { "epoch": 0.44373407185893377, "grad_norm": 1.4492975042379914, "learning_rate": 6.812609550026463e-06, "loss": 0.4651, "step": 15061 }, { "epoch": 0.4437635343163973, "grad_norm": 1.6192233763066557, "learning_rate": 6.812130281847815e-06, "loss": 0.5015, "step": 15062 }, { "epoch": 0.4437929967738609, "grad_norm": 1.4428687330000385, "learning_rate": 6.811650994501003e-06, "loss": 0.4999, "step": 15063 }, { "epoch": 0.44382245923132446, "grad_norm": 1.8379262868770312, "learning_rate": 6.811171687991099e-06, "loss": 0.5455, "step": 15064 }, { "epoch": 0.44385192168878806, "grad_norm": 1.656087741021847, "learning_rate": 6.810692362323174e-06, "loss": 0.4011, "step": 15065 }, { "epoch": 0.4438813841462516, "grad_norm": 1.5273479949044586, "learning_rate": 6.810213017502296e-06, "loss": 0.3496, "step": 15066 }, { "epoch": 0.4439108466037152, "grad_norm": 1.3831329011371707, "learning_rate": 6.809733653533537e-06, "loss": 0.4721, "step": 15067 }, { "epoch": 0.4439403090611788, "grad_norm": 1.5472317433734122, "learning_rate": 6.809254270421968e-06, "loss": 0.5585, "step": 15068 }, { "epoch": 0.44396977151864236, "grad_norm": 1.6149917268029805, "learning_rate": 6.8087748681726565e-06, "loss": 0.4394, "step": 15069 }, { "epoch": 0.44399923397610597, "grad_norm": 1.6061880638906718, "learning_rate": 6.808295446790676e-06, "loss": 0.5313, "step": 15070 }, { "epoch": 0.4440286964335695, "grad_norm": 1.5974393664754938, "learning_rate": 6.807816006281098e-06, "loss": 0.4592, "step": 15071 }, { "epoch": 0.4440581588910331, "grad_norm": 1.7391804543246376, "learning_rate": 6.8073365466489935e-06, "loss": 0.4938, "step": 15072 }, { "epoch": 0.44408762134849666, "grad_norm": 1.282482371210898, "learning_rate": 6.806857067899433e-06, "loss": 0.3879, "step": 15073 }, { "epoch": 0.44411708380596027, "grad_norm": 1.6734476583843745, "learning_rate": 6.80637757003749e-06, "loss": 0.5856, "step": 15074 }, { "epoch": 0.4441465462634238, "grad_norm": 1.586196018360719, "learning_rate": 6.8058980530682335e-06, "loss": 0.4607, "step": 15075 }, { "epoch": 0.4441760087208874, "grad_norm": 1.6304724165916855, "learning_rate": 6.80541851699674e-06, "loss": 0.5631, "step": 15076 }, { "epoch": 0.44420547117835096, "grad_norm": 1.4187301597536242, "learning_rate": 6.804938961828079e-06, "loss": 0.5074, "step": 15077 }, { "epoch": 0.44423493363581457, "grad_norm": 1.6257537052499056, "learning_rate": 6.804459387567324e-06, "loss": 0.6006, "step": 15078 }, { "epoch": 0.4442643960932781, "grad_norm": 1.3531906645057288, "learning_rate": 6.803979794219547e-06, "loss": 0.3215, "step": 15079 }, { "epoch": 0.4442938585507417, "grad_norm": 1.733085108166214, "learning_rate": 6.8035001817898225e-06, "loss": 0.4075, "step": 15080 }, { "epoch": 0.4443233210082053, "grad_norm": 1.5035722714402389, "learning_rate": 6.803020550283222e-06, "loss": 0.3202, "step": 15081 }, { "epoch": 0.44435278346566887, "grad_norm": 1.6346103332787754, "learning_rate": 6.802540899704819e-06, "loss": 0.5251, "step": 15082 }, { "epoch": 0.44438224592313247, "grad_norm": 1.7149238294619589, "learning_rate": 6.8020612300596885e-06, "loss": 0.518, "step": 15083 }, { "epoch": 0.444411708380596, "grad_norm": 1.5682810406851408, "learning_rate": 6.801581541352904e-06, "loss": 0.4676, "step": 15084 }, { "epoch": 0.4444411708380596, "grad_norm": 1.5008451071449112, "learning_rate": 6.801101833589538e-06, "loss": 0.4716, "step": 15085 }, { "epoch": 0.44447063329552317, "grad_norm": 1.5906324201980822, "learning_rate": 6.800622106774666e-06, "loss": 0.5084, "step": 15086 }, { "epoch": 0.44450009575298677, "grad_norm": 1.765085667958603, "learning_rate": 6.800142360913362e-06, "loss": 0.4779, "step": 15087 }, { "epoch": 0.4445295582104503, "grad_norm": 1.5236363142610014, "learning_rate": 6.799662596010701e-06, "loss": 0.5319, "step": 15088 }, { "epoch": 0.4445590206679139, "grad_norm": 1.4352470387106404, "learning_rate": 6.799182812071757e-06, "loss": 0.4423, "step": 15089 }, { "epoch": 0.44458848312537746, "grad_norm": 1.4436028785942794, "learning_rate": 6.798703009101606e-06, "loss": 0.4699, "step": 15090 }, { "epoch": 0.44461794558284107, "grad_norm": 1.2801052334096363, "learning_rate": 6.798223187105322e-06, "loss": 0.369, "step": 15091 }, { "epoch": 0.4446474080403046, "grad_norm": 1.2942517017652964, "learning_rate": 6.797743346087982e-06, "loss": 0.3367, "step": 15092 }, { "epoch": 0.4446768704977682, "grad_norm": 1.5251935185760588, "learning_rate": 6.797263486054662e-06, "loss": 0.561, "step": 15093 }, { "epoch": 0.4447063329552318, "grad_norm": 1.4395959664112317, "learning_rate": 6.796783607010433e-06, "loss": 0.444, "step": 15094 }, { "epoch": 0.44473579541269537, "grad_norm": 1.4010601206760662, "learning_rate": 6.796303708960378e-06, "loss": 0.3704, "step": 15095 }, { "epoch": 0.44476525787015897, "grad_norm": 1.3422528417172759, "learning_rate": 6.795823791909568e-06, "loss": 0.3787, "step": 15096 }, { "epoch": 0.4447947203276225, "grad_norm": 1.7231205282958162, "learning_rate": 6.795343855863082e-06, "loss": 0.4727, "step": 15097 }, { "epoch": 0.4448241827850861, "grad_norm": 1.3365496259393213, "learning_rate": 6.794863900825995e-06, "loss": 0.4076, "step": 15098 }, { "epoch": 0.44485364524254967, "grad_norm": 1.3806732710252059, "learning_rate": 6.794383926803386e-06, "loss": 0.4117, "step": 15099 }, { "epoch": 0.44488310770001327, "grad_norm": 1.6661583789111571, "learning_rate": 6.79390393380033e-06, "loss": 0.5563, "step": 15100 }, { "epoch": 0.4449125701574768, "grad_norm": 1.6508382619541384, "learning_rate": 6.793423921821905e-06, "loss": 0.4908, "step": 15101 }, { "epoch": 0.4449420326149404, "grad_norm": 1.5403683881484254, "learning_rate": 6.792943890873188e-06, "loss": 0.4556, "step": 15102 }, { "epoch": 0.44497149507240397, "grad_norm": 1.549620412261822, "learning_rate": 6.792463840959257e-06, "loss": 0.4251, "step": 15103 }, { "epoch": 0.44500095752986757, "grad_norm": 1.5348722023530401, "learning_rate": 6.79198377208519e-06, "loss": 0.5483, "step": 15104 }, { "epoch": 0.4450304199873311, "grad_norm": 1.5740544738203897, "learning_rate": 6.791503684256064e-06, "loss": 0.4217, "step": 15105 }, { "epoch": 0.4450598824447947, "grad_norm": 1.5128508267825092, "learning_rate": 6.7910235774769585e-06, "loss": 0.301, "step": 15106 }, { "epoch": 0.4450893449022583, "grad_norm": 1.5705856977108976, "learning_rate": 6.790543451752952e-06, "loss": 0.4959, "step": 15107 }, { "epoch": 0.44511880735972187, "grad_norm": 1.4518694221210175, "learning_rate": 6.790063307089123e-06, "loss": 0.4623, "step": 15108 }, { "epoch": 0.44514826981718547, "grad_norm": 1.5567201766916219, "learning_rate": 6.789583143490548e-06, "loss": 0.5099, "step": 15109 }, { "epoch": 0.445177732274649, "grad_norm": 1.559686890950666, "learning_rate": 6.7891029609623096e-06, "loss": 0.5252, "step": 15110 }, { "epoch": 0.4452071947321126, "grad_norm": 1.5663314783549775, "learning_rate": 6.788622759509484e-06, "loss": 0.5005, "step": 15111 }, { "epoch": 0.44523665718957617, "grad_norm": 1.5139307512193454, "learning_rate": 6.788142539137152e-06, "loss": 0.5279, "step": 15112 }, { "epoch": 0.44526611964703977, "grad_norm": 1.8568587284360838, "learning_rate": 6.787662299850394e-06, "loss": 0.5281, "step": 15113 }, { "epoch": 0.4452955821045033, "grad_norm": 1.6442096971696316, "learning_rate": 6.78718204165429e-06, "loss": 0.5775, "step": 15114 }, { "epoch": 0.4453250445619669, "grad_norm": 1.7022212174222138, "learning_rate": 6.786701764553917e-06, "loss": 0.6018, "step": 15115 }, { "epoch": 0.44535450701943047, "grad_norm": 1.648910067983956, "learning_rate": 6.786221468554358e-06, "loss": 0.4139, "step": 15116 }, { "epoch": 0.44538396947689407, "grad_norm": 1.444780485080353, "learning_rate": 6.785741153660693e-06, "loss": 0.4252, "step": 15117 }, { "epoch": 0.4454134319343576, "grad_norm": 1.3976294236318012, "learning_rate": 6.785260819878001e-06, "loss": 0.5206, "step": 15118 }, { "epoch": 0.4454428943918212, "grad_norm": 1.4478366867439676, "learning_rate": 6.784780467211365e-06, "loss": 0.5174, "step": 15119 }, { "epoch": 0.4454723568492848, "grad_norm": 1.819119664805471, "learning_rate": 6.784300095665866e-06, "loss": 0.5128, "step": 15120 }, { "epoch": 0.44550181930674837, "grad_norm": 1.461814860308589, "learning_rate": 6.783819705246584e-06, "loss": 0.4882, "step": 15121 }, { "epoch": 0.445531281764212, "grad_norm": 1.501831278215889, "learning_rate": 6.7833392959586e-06, "loss": 0.5038, "step": 15122 }, { "epoch": 0.4455607442216755, "grad_norm": 1.3228309980568842, "learning_rate": 6.782858867806997e-06, "loss": 0.3891, "step": 15123 }, { "epoch": 0.4455902066791391, "grad_norm": 1.3483873901628218, "learning_rate": 6.782378420796856e-06, "loss": 0.3688, "step": 15124 }, { "epoch": 0.44561966913660267, "grad_norm": 1.5445597377865883, "learning_rate": 6.781897954933259e-06, "loss": 0.4311, "step": 15125 }, { "epoch": 0.4456491315940663, "grad_norm": 1.67405331786921, "learning_rate": 6.78141747022129e-06, "loss": 0.4601, "step": 15126 }, { "epoch": 0.4456785940515298, "grad_norm": 1.376574530321129, "learning_rate": 6.780936966666028e-06, "loss": 0.4871, "step": 15127 }, { "epoch": 0.4457080565089934, "grad_norm": 1.5787829463008503, "learning_rate": 6.780456444272559e-06, "loss": 0.5142, "step": 15128 }, { "epoch": 0.44573751896645697, "grad_norm": 1.383101622430993, "learning_rate": 6.779975903045963e-06, "loss": 0.3852, "step": 15129 }, { "epoch": 0.44576698142392057, "grad_norm": 1.4740690127558582, "learning_rate": 6.7794953429913245e-06, "loss": 0.4119, "step": 15130 }, { "epoch": 0.4457964438813841, "grad_norm": 1.6730369662433446, "learning_rate": 6.779014764113727e-06, "loss": 0.508, "step": 15131 }, { "epoch": 0.4458259063388477, "grad_norm": 1.4459704160675677, "learning_rate": 6.778534166418255e-06, "loss": 0.437, "step": 15132 }, { "epoch": 0.4458553687963113, "grad_norm": 1.6147502880858444, "learning_rate": 6.778053549909989e-06, "loss": 0.4375, "step": 15133 }, { "epoch": 0.44588483125377487, "grad_norm": 1.44674269427874, "learning_rate": 6.777572914594014e-06, "loss": 0.4659, "step": 15134 }, { "epoch": 0.4459142937112385, "grad_norm": 1.6353025867198934, "learning_rate": 6.777092260475415e-06, "loss": 0.4279, "step": 15135 }, { "epoch": 0.445943756168702, "grad_norm": 1.85320760949973, "learning_rate": 6.776611587559275e-06, "loss": 0.4852, "step": 15136 }, { "epoch": 0.4459732186261656, "grad_norm": 1.4997580395768857, "learning_rate": 6.776130895850681e-06, "loss": 0.3428, "step": 15137 }, { "epoch": 0.44600268108362917, "grad_norm": 1.5643258564654692, "learning_rate": 6.775650185354714e-06, "loss": 0.5781, "step": 15138 }, { "epoch": 0.4460321435410928, "grad_norm": 1.6038903898766206, "learning_rate": 6.77516945607646e-06, "loss": 0.4167, "step": 15139 }, { "epoch": 0.4460616059985563, "grad_norm": 1.5010996479487095, "learning_rate": 6.774688708021005e-06, "loss": 0.5233, "step": 15140 }, { "epoch": 0.4460910684560199, "grad_norm": 1.3547352341088208, "learning_rate": 6.774207941193435e-06, "loss": 0.407, "step": 15141 }, { "epoch": 0.44612053091348347, "grad_norm": 1.4013641902403977, "learning_rate": 6.773727155598834e-06, "loss": 0.3176, "step": 15142 }, { "epoch": 0.4461499933709471, "grad_norm": 1.5655374290910926, "learning_rate": 6.773246351242286e-06, "loss": 0.4518, "step": 15143 }, { "epoch": 0.4461794558284106, "grad_norm": 1.3830753537916978, "learning_rate": 6.77276552812888e-06, "loss": 0.5098, "step": 15144 }, { "epoch": 0.4462089182858742, "grad_norm": 1.3286834597335495, "learning_rate": 6.772284686263699e-06, "loss": 0.3252, "step": 15145 }, { "epoch": 0.4462383807433378, "grad_norm": 1.56756049827534, "learning_rate": 6.7718038256518316e-06, "loss": 0.5537, "step": 15146 }, { "epoch": 0.4462678432008014, "grad_norm": 1.5729574345649118, "learning_rate": 6.7713229462983655e-06, "loss": 0.4221, "step": 15147 }, { "epoch": 0.446297305658265, "grad_norm": 1.3000392851899358, "learning_rate": 6.770842048208382e-06, "loss": 0.3498, "step": 15148 }, { "epoch": 0.4463267681157285, "grad_norm": 1.752754089761632, "learning_rate": 6.770361131386971e-06, "loss": 0.5028, "step": 15149 }, { "epoch": 0.4463562305731921, "grad_norm": 1.4903607086450574, "learning_rate": 6.769880195839221e-06, "loss": 0.4879, "step": 15150 }, { "epoch": 0.4463856930306557, "grad_norm": 1.5046181324657473, "learning_rate": 6.769399241570217e-06, "loss": 0.5763, "step": 15151 }, { "epoch": 0.4464151554881193, "grad_norm": 1.3534844559532266, "learning_rate": 6.768918268585048e-06, "loss": 0.3732, "step": 15152 }, { "epoch": 0.4464446179455828, "grad_norm": 1.4717394450808279, "learning_rate": 6.7684372768888e-06, "loss": 0.4389, "step": 15153 }, { "epoch": 0.4464740804030464, "grad_norm": 1.3018070594869766, "learning_rate": 6.767956266486561e-06, "loss": 0.2883, "step": 15154 }, { "epoch": 0.44650354286050997, "grad_norm": 1.4722226263488274, "learning_rate": 6.767475237383419e-06, "loss": 0.4723, "step": 15155 }, { "epoch": 0.4465330053179736, "grad_norm": 1.5129672365126245, "learning_rate": 6.7669941895844635e-06, "loss": 0.4531, "step": 15156 }, { "epoch": 0.4465624677754371, "grad_norm": 1.3781380976821611, "learning_rate": 6.766513123094782e-06, "loss": 0.446, "step": 15157 }, { "epoch": 0.4465919302329007, "grad_norm": 1.6181805439662869, "learning_rate": 6.766032037919463e-06, "loss": 0.4247, "step": 15158 }, { "epoch": 0.4466213926903643, "grad_norm": 1.6365024695014545, "learning_rate": 6.7655509340635946e-06, "loss": 0.4824, "step": 15159 }, { "epoch": 0.4466508551478279, "grad_norm": 1.421663625047723, "learning_rate": 6.7650698115322676e-06, "loss": 0.4668, "step": 15160 }, { "epoch": 0.4466803176052915, "grad_norm": 1.7553869362092156, "learning_rate": 6.764588670330568e-06, "loss": 0.5302, "step": 15161 }, { "epoch": 0.446709780062755, "grad_norm": 1.5640609349778887, "learning_rate": 6.764107510463591e-06, "loss": 0.4112, "step": 15162 }, { "epoch": 0.4467392425202186, "grad_norm": 1.4932139784765834, "learning_rate": 6.763626331936419e-06, "loss": 0.4482, "step": 15163 }, { "epoch": 0.4467687049776822, "grad_norm": 1.374142213730235, "learning_rate": 6.763145134754146e-06, "loss": 0.378, "step": 15164 }, { "epoch": 0.4467981674351458, "grad_norm": 1.4555498765461947, "learning_rate": 6.762663918921861e-06, "loss": 0.4786, "step": 15165 }, { "epoch": 0.4468276298926093, "grad_norm": 1.5071611460325376, "learning_rate": 6.762182684444655e-06, "loss": 0.464, "step": 15166 }, { "epoch": 0.4468570923500729, "grad_norm": 1.4781347569594785, "learning_rate": 6.761701431327618e-06, "loss": 0.4654, "step": 15167 }, { "epoch": 0.4468865548075365, "grad_norm": 1.5391739714078965, "learning_rate": 6.761220159575839e-06, "loss": 0.4136, "step": 15168 }, { "epoch": 0.4469160172650001, "grad_norm": 1.4749248216335424, "learning_rate": 6.76073886919441e-06, "loss": 0.503, "step": 15169 }, { "epoch": 0.4469454797224636, "grad_norm": 1.5604366913525614, "learning_rate": 6.760257560188421e-06, "loss": 0.5151, "step": 15170 }, { "epoch": 0.4469749421799272, "grad_norm": 1.4381767228587552, "learning_rate": 6.759776232562967e-06, "loss": 0.4358, "step": 15171 }, { "epoch": 0.44700440463739083, "grad_norm": 1.3595588181100469, "learning_rate": 6.759294886323133e-06, "loss": 0.3428, "step": 15172 }, { "epoch": 0.4470338670948544, "grad_norm": 1.388609236395438, "learning_rate": 6.7588135214740155e-06, "loss": 0.4603, "step": 15173 }, { "epoch": 0.447063329552318, "grad_norm": 1.3845203512417326, "learning_rate": 6.7583321380207045e-06, "loss": 0.3972, "step": 15174 }, { "epoch": 0.4470927920097815, "grad_norm": 1.4988292540037642, "learning_rate": 6.757850735968292e-06, "loss": 0.4903, "step": 15175 }, { "epoch": 0.44712225446724513, "grad_norm": 1.4899768100546154, "learning_rate": 6.757369315321869e-06, "loss": 0.4542, "step": 15176 }, { "epoch": 0.4471517169247087, "grad_norm": 1.5991485953238267, "learning_rate": 6.756887876086532e-06, "loss": 0.4381, "step": 15177 }, { "epoch": 0.4471811793821723, "grad_norm": 1.5829413269024222, "learning_rate": 6.756406418267367e-06, "loss": 0.4622, "step": 15178 }, { "epoch": 0.4472106418396358, "grad_norm": 1.4986260273857352, "learning_rate": 6.755924941869471e-06, "loss": 0.3805, "step": 15179 }, { "epoch": 0.44724010429709943, "grad_norm": 1.2934354726972186, "learning_rate": 6.755443446897938e-06, "loss": 0.4584, "step": 15180 }, { "epoch": 0.447269566754563, "grad_norm": 1.3800838136747808, "learning_rate": 6.7549619333578574e-06, "loss": 0.3828, "step": 15181 }, { "epoch": 0.4472990292120266, "grad_norm": 1.5363454856522476, "learning_rate": 6.754480401254325e-06, "loss": 0.5759, "step": 15182 }, { "epoch": 0.4473284916694901, "grad_norm": 1.589577472870833, "learning_rate": 6.753998850592433e-06, "loss": 0.4542, "step": 15183 }, { "epoch": 0.4473579541269537, "grad_norm": 1.5373713536237594, "learning_rate": 6.753517281377276e-06, "loss": 0.6149, "step": 15184 }, { "epoch": 0.44738741658441733, "grad_norm": 1.3871990167271875, "learning_rate": 6.7530356936139474e-06, "loss": 0.3453, "step": 15185 }, { "epoch": 0.4474168790418809, "grad_norm": 1.4464870197981516, "learning_rate": 6.752554087307543e-06, "loss": 0.4521, "step": 15186 }, { "epoch": 0.4474463414993445, "grad_norm": 1.4671257754617024, "learning_rate": 6.7520724624631555e-06, "loss": 0.5802, "step": 15187 }, { "epoch": 0.447475803956808, "grad_norm": 1.4003077571328422, "learning_rate": 6.751590819085877e-06, "loss": 0.4347, "step": 15188 }, { "epoch": 0.44750526641427163, "grad_norm": 1.6849372262913835, "learning_rate": 6.7511091571808065e-06, "loss": 0.4927, "step": 15189 }, { "epoch": 0.4475347288717352, "grad_norm": 1.574511815222627, "learning_rate": 6.7506274767530355e-06, "loss": 0.3783, "step": 15190 }, { "epoch": 0.4475641913291988, "grad_norm": 1.8117352283543733, "learning_rate": 6.7501457778076615e-06, "loss": 0.441, "step": 15191 }, { "epoch": 0.4475936537866623, "grad_norm": 1.5822143601963778, "learning_rate": 6.749664060349781e-06, "loss": 0.4487, "step": 15192 }, { "epoch": 0.44762311624412593, "grad_norm": 1.6155396881207305, "learning_rate": 6.749182324384485e-06, "loss": 0.6403, "step": 15193 }, { "epoch": 0.4476525787015895, "grad_norm": 1.358411047371252, "learning_rate": 6.748700569916871e-06, "loss": 0.4364, "step": 15194 }, { "epoch": 0.4476820411590531, "grad_norm": 1.5567763498225862, "learning_rate": 6.748218796952035e-06, "loss": 0.3182, "step": 15195 }, { "epoch": 0.4477115036165166, "grad_norm": 1.4649903278578777, "learning_rate": 6.747737005495074e-06, "loss": 0.4827, "step": 15196 }, { "epoch": 0.44774096607398023, "grad_norm": 1.4934804407596094, "learning_rate": 6.747255195551085e-06, "loss": 0.4502, "step": 15197 }, { "epoch": 0.44777042853144383, "grad_norm": 1.507968533535495, "learning_rate": 6.746773367125163e-06, "loss": 0.3775, "step": 15198 }, { "epoch": 0.4477998909889074, "grad_norm": 1.3864834320296022, "learning_rate": 6.746291520222402e-06, "loss": 0.4394, "step": 15199 }, { "epoch": 0.447829353446371, "grad_norm": 1.4634893739134325, "learning_rate": 6.745809654847902e-06, "loss": 0.5641, "step": 15200 }, { "epoch": 0.44785881590383453, "grad_norm": 1.7052500558738302, "learning_rate": 6.745327771006762e-06, "loss": 0.6123, "step": 15201 }, { "epoch": 0.44788827836129813, "grad_norm": 1.6625247970379806, "learning_rate": 6.744845868704075e-06, "loss": 0.4361, "step": 15202 }, { "epoch": 0.4479177408187617, "grad_norm": 1.3466697057334276, "learning_rate": 6.744363947944941e-06, "loss": 0.4518, "step": 15203 }, { "epoch": 0.4479472032762253, "grad_norm": 1.54278482507071, "learning_rate": 6.743882008734457e-06, "loss": 0.2938, "step": 15204 }, { "epoch": 0.44797666573368883, "grad_norm": 1.541028979036005, "learning_rate": 6.743400051077719e-06, "loss": 0.586, "step": 15205 }, { "epoch": 0.44800612819115243, "grad_norm": 1.4528668188805522, "learning_rate": 6.742918074979827e-06, "loss": 0.3845, "step": 15206 }, { "epoch": 0.448035590648616, "grad_norm": 1.616100266680557, "learning_rate": 6.74243608044588e-06, "loss": 0.4588, "step": 15207 }, { "epoch": 0.4480650531060796, "grad_norm": 1.7277205685852595, "learning_rate": 6.741954067480974e-06, "loss": 0.4179, "step": 15208 }, { "epoch": 0.4480945155635431, "grad_norm": 1.4685158805017002, "learning_rate": 6.7414720360902085e-06, "loss": 0.4893, "step": 15209 }, { "epoch": 0.44812397802100673, "grad_norm": 1.6562196265686262, "learning_rate": 6.740989986278684e-06, "loss": 0.6807, "step": 15210 }, { "epoch": 0.44815344047847033, "grad_norm": 1.4793648029656528, "learning_rate": 6.7405079180514964e-06, "loss": 0.5034, "step": 15211 }, { "epoch": 0.4481829029359339, "grad_norm": 1.6791807883413115, "learning_rate": 6.740025831413748e-06, "loss": 0.638, "step": 15212 }, { "epoch": 0.4482123653933975, "grad_norm": 1.468970869883939, "learning_rate": 6.7395437263705354e-06, "loss": 0.3395, "step": 15213 }, { "epoch": 0.44824182785086103, "grad_norm": 1.6342517336095503, "learning_rate": 6.7390616029269605e-06, "loss": 0.5275, "step": 15214 }, { "epoch": 0.44827129030832463, "grad_norm": 1.5294892166470604, "learning_rate": 6.73857946108812e-06, "loss": 0.5267, "step": 15215 }, { "epoch": 0.4483007527657882, "grad_norm": 1.2177073393776083, "learning_rate": 6.738097300859119e-06, "loss": 0.291, "step": 15216 }, { "epoch": 0.4483302152232518, "grad_norm": 1.486821151497301, "learning_rate": 6.737615122245053e-06, "loss": 0.5811, "step": 15217 }, { "epoch": 0.44835967768071533, "grad_norm": 1.4782734164300637, "learning_rate": 6.737132925251022e-06, "loss": 0.4169, "step": 15218 }, { "epoch": 0.44838914013817893, "grad_norm": 1.4841264581895397, "learning_rate": 6.736650709882131e-06, "loss": 0.4253, "step": 15219 }, { "epoch": 0.4484186025956425, "grad_norm": 1.4722867488415206, "learning_rate": 6.736168476143477e-06, "loss": 0.3145, "step": 15220 }, { "epoch": 0.4484480650531061, "grad_norm": 1.298921861773326, "learning_rate": 6.73568622404016e-06, "loss": 0.3659, "step": 15221 }, { "epoch": 0.44847752751056963, "grad_norm": 1.794829552559645, "learning_rate": 6.735203953577285e-06, "loss": 0.4929, "step": 15222 }, { "epoch": 0.44850698996803323, "grad_norm": 1.501325990581814, "learning_rate": 6.734721664759952e-06, "loss": 0.5472, "step": 15223 }, { "epoch": 0.44853645242549683, "grad_norm": 1.492560475132483, "learning_rate": 6.73423935759326e-06, "loss": 0.5002, "step": 15224 }, { "epoch": 0.4485659148829604, "grad_norm": 1.4200436021538851, "learning_rate": 6.733757032082315e-06, "loss": 0.3685, "step": 15225 }, { "epoch": 0.448595377340424, "grad_norm": 1.6320614445258597, "learning_rate": 6.733274688232215e-06, "loss": 0.5474, "step": 15226 }, { "epoch": 0.44862483979788753, "grad_norm": 1.58895717055213, "learning_rate": 6.7327923260480645e-06, "loss": 0.3468, "step": 15227 }, { "epoch": 0.44865430225535113, "grad_norm": 1.4731979642814323, "learning_rate": 6.732309945534964e-06, "loss": 0.4444, "step": 15228 }, { "epoch": 0.4486837647128147, "grad_norm": 1.502775596063465, "learning_rate": 6.731827546698017e-06, "loss": 0.4788, "step": 15229 }, { "epoch": 0.4487132271702783, "grad_norm": 1.557354286507225, "learning_rate": 6.731345129542326e-06, "loss": 0.5381, "step": 15230 }, { "epoch": 0.44874268962774183, "grad_norm": 1.652476940907056, "learning_rate": 6.730862694072994e-06, "loss": 0.4822, "step": 15231 }, { "epoch": 0.44877215208520543, "grad_norm": 1.5290432389037114, "learning_rate": 6.730380240295124e-06, "loss": 0.5081, "step": 15232 }, { "epoch": 0.448801614542669, "grad_norm": 1.4303178274830863, "learning_rate": 6.729897768213819e-06, "loss": 0.4172, "step": 15233 }, { "epoch": 0.4488310770001326, "grad_norm": 1.4758553799797745, "learning_rate": 6.7294152778341815e-06, "loss": 0.3529, "step": 15234 }, { "epoch": 0.44886053945759613, "grad_norm": 1.5108654580619136, "learning_rate": 6.728932769161318e-06, "loss": 0.4155, "step": 15235 }, { "epoch": 0.44889000191505973, "grad_norm": 1.4226927716952016, "learning_rate": 6.72845024220033e-06, "loss": 0.422, "step": 15236 }, { "epoch": 0.44891946437252334, "grad_norm": 1.5809105360480367, "learning_rate": 6.7279676969563226e-06, "loss": 0.5133, "step": 15237 }, { "epoch": 0.4489489268299869, "grad_norm": 1.6098844716947047, "learning_rate": 6.7274851334344e-06, "loss": 0.4898, "step": 15238 }, { "epoch": 0.4489783892874505, "grad_norm": 1.5553400714637875, "learning_rate": 6.727002551639663e-06, "loss": 0.4806, "step": 15239 }, { "epoch": 0.44900785174491403, "grad_norm": 1.549488887951924, "learning_rate": 6.7265199515772216e-06, "loss": 0.4676, "step": 15240 }, { "epoch": 0.44903731420237764, "grad_norm": 1.5364143350123072, "learning_rate": 6.726037333252178e-06, "loss": 0.4263, "step": 15241 }, { "epoch": 0.4490667766598412, "grad_norm": 1.6592228599799586, "learning_rate": 6.7255546966696384e-06, "loss": 0.4694, "step": 15242 }, { "epoch": 0.4490962391173048, "grad_norm": 1.4416393390958904, "learning_rate": 6.725072041834706e-06, "loss": 0.5185, "step": 15243 }, { "epoch": 0.44912570157476833, "grad_norm": 1.607150828129875, "learning_rate": 6.7245893687524875e-06, "loss": 0.4343, "step": 15244 }, { "epoch": 0.44915516403223193, "grad_norm": 1.4711553666847528, "learning_rate": 6.724106677428088e-06, "loss": 0.4649, "step": 15245 }, { "epoch": 0.4491846264896955, "grad_norm": 1.380719328279965, "learning_rate": 6.723623967866615e-06, "loss": 0.3385, "step": 15246 }, { "epoch": 0.4492140889471591, "grad_norm": 1.4482617007896503, "learning_rate": 6.723141240073172e-06, "loss": 0.5907, "step": 15247 }, { "epoch": 0.44924355140462263, "grad_norm": 1.2678203649121633, "learning_rate": 6.722658494052865e-06, "loss": 0.3274, "step": 15248 }, { "epoch": 0.44927301386208623, "grad_norm": 1.566749790584818, "learning_rate": 6.722175729810803e-06, "loss": 0.3938, "step": 15249 }, { "epoch": 0.44930247631954984, "grad_norm": 1.6883532890548303, "learning_rate": 6.72169294735209e-06, "loss": 0.6108, "step": 15250 }, { "epoch": 0.4493319387770134, "grad_norm": 1.5004457250467054, "learning_rate": 6.721210146681834e-06, "loss": 0.4957, "step": 15251 }, { "epoch": 0.449361401234477, "grad_norm": 1.5808431228444717, "learning_rate": 6.7207273278051425e-06, "loss": 0.5601, "step": 15252 }, { "epoch": 0.44939086369194053, "grad_norm": 1.560207327822521, "learning_rate": 6.720244490727122e-06, "loss": 0.6024, "step": 15253 }, { "epoch": 0.44942032614940414, "grad_norm": 1.2328327977950435, "learning_rate": 6.7197616354528775e-06, "loss": 0.3121, "step": 15254 }, { "epoch": 0.4494497886068677, "grad_norm": 1.4588194193900317, "learning_rate": 6.719278761987517e-06, "loss": 0.4444, "step": 15255 }, { "epoch": 0.4494792510643313, "grad_norm": 1.4925413348106529, "learning_rate": 6.718795870336155e-06, "loss": 0.4429, "step": 15256 }, { "epoch": 0.44950871352179483, "grad_norm": 1.3758812763133663, "learning_rate": 6.718312960503891e-06, "loss": 0.4204, "step": 15257 }, { "epoch": 0.44953817597925844, "grad_norm": 1.8261875236780387, "learning_rate": 6.717830032495835e-06, "loss": 0.6371, "step": 15258 }, { "epoch": 0.449567638436722, "grad_norm": 1.5112697485997941, "learning_rate": 6.717347086317099e-06, "loss": 0.5111, "step": 15259 }, { "epoch": 0.4495971008941856, "grad_norm": 1.8025775611699397, "learning_rate": 6.716864121972788e-06, "loss": 0.3828, "step": 15260 }, { "epoch": 0.44962656335164913, "grad_norm": 1.4001538871719335, "learning_rate": 6.716381139468011e-06, "loss": 0.3711, "step": 15261 }, { "epoch": 0.44965602580911274, "grad_norm": 1.515521138966502, "learning_rate": 6.715898138807879e-06, "loss": 0.4292, "step": 15262 }, { "epoch": 0.44968548826657634, "grad_norm": 1.5729726760242362, "learning_rate": 6.715415119997497e-06, "loss": 0.4577, "step": 15263 }, { "epoch": 0.4497149507240399, "grad_norm": 1.8419512022351765, "learning_rate": 6.7149320830419776e-06, "loss": 0.6469, "step": 15264 }, { "epoch": 0.4497444131815035, "grad_norm": 1.8133103790213951, "learning_rate": 6.71444902794643e-06, "loss": 0.6071, "step": 15265 }, { "epoch": 0.44977387563896704, "grad_norm": 1.3902529392069631, "learning_rate": 6.713965954715963e-06, "loss": 0.3922, "step": 15266 }, { "epoch": 0.44980333809643064, "grad_norm": 1.612779900359756, "learning_rate": 6.713482863355686e-06, "loss": 0.3528, "step": 15267 }, { "epoch": 0.4498328005538942, "grad_norm": 1.4855055292745145, "learning_rate": 6.71299975387071e-06, "loss": 0.5174, "step": 15268 }, { "epoch": 0.4498622630113578, "grad_norm": 1.3696350529146606, "learning_rate": 6.712516626266143e-06, "loss": 0.5167, "step": 15269 }, { "epoch": 0.44989172546882134, "grad_norm": 1.7075452297051588, "learning_rate": 6.7120334805470975e-06, "loss": 0.508, "step": 15270 }, { "epoch": 0.44992118792628494, "grad_norm": 1.5361673563087497, "learning_rate": 6.711550316718687e-06, "loss": 0.5673, "step": 15271 }, { "epoch": 0.4499506503837485, "grad_norm": 1.5493130497675667, "learning_rate": 6.711067134786016e-06, "loss": 0.4823, "step": 15272 }, { "epoch": 0.4499801128412121, "grad_norm": 1.4637093561805783, "learning_rate": 6.7105839347541966e-06, "loss": 0.4189, "step": 15273 }, { "epoch": 0.4500095752986757, "grad_norm": 1.5857269732038266, "learning_rate": 6.710100716628345e-06, "loss": 0.3432, "step": 15274 }, { "epoch": 0.45003903775613924, "grad_norm": 1.5596440436349857, "learning_rate": 6.709617480413567e-06, "loss": 0.4078, "step": 15275 }, { "epoch": 0.45006850021360284, "grad_norm": 1.4147414255854862, "learning_rate": 6.709134226114977e-06, "loss": 0.3941, "step": 15276 }, { "epoch": 0.4500979626710664, "grad_norm": 1.4809490101335994, "learning_rate": 6.708650953737688e-06, "loss": 0.4526, "step": 15277 }, { "epoch": 0.45012742512853, "grad_norm": 1.4221738893528262, "learning_rate": 6.708167663286807e-06, "loss": 0.4664, "step": 15278 }, { "epoch": 0.45015688758599354, "grad_norm": 1.3931924913706515, "learning_rate": 6.70768435476745e-06, "loss": 0.3986, "step": 15279 }, { "epoch": 0.45018635004345714, "grad_norm": 1.5995203402684088, "learning_rate": 6.7072010281847276e-06, "loss": 0.5199, "step": 15280 }, { "epoch": 0.4502158125009207, "grad_norm": 1.6781001630252919, "learning_rate": 6.706717683543755e-06, "loss": 0.6408, "step": 15281 }, { "epoch": 0.4502452749583843, "grad_norm": 1.352268283371567, "learning_rate": 6.70623432084964e-06, "loss": 0.4502, "step": 15282 }, { "epoch": 0.45027473741584784, "grad_norm": 1.511788525554673, "learning_rate": 6.705750940107502e-06, "loss": 0.3964, "step": 15283 }, { "epoch": 0.45030419987331144, "grad_norm": 1.4989106047365766, "learning_rate": 6.7052675413224475e-06, "loss": 0.4586, "step": 15284 }, { "epoch": 0.450333662330775, "grad_norm": 1.550698700925699, "learning_rate": 6.704784124499593e-06, "loss": 0.4377, "step": 15285 }, { "epoch": 0.4503631247882386, "grad_norm": 1.5848878547516638, "learning_rate": 6.704300689644054e-06, "loss": 0.4059, "step": 15286 }, { "epoch": 0.4503925872457022, "grad_norm": 1.6191157518087038, "learning_rate": 6.703817236760939e-06, "loss": 0.4038, "step": 15287 }, { "epoch": 0.45042204970316574, "grad_norm": 1.6547475109839118, "learning_rate": 6.703333765855365e-06, "loss": 0.5292, "step": 15288 }, { "epoch": 0.45045151216062934, "grad_norm": 1.4340495588208233, "learning_rate": 6.702850276932446e-06, "loss": 0.3647, "step": 15289 }, { "epoch": 0.4504809746180929, "grad_norm": 1.5215511174944085, "learning_rate": 6.702366769997295e-06, "loss": 0.377, "step": 15290 }, { "epoch": 0.4505104370755565, "grad_norm": 1.5640078713685182, "learning_rate": 6.701883245055029e-06, "loss": 0.4558, "step": 15291 }, { "epoch": 0.45053989953302004, "grad_norm": 1.6420579956798327, "learning_rate": 6.7013997021107605e-06, "loss": 0.4739, "step": 15292 }, { "epoch": 0.45056936199048364, "grad_norm": 1.3925520146874764, "learning_rate": 6.700916141169602e-06, "loss": 0.3557, "step": 15293 }, { "epoch": 0.4505988244479472, "grad_norm": 1.777413913412866, "learning_rate": 6.700432562236672e-06, "loss": 0.6865, "step": 15294 }, { "epoch": 0.4506282869054108, "grad_norm": 1.2936721491162908, "learning_rate": 6.699948965317086e-06, "loss": 0.3778, "step": 15295 }, { "epoch": 0.45065774936287434, "grad_norm": 1.4934424923891396, "learning_rate": 6.6994653504159566e-06, "loss": 0.4013, "step": 15296 }, { "epoch": 0.45068721182033794, "grad_norm": 1.4780085035209427, "learning_rate": 6.698981717538401e-06, "loss": 0.3189, "step": 15297 }, { "epoch": 0.4507166742778015, "grad_norm": 1.4881212892696951, "learning_rate": 6.698498066689535e-06, "loss": 0.4853, "step": 15298 }, { "epoch": 0.4507461367352651, "grad_norm": 1.452257952458918, "learning_rate": 6.698014397874473e-06, "loss": 0.3731, "step": 15299 }, { "epoch": 0.4507755991927287, "grad_norm": 1.6909915231709591, "learning_rate": 6.697530711098332e-06, "loss": 0.4535, "step": 15300 }, { "epoch": 0.45080506165019224, "grad_norm": 1.634188323072407, "learning_rate": 6.69704700636623e-06, "loss": 0.5576, "step": 15301 }, { "epoch": 0.45083452410765584, "grad_norm": 1.5377085801331773, "learning_rate": 6.6965632836832814e-06, "loss": 0.4231, "step": 15302 }, { "epoch": 0.4508639865651194, "grad_norm": 1.9685421057850552, "learning_rate": 6.696079543054603e-06, "loss": 0.678, "step": 15303 }, { "epoch": 0.450893449022583, "grad_norm": 1.6241319102904863, "learning_rate": 6.6955957844853125e-06, "loss": 0.4848, "step": 15304 }, { "epoch": 0.45092291148004654, "grad_norm": 1.5179024692245155, "learning_rate": 6.695112007980527e-06, "loss": 0.4387, "step": 15305 }, { "epoch": 0.45095237393751014, "grad_norm": 1.501016733403968, "learning_rate": 6.694628213545362e-06, "loss": 0.5383, "step": 15306 }, { "epoch": 0.4509818363949737, "grad_norm": 1.4282355504687363, "learning_rate": 6.694144401184937e-06, "loss": 0.3004, "step": 15307 }, { "epoch": 0.4510112988524373, "grad_norm": 1.8403294767166332, "learning_rate": 6.693660570904369e-06, "loss": 0.5595, "step": 15308 }, { "epoch": 0.45104076130990084, "grad_norm": 1.5961820541319303, "learning_rate": 6.693176722708775e-06, "loss": 0.3971, "step": 15309 }, { "epoch": 0.45107022376736444, "grad_norm": 1.5619904849297013, "learning_rate": 6.6926928566032745e-06, "loss": 0.5745, "step": 15310 }, { "epoch": 0.451099686224828, "grad_norm": 1.365753067474881, "learning_rate": 6.692208972592985e-06, "loss": 0.4764, "step": 15311 }, { "epoch": 0.4511291486822916, "grad_norm": 1.7876989751741617, "learning_rate": 6.691725070683023e-06, "loss": 0.5273, "step": 15312 }, { "epoch": 0.4511586111397552, "grad_norm": 1.9047532393413484, "learning_rate": 6.69124115087851e-06, "loss": 0.6306, "step": 15313 }, { "epoch": 0.45118807359721874, "grad_norm": 1.5335192305629735, "learning_rate": 6.6907572131845635e-06, "loss": 0.461, "step": 15314 }, { "epoch": 0.45121753605468234, "grad_norm": 1.480590073611545, "learning_rate": 6.690273257606301e-06, "loss": 0.421, "step": 15315 }, { "epoch": 0.4512469985121459, "grad_norm": 1.5489457262951571, "learning_rate": 6.689789284148845e-06, "loss": 0.3551, "step": 15316 }, { "epoch": 0.4512764609696095, "grad_norm": 1.627353744956986, "learning_rate": 6.689305292817313e-06, "loss": 0.5084, "step": 15317 }, { "epoch": 0.45130592342707304, "grad_norm": 1.447405062270163, "learning_rate": 6.688821283616824e-06, "loss": 0.5015, "step": 15318 }, { "epoch": 0.45133538588453664, "grad_norm": 1.576211815297695, "learning_rate": 6.688337256552498e-06, "loss": 0.5306, "step": 15319 }, { "epoch": 0.4513648483420002, "grad_norm": 1.2846474605763307, "learning_rate": 6.6878532116294545e-06, "loss": 0.3407, "step": 15320 }, { "epoch": 0.4513943107994638, "grad_norm": 1.5526496028282148, "learning_rate": 6.687369148852815e-06, "loss": 0.5049, "step": 15321 }, { "epoch": 0.45142377325692734, "grad_norm": 1.4312319878488824, "learning_rate": 6.686885068227698e-06, "loss": 0.5444, "step": 15322 }, { "epoch": 0.45145323571439094, "grad_norm": 1.4615793295721278, "learning_rate": 6.686400969759226e-06, "loss": 0.5449, "step": 15323 }, { "epoch": 0.4514826981718545, "grad_norm": 1.70066019890052, "learning_rate": 6.685916853452516e-06, "loss": 0.3244, "step": 15324 }, { "epoch": 0.4515121606293181, "grad_norm": 1.5525446898055497, "learning_rate": 6.685432719312694e-06, "loss": 0.5153, "step": 15325 }, { "epoch": 0.4515416230867817, "grad_norm": 1.5987618681630753, "learning_rate": 6.684948567344879e-06, "loss": 0.3589, "step": 15326 }, { "epoch": 0.45157108554424524, "grad_norm": 1.4758383326277331, "learning_rate": 6.684464397554189e-06, "loss": 0.5248, "step": 15327 }, { "epoch": 0.45160054800170885, "grad_norm": 1.3846811799508942, "learning_rate": 6.683980209945748e-06, "loss": 0.4066, "step": 15328 }, { "epoch": 0.4516300104591724, "grad_norm": 1.4515255260486724, "learning_rate": 6.68349600452468e-06, "loss": 0.3574, "step": 15329 }, { "epoch": 0.451659472916636, "grad_norm": 1.5396501527969706, "learning_rate": 6.683011781296103e-06, "loss": 0.5839, "step": 15330 }, { "epoch": 0.45168893537409954, "grad_norm": 1.35101051097145, "learning_rate": 6.68252754026514e-06, "loss": 0.4318, "step": 15331 }, { "epoch": 0.45171839783156315, "grad_norm": 1.6970653002106526, "learning_rate": 6.682043281436914e-06, "loss": 0.6491, "step": 15332 }, { "epoch": 0.4517478602890267, "grad_norm": 1.4178381726484894, "learning_rate": 6.681559004816545e-06, "loss": 0.3775, "step": 15333 }, { "epoch": 0.4517773227464903, "grad_norm": 1.502747371263107, "learning_rate": 6.68107471040916e-06, "loss": 0.535, "step": 15334 }, { "epoch": 0.45180678520395384, "grad_norm": 1.6974810680620707, "learning_rate": 6.680590398219877e-06, "loss": 0.4297, "step": 15335 }, { "epoch": 0.45183624766141744, "grad_norm": 1.3383066097133098, "learning_rate": 6.680106068253822e-06, "loss": 0.455, "step": 15336 }, { "epoch": 0.451865710118881, "grad_norm": 1.5291746418102254, "learning_rate": 6.679621720516116e-06, "loss": 0.5459, "step": 15337 }, { "epoch": 0.4518951725763446, "grad_norm": 1.5834238937050478, "learning_rate": 6.679137355011886e-06, "loss": 0.4263, "step": 15338 }, { "epoch": 0.4519246350338082, "grad_norm": 1.559583424398106, "learning_rate": 6.678652971746249e-06, "loss": 0.4198, "step": 15339 }, { "epoch": 0.45195409749127174, "grad_norm": 1.5151857556672228, "learning_rate": 6.678168570724335e-06, "loss": 0.3683, "step": 15340 }, { "epoch": 0.45198355994873535, "grad_norm": 1.4738049645535376, "learning_rate": 6.677684151951265e-06, "loss": 0.4352, "step": 15341 }, { "epoch": 0.4520130224061989, "grad_norm": 1.6941508020537077, "learning_rate": 6.677199715432163e-06, "loss": 0.5138, "step": 15342 }, { "epoch": 0.4520424848636625, "grad_norm": 1.390055885488881, "learning_rate": 6.676715261172153e-06, "loss": 0.3903, "step": 15343 }, { "epoch": 0.45207194732112604, "grad_norm": 1.4240320318866935, "learning_rate": 6.676230789176361e-06, "loss": 0.3916, "step": 15344 }, { "epoch": 0.45210140977858965, "grad_norm": 1.6164574297034668, "learning_rate": 6.675746299449909e-06, "loss": 0.6055, "step": 15345 }, { "epoch": 0.4521308722360532, "grad_norm": 1.3775055015310917, "learning_rate": 6.6752617919979245e-06, "loss": 0.3976, "step": 15346 }, { "epoch": 0.4521603346935168, "grad_norm": 1.5374016453228863, "learning_rate": 6.674777266825531e-06, "loss": 0.4375, "step": 15347 }, { "epoch": 0.45218979715098034, "grad_norm": 1.310231964274014, "learning_rate": 6.674292723937855e-06, "loss": 0.3312, "step": 15348 }, { "epoch": 0.45221925960844395, "grad_norm": 1.4062478065243682, "learning_rate": 6.673808163340019e-06, "loss": 0.3443, "step": 15349 }, { "epoch": 0.4522487220659075, "grad_norm": 1.561121694409155, "learning_rate": 6.673323585037152e-06, "loss": 0.4611, "step": 15350 }, { "epoch": 0.4522781845233711, "grad_norm": 1.6397182231252423, "learning_rate": 6.672838989034378e-06, "loss": 0.6124, "step": 15351 }, { "epoch": 0.4523076469808347, "grad_norm": 1.7681229756036647, "learning_rate": 6.672354375336823e-06, "loss": 0.6055, "step": 15352 }, { "epoch": 0.45233710943829825, "grad_norm": 1.843948565343937, "learning_rate": 6.671869743949612e-06, "loss": 0.4596, "step": 15353 }, { "epoch": 0.45236657189576185, "grad_norm": 1.6027506495587518, "learning_rate": 6.671385094877872e-06, "loss": 0.4694, "step": 15354 }, { "epoch": 0.4523960343532254, "grad_norm": 1.5088916297516493, "learning_rate": 6.670900428126732e-06, "loss": 0.3977, "step": 15355 }, { "epoch": 0.452425496810689, "grad_norm": 1.4884510228719343, "learning_rate": 6.670415743701315e-06, "loss": 0.3322, "step": 15356 }, { "epoch": 0.45245495926815255, "grad_norm": 1.4553443371448516, "learning_rate": 6.669931041606749e-06, "loss": 0.5285, "step": 15357 }, { "epoch": 0.45248442172561615, "grad_norm": 1.6809761051763372, "learning_rate": 6.669446321848161e-06, "loss": 0.4477, "step": 15358 }, { "epoch": 0.4525138841830797, "grad_norm": 1.2853917188532873, "learning_rate": 6.66896158443068e-06, "loss": 0.2941, "step": 15359 }, { "epoch": 0.4525433466405433, "grad_norm": 1.5228528877857819, "learning_rate": 6.668476829359431e-06, "loss": 0.3407, "step": 15360 }, { "epoch": 0.45257280909800685, "grad_norm": 1.5911649749863872, "learning_rate": 6.667992056639544e-06, "loss": 0.3718, "step": 15361 }, { "epoch": 0.45260227155547045, "grad_norm": 1.4884548882312134, "learning_rate": 6.6675072662761454e-06, "loss": 0.4702, "step": 15362 }, { "epoch": 0.452631734012934, "grad_norm": 1.443303918339033, "learning_rate": 6.667022458274361e-06, "loss": 0.4222, "step": 15363 }, { "epoch": 0.4526611964703976, "grad_norm": 1.5064751600864679, "learning_rate": 6.666537632639322e-06, "loss": 0.3572, "step": 15364 }, { "epoch": 0.4526906589278612, "grad_norm": 1.6202614125146853, "learning_rate": 6.6660527893761565e-06, "loss": 0.5228, "step": 15365 }, { "epoch": 0.45272012138532475, "grad_norm": 1.5524743899912201, "learning_rate": 6.665567928489992e-06, "loss": 0.6103, "step": 15366 }, { "epoch": 0.45274958384278835, "grad_norm": 1.4791574993084287, "learning_rate": 6.665083049985956e-06, "loss": 0.4544, "step": 15367 }, { "epoch": 0.4527790463002519, "grad_norm": 1.5620939381985084, "learning_rate": 6.664598153869182e-06, "loss": 0.4542, "step": 15368 }, { "epoch": 0.4528085087577155, "grad_norm": 1.6443392045439387, "learning_rate": 6.6641132401447935e-06, "loss": 0.5996, "step": 15369 }, { "epoch": 0.45283797121517905, "grad_norm": 1.4185248498753271, "learning_rate": 6.663628308817924e-06, "loss": 0.4748, "step": 15370 }, { "epoch": 0.45286743367264265, "grad_norm": 1.4431002499903673, "learning_rate": 6.663143359893701e-06, "loss": 0.4646, "step": 15371 }, { "epoch": 0.4528968961301062, "grad_norm": 1.5137130678008042, "learning_rate": 6.662658393377252e-06, "loss": 0.5495, "step": 15372 }, { "epoch": 0.4529263585875698, "grad_norm": 1.5977096059599147, "learning_rate": 6.662173409273711e-06, "loss": 0.6519, "step": 15373 }, { "epoch": 0.45295582104503335, "grad_norm": 1.4482821131071721, "learning_rate": 6.661688407588207e-06, "loss": 0.4681, "step": 15374 }, { "epoch": 0.45298528350249695, "grad_norm": 1.7130585129840674, "learning_rate": 6.661203388325869e-06, "loss": 0.5638, "step": 15375 }, { "epoch": 0.4530147459599605, "grad_norm": 1.6347156425316638, "learning_rate": 6.660718351491829e-06, "loss": 0.5283, "step": 15376 }, { "epoch": 0.4530442084174241, "grad_norm": 1.6861313497006798, "learning_rate": 6.660233297091215e-06, "loss": 0.6011, "step": 15377 }, { "epoch": 0.4530736708748877, "grad_norm": 1.324359105119857, "learning_rate": 6.659748225129158e-06, "loss": 0.4076, "step": 15378 }, { "epoch": 0.45310313333235125, "grad_norm": 1.6683024034839318, "learning_rate": 6.659263135610791e-06, "loss": 0.4387, "step": 15379 }, { "epoch": 0.45313259578981485, "grad_norm": 1.5944791267149254, "learning_rate": 6.658778028541244e-06, "loss": 0.5508, "step": 15380 }, { "epoch": 0.4531620582472784, "grad_norm": 1.3637764620046773, "learning_rate": 6.658292903925651e-06, "loss": 0.3656, "step": 15381 }, { "epoch": 0.453191520704742, "grad_norm": 1.5477505573528583, "learning_rate": 6.657807761769137e-06, "loss": 0.4555, "step": 15382 }, { "epoch": 0.45322098316220555, "grad_norm": 1.6750413509767352, "learning_rate": 6.657322602076841e-06, "loss": 0.4781, "step": 15383 }, { "epoch": 0.45325044561966915, "grad_norm": 1.604722063778676, "learning_rate": 6.65683742485389e-06, "loss": 0.4516, "step": 15384 }, { "epoch": 0.4532799080771327, "grad_norm": 1.454747843565023, "learning_rate": 6.6563522301054175e-06, "loss": 0.4719, "step": 15385 }, { "epoch": 0.4533093705345963, "grad_norm": 1.5679555885056653, "learning_rate": 6.655867017836558e-06, "loss": 0.3677, "step": 15386 }, { "epoch": 0.45333883299205985, "grad_norm": 1.429693361057133, "learning_rate": 6.655381788052439e-06, "loss": 0.4738, "step": 15387 }, { "epoch": 0.45336829544952345, "grad_norm": 1.70690867171081, "learning_rate": 6.654896540758198e-06, "loss": 0.5187, "step": 15388 }, { "epoch": 0.453397757906987, "grad_norm": 1.4867197502209757, "learning_rate": 6.6544112759589655e-06, "loss": 0.5208, "step": 15389 }, { "epoch": 0.4534272203644506, "grad_norm": 1.4098025468985909, "learning_rate": 6.653925993659873e-06, "loss": 0.3481, "step": 15390 }, { "epoch": 0.4534566828219142, "grad_norm": 1.4197445242731048, "learning_rate": 6.653440693866057e-06, "loss": 0.4227, "step": 15391 }, { "epoch": 0.45348614527937775, "grad_norm": 1.5810460338929266, "learning_rate": 6.652955376582649e-06, "loss": 0.4223, "step": 15392 }, { "epoch": 0.45351560773684135, "grad_norm": 1.8078060993730405, "learning_rate": 6.6524700418147825e-06, "loss": 0.5047, "step": 15393 }, { "epoch": 0.4535450701943049, "grad_norm": 1.5922441126826925, "learning_rate": 6.651984689567591e-06, "loss": 0.5437, "step": 15394 }, { "epoch": 0.4535745326517685, "grad_norm": 1.77372579106301, "learning_rate": 6.65149931984621e-06, "loss": 0.5086, "step": 15395 }, { "epoch": 0.45360399510923205, "grad_norm": 1.6576021088738446, "learning_rate": 6.651013932655772e-06, "loss": 0.4432, "step": 15396 }, { "epoch": 0.45363345756669565, "grad_norm": 1.793975974746576, "learning_rate": 6.650528528001412e-06, "loss": 0.6103, "step": 15397 }, { "epoch": 0.4536629200241592, "grad_norm": 1.7670614037632661, "learning_rate": 6.650043105888264e-06, "loss": 0.6414, "step": 15398 }, { "epoch": 0.4536923824816228, "grad_norm": 1.5347392321420166, "learning_rate": 6.649557666321462e-06, "loss": 0.448, "step": 15399 }, { "epoch": 0.45372184493908635, "grad_norm": 1.556787313057616, "learning_rate": 6.649072209306142e-06, "loss": 0.4197, "step": 15400 }, { "epoch": 0.45375130739654995, "grad_norm": 1.3897524163683843, "learning_rate": 6.648586734847442e-06, "loss": 0.3082, "step": 15401 }, { "epoch": 0.4537807698540135, "grad_norm": 1.5083329451385168, "learning_rate": 6.648101242950491e-06, "loss": 0.3937, "step": 15402 }, { "epoch": 0.4538102323114771, "grad_norm": 1.366349546899541, "learning_rate": 6.647615733620427e-06, "loss": 0.3868, "step": 15403 }, { "epoch": 0.4538396947689407, "grad_norm": 1.6876861038670916, "learning_rate": 6.647130206862387e-06, "loss": 0.5245, "step": 15404 }, { "epoch": 0.45386915722640425, "grad_norm": 1.6300935639034464, "learning_rate": 6.646644662681504e-06, "loss": 0.485, "step": 15405 }, { "epoch": 0.45389861968386785, "grad_norm": 1.5072452467720607, "learning_rate": 6.6461591010829175e-06, "loss": 0.3845, "step": 15406 }, { "epoch": 0.4539280821413314, "grad_norm": 1.4107083174712205, "learning_rate": 6.6456735220717615e-06, "loss": 0.39, "step": 15407 }, { "epoch": 0.453957544598795, "grad_norm": 1.3005890512750813, "learning_rate": 6.645187925653171e-06, "loss": 0.377, "step": 15408 }, { "epoch": 0.45398700705625855, "grad_norm": 1.324244587664994, "learning_rate": 6.644702311832285e-06, "loss": 0.3552, "step": 15409 }, { "epoch": 0.45401646951372215, "grad_norm": 1.4093241928910378, "learning_rate": 6.64421668061424e-06, "loss": 0.3764, "step": 15410 }, { "epoch": 0.4540459319711857, "grad_norm": 1.5371960069417294, "learning_rate": 6.643731032004171e-06, "loss": 0.5673, "step": 15411 }, { "epoch": 0.4540753944286493, "grad_norm": 1.5537950431462022, "learning_rate": 6.643245366007216e-06, "loss": 0.3957, "step": 15412 }, { "epoch": 0.45410485688611285, "grad_norm": 1.5027221122477323, "learning_rate": 6.642759682628513e-06, "loss": 0.4584, "step": 15413 }, { "epoch": 0.45413431934357645, "grad_norm": 1.8796515560291756, "learning_rate": 6.642273981873198e-06, "loss": 0.5991, "step": 15414 }, { "epoch": 0.45416378180104, "grad_norm": 1.4837249726027104, "learning_rate": 6.64178826374641e-06, "loss": 0.5038, "step": 15415 }, { "epoch": 0.4541932442585036, "grad_norm": 1.535395618734439, "learning_rate": 6.641302528253286e-06, "loss": 0.2851, "step": 15416 }, { "epoch": 0.4542227067159672, "grad_norm": 1.3977791221028604, "learning_rate": 6.640816775398964e-06, "loss": 0.3943, "step": 15417 }, { "epoch": 0.45425216917343075, "grad_norm": 1.7234694853742047, "learning_rate": 6.640331005188582e-06, "loss": 0.5145, "step": 15418 }, { "epoch": 0.45428163163089436, "grad_norm": 1.5417094769369264, "learning_rate": 6.639845217627279e-06, "loss": 0.4529, "step": 15419 }, { "epoch": 0.4543110940883579, "grad_norm": 1.5508032292184, "learning_rate": 6.639359412720192e-06, "loss": 0.5054, "step": 15420 }, { "epoch": 0.4543405565458215, "grad_norm": 1.6420225850167665, "learning_rate": 6.6388735904724625e-06, "loss": 0.4719, "step": 15421 }, { "epoch": 0.45437001900328505, "grad_norm": 1.563434591144283, "learning_rate": 6.638387750889227e-06, "loss": 0.3972, "step": 15422 }, { "epoch": 0.45439948146074866, "grad_norm": 1.5494051557228556, "learning_rate": 6.637901893975623e-06, "loss": 0.3886, "step": 15423 }, { "epoch": 0.4544289439182122, "grad_norm": 1.5723024496696578, "learning_rate": 6.637416019736794e-06, "loss": 0.392, "step": 15424 }, { "epoch": 0.4544584063756758, "grad_norm": 1.4608023498427016, "learning_rate": 6.636930128177878e-06, "loss": 0.5031, "step": 15425 }, { "epoch": 0.45448786883313935, "grad_norm": 1.4194491747183475, "learning_rate": 6.636444219304012e-06, "loss": 0.4548, "step": 15426 }, { "epoch": 0.45451733129060296, "grad_norm": 1.4366536530588792, "learning_rate": 6.6359582931203395e-06, "loss": 0.465, "step": 15427 }, { "epoch": 0.4545467937480665, "grad_norm": 1.6817855476795562, "learning_rate": 6.635472349631998e-06, "loss": 0.5198, "step": 15428 }, { "epoch": 0.4545762562055301, "grad_norm": 1.5863372528039779, "learning_rate": 6.634986388844128e-06, "loss": 0.4447, "step": 15429 }, { "epoch": 0.4546057186629937, "grad_norm": 1.544859946413545, "learning_rate": 6.634500410761871e-06, "loss": 0.4436, "step": 15430 }, { "epoch": 0.45463518112045725, "grad_norm": 1.8960301973240794, "learning_rate": 6.634014415390366e-06, "loss": 0.4204, "step": 15431 }, { "epoch": 0.45466464357792086, "grad_norm": 1.610128184158508, "learning_rate": 6.633528402734756e-06, "loss": 0.5625, "step": 15432 }, { "epoch": 0.4546941060353844, "grad_norm": 1.5421460261527307, "learning_rate": 6.633042372800178e-06, "loss": 0.5018, "step": 15433 }, { "epoch": 0.454723568492848, "grad_norm": 1.5297633505821213, "learning_rate": 6.632556325591778e-06, "loss": 0.622, "step": 15434 }, { "epoch": 0.45475303095031155, "grad_norm": 1.5168477090149302, "learning_rate": 6.6320702611146925e-06, "loss": 0.5139, "step": 15435 }, { "epoch": 0.45478249340777516, "grad_norm": 1.5216166305006869, "learning_rate": 6.631584179374066e-06, "loss": 0.4869, "step": 15436 }, { "epoch": 0.4548119558652387, "grad_norm": 1.7745304901527554, "learning_rate": 6.6310980803750405e-06, "loss": 0.6079, "step": 15437 }, { "epoch": 0.4548414183227023, "grad_norm": 1.7822976614632768, "learning_rate": 6.630611964122755e-06, "loss": 0.4611, "step": 15438 }, { "epoch": 0.45487088078016585, "grad_norm": 1.5479596582598254, "learning_rate": 6.630125830622353e-06, "loss": 0.4974, "step": 15439 }, { "epoch": 0.45490034323762946, "grad_norm": 1.3941868626855172, "learning_rate": 6.6296396798789784e-06, "loss": 0.427, "step": 15440 }, { "epoch": 0.454929805695093, "grad_norm": 1.3681338613550904, "learning_rate": 6.6291535118977715e-06, "loss": 0.4201, "step": 15441 }, { "epoch": 0.4549592681525566, "grad_norm": 1.620656272034756, "learning_rate": 6.628667326683874e-06, "loss": 0.6033, "step": 15442 }, { "epoch": 0.4549887306100202, "grad_norm": 1.5066020138374558, "learning_rate": 6.628181124242431e-06, "loss": 0.4096, "step": 15443 }, { "epoch": 0.45501819306748376, "grad_norm": 1.4223719633305731, "learning_rate": 6.627694904578583e-06, "loss": 0.3488, "step": 15444 }, { "epoch": 0.45504765552494736, "grad_norm": 1.5093854203833121, "learning_rate": 6.627208667697476e-06, "loss": 0.3522, "step": 15445 }, { "epoch": 0.4550771179824109, "grad_norm": 1.3760579730831872, "learning_rate": 6.626722413604251e-06, "loss": 0.3708, "step": 15446 }, { "epoch": 0.4551065804398745, "grad_norm": 1.6575662192938994, "learning_rate": 6.626236142304052e-06, "loss": 0.4396, "step": 15447 }, { "epoch": 0.45513604289733806, "grad_norm": 1.6935060813305356, "learning_rate": 6.625749853802022e-06, "loss": 0.4531, "step": 15448 }, { "epoch": 0.45516550535480166, "grad_norm": 1.489160236637081, "learning_rate": 6.625263548103307e-06, "loss": 0.3411, "step": 15449 }, { "epoch": 0.4551949678122652, "grad_norm": 1.632840846083646, "learning_rate": 6.624777225213049e-06, "loss": 0.4998, "step": 15450 }, { "epoch": 0.4552244302697288, "grad_norm": 1.442109586345807, "learning_rate": 6.624290885136392e-06, "loss": 0.4776, "step": 15451 }, { "epoch": 0.45525389272719236, "grad_norm": 1.376876060478329, "learning_rate": 6.62380452787848e-06, "loss": 0.4826, "step": 15452 }, { "epoch": 0.45528335518465596, "grad_norm": 1.674758371456186, "learning_rate": 6.62331815344446e-06, "loss": 0.438, "step": 15453 }, { "epoch": 0.4553128176421195, "grad_norm": 1.3309414023681547, "learning_rate": 6.6228317618394756e-06, "loss": 0.3701, "step": 15454 }, { "epoch": 0.4553422800995831, "grad_norm": 1.5055110251386765, "learning_rate": 6.622345353068671e-06, "loss": 0.457, "step": 15455 }, { "epoch": 0.4553717425570467, "grad_norm": 1.5342547897623027, "learning_rate": 6.621858927137192e-06, "loss": 0.451, "step": 15456 }, { "epoch": 0.45540120501451026, "grad_norm": 1.516775937844818, "learning_rate": 6.621372484050184e-06, "loss": 0.5254, "step": 15457 }, { "epoch": 0.45543066747197386, "grad_norm": 1.5162988834582127, "learning_rate": 6.620886023812789e-06, "loss": 0.5286, "step": 15458 }, { "epoch": 0.4554601299294374, "grad_norm": 1.5061153290006692, "learning_rate": 6.620399546430158e-06, "loss": 0.4882, "step": 15459 }, { "epoch": 0.455489592386901, "grad_norm": 1.5351606149802148, "learning_rate": 6.619913051907435e-06, "loss": 0.4697, "step": 15460 }, { "epoch": 0.45551905484436456, "grad_norm": 1.5273984437504846, "learning_rate": 6.619426540249764e-06, "loss": 0.4194, "step": 15461 }, { "epoch": 0.45554851730182816, "grad_norm": 1.7910052732739243, "learning_rate": 6.618940011462295e-06, "loss": 0.4967, "step": 15462 }, { "epoch": 0.4555779797592917, "grad_norm": 1.2770302684906178, "learning_rate": 6.618453465550169e-06, "loss": 0.378, "step": 15463 }, { "epoch": 0.4556074422167553, "grad_norm": 1.7591370622754527, "learning_rate": 6.617966902518537e-06, "loss": 0.5794, "step": 15464 }, { "epoch": 0.45563690467421886, "grad_norm": 1.3108644069137532, "learning_rate": 6.617480322372544e-06, "loss": 0.3419, "step": 15465 }, { "epoch": 0.45566636713168246, "grad_norm": 1.2699748206189059, "learning_rate": 6.616993725117338e-06, "loss": 0.2958, "step": 15466 }, { "epoch": 0.455695829589146, "grad_norm": 1.4975405774059718, "learning_rate": 6.616507110758064e-06, "loss": 0.4958, "step": 15467 }, { "epoch": 0.4557252920466096, "grad_norm": 1.4567749504221486, "learning_rate": 6.616020479299869e-06, "loss": 0.4637, "step": 15468 }, { "epoch": 0.4557547545040732, "grad_norm": 1.561748333828875, "learning_rate": 6.615533830747904e-06, "loss": 0.5359, "step": 15469 }, { "epoch": 0.45578421696153676, "grad_norm": 1.638971698840404, "learning_rate": 6.615047165107314e-06, "loss": 0.4642, "step": 15470 }, { "epoch": 0.45581367941900036, "grad_norm": 1.6228495007817503, "learning_rate": 6.6145604823832474e-06, "loss": 0.4143, "step": 15471 }, { "epoch": 0.4558431418764639, "grad_norm": 1.5523605155423856, "learning_rate": 6.614073782580852e-06, "loss": 0.433, "step": 15472 }, { "epoch": 0.4558726043339275, "grad_norm": 1.5337757582304088, "learning_rate": 6.613587065705276e-06, "loss": 0.4887, "step": 15473 }, { "epoch": 0.45590206679139106, "grad_norm": 1.5311980468543744, "learning_rate": 6.613100331761668e-06, "loss": 0.4901, "step": 15474 }, { "epoch": 0.45593152924885466, "grad_norm": 1.5378186002802896, "learning_rate": 6.612613580755175e-06, "loss": 0.3781, "step": 15475 }, { "epoch": 0.4559609917063182, "grad_norm": 1.645687938279398, "learning_rate": 6.6121268126909485e-06, "loss": 0.5095, "step": 15476 }, { "epoch": 0.4559904541637818, "grad_norm": 1.497479877573537, "learning_rate": 6.611640027574137e-06, "loss": 0.4509, "step": 15477 }, { "epoch": 0.45601991662124536, "grad_norm": 1.4510299301105032, "learning_rate": 6.6111532254098856e-06, "loss": 0.3799, "step": 15478 }, { "epoch": 0.45604937907870896, "grad_norm": 1.422760178805315, "learning_rate": 6.610666406203348e-06, "loss": 0.4813, "step": 15479 }, { "epoch": 0.4560788415361725, "grad_norm": 1.449053854748979, "learning_rate": 6.610179569959672e-06, "loss": 0.3612, "step": 15480 }, { "epoch": 0.4561083039936361, "grad_norm": 1.5039172403018968, "learning_rate": 6.609692716684006e-06, "loss": 0.5728, "step": 15481 }, { "epoch": 0.4561377664510997, "grad_norm": 1.4326016199548612, "learning_rate": 6.609205846381502e-06, "loss": 0.5305, "step": 15482 }, { "epoch": 0.45616722890856326, "grad_norm": 1.581888624241861, "learning_rate": 6.60871895905731e-06, "loss": 0.5565, "step": 15483 }, { "epoch": 0.45619669136602686, "grad_norm": 1.5130170047780644, "learning_rate": 6.6082320547165764e-06, "loss": 0.4352, "step": 15484 }, { "epoch": 0.4562261538234904, "grad_norm": 1.7616262829501188, "learning_rate": 6.607745133364456e-06, "loss": 0.5742, "step": 15485 }, { "epoch": 0.456255616280954, "grad_norm": 1.8637827549353152, "learning_rate": 6.607258195006099e-06, "loss": 0.4067, "step": 15486 }, { "epoch": 0.45628507873841756, "grad_norm": 1.736181257892056, "learning_rate": 6.6067712396466524e-06, "loss": 0.5778, "step": 15487 }, { "epoch": 0.45631454119588116, "grad_norm": 1.540004497212095, "learning_rate": 6.60628426729127e-06, "loss": 0.4251, "step": 15488 }, { "epoch": 0.4563440036533447, "grad_norm": 1.749822697682867, "learning_rate": 6.605797277945102e-06, "loss": 0.4388, "step": 15489 }, { "epoch": 0.4563734661108083, "grad_norm": 1.4101008296257118, "learning_rate": 6.605310271613299e-06, "loss": 0.4889, "step": 15490 }, { "epoch": 0.45640292856827186, "grad_norm": 1.3771404273625938, "learning_rate": 6.6048232483010155e-06, "loss": 0.4078, "step": 15491 }, { "epoch": 0.45643239102573546, "grad_norm": 1.3973917435898087, "learning_rate": 6.604336208013399e-06, "loss": 0.4635, "step": 15492 }, { "epoch": 0.456461853483199, "grad_norm": 1.6842669391496488, "learning_rate": 6.603849150755603e-06, "loss": 0.5209, "step": 15493 }, { "epoch": 0.4564913159406626, "grad_norm": 1.6660934799464733, "learning_rate": 6.60336207653278e-06, "loss": 0.6613, "step": 15494 }, { "epoch": 0.4565207783981262, "grad_norm": 1.608744864913422, "learning_rate": 6.6028749853500805e-06, "loss": 0.4541, "step": 15495 }, { "epoch": 0.45655024085558976, "grad_norm": 1.842220786499626, "learning_rate": 6.602387877212661e-06, "loss": 0.5871, "step": 15496 }, { "epoch": 0.45657970331305336, "grad_norm": 1.5388868121045813, "learning_rate": 6.601900752125667e-06, "loss": 0.524, "step": 15497 }, { "epoch": 0.4566091657705169, "grad_norm": 1.6536780883170368, "learning_rate": 6.601413610094257e-06, "loss": 0.5783, "step": 15498 }, { "epoch": 0.4566386282279805, "grad_norm": 1.3277509640366252, "learning_rate": 6.600926451123581e-06, "loss": 0.3957, "step": 15499 }, { "epoch": 0.45666809068544406, "grad_norm": 1.570302091667657, "learning_rate": 6.600439275218794e-06, "loss": 0.5478, "step": 15500 }, { "epoch": 0.45669755314290766, "grad_norm": 1.6173247598528135, "learning_rate": 6.599952082385048e-06, "loss": 0.3907, "step": 15501 }, { "epoch": 0.4567270156003712, "grad_norm": 1.2809687077043104, "learning_rate": 6.599464872627497e-06, "loss": 0.3729, "step": 15502 }, { "epoch": 0.4567564780578348, "grad_norm": 1.5248440763004634, "learning_rate": 6.5989776459512925e-06, "loss": 0.5614, "step": 15503 }, { "epoch": 0.45678594051529836, "grad_norm": 1.4216965946319393, "learning_rate": 6.59849040236159e-06, "loss": 0.3691, "step": 15504 }, { "epoch": 0.45681540297276196, "grad_norm": 1.4963674536995624, "learning_rate": 6.598003141863544e-06, "loss": 0.4342, "step": 15505 }, { "epoch": 0.4568448654302255, "grad_norm": 1.3860546888341463, "learning_rate": 6.597515864462307e-06, "loss": 0.5459, "step": 15506 }, { "epoch": 0.4568743278876891, "grad_norm": 1.3815658736556242, "learning_rate": 6.597028570163036e-06, "loss": 0.4326, "step": 15507 }, { "epoch": 0.4569037903451527, "grad_norm": 1.4406853121046053, "learning_rate": 6.596541258970881e-06, "loss": 0.3962, "step": 15508 }, { "epoch": 0.45693325280261626, "grad_norm": 1.4587455072982494, "learning_rate": 6.596053930890999e-06, "loss": 0.4069, "step": 15509 }, { "epoch": 0.45696271526007987, "grad_norm": 1.5021164487111545, "learning_rate": 6.5955665859285466e-06, "loss": 0.5076, "step": 15510 }, { "epoch": 0.4569921777175434, "grad_norm": 1.7669919135582917, "learning_rate": 6.5950792240886765e-06, "loss": 0.4732, "step": 15511 }, { "epoch": 0.457021640175007, "grad_norm": 1.4618259386686379, "learning_rate": 6.5945918453765446e-06, "loss": 0.3939, "step": 15512 }, { "epoch": 0.45705110263247056, "grad_norm": 1.4543896528899076, "learning_rate": 6.594104449797306e-06, "loss": 0.4977, "step": 15513 }, { "epoch": 0.45708056508993417, "grad_norm": 1.6261000262188245, "learning_rate": 6.593617037356116e-06, "loss": 0.5713, "step": 15514 }, { "epoch": 0.4571100275473977, "grad_norm": 1.578116509297295, "learning_rate": 6.5931296080581305e-06, "loss": 0.4293, "step": 15515 }, { "epoch": 0.4571394900048613, "grad_norm": 1.4521403663467267, "learning_rate": 6.5926421619085065e-06, "loss": 0.4947, "step": 15516 }, { "epoch": 0.45716895246232486, "grad_norm": 1.774035446841071, "learning_rate": 6.5921546989123984e-06, "loss": 0.606, "step": 15517 }, { "epoch": 0.45719841491978847, "grad_norm": 1.6734597639083757, "learning_rate": 6.591667219074963e-06, "loss": 0.5936, "step": 15518 }, { "epoch": 0.457227877377252, "grad_norm": 1.6673682136786163, "learning_rate": 6.591179722401358e-06, "loss": 0.5244, "step": 15519 }, { "epoch": 0.4572573398347156, "grad_norm": 1.667346502910487, "learning_rate": 6.590692208896738e-06, "loss": 0.5473, "step": 15520 }, { "epoch": 0.4572868022921792, "grad_norm": 1.4408930938438553, "learning_rate": 6.59020467856626e-06, "loss": 0.4685, "step": 15521 }, { "epoch": 0.45731626474964276, "grad_norm": 1.5632040963586915, "learning_rate": 6.589717131415083e-06, "loss": 0.5196, "step": 15522 }, { "epoch": 0.45734572720710637, "grad_norm": 1.6941767003870916, "learning_rate": 6.5892295674483605e-06, "loss": 0.4974, "step": 15523 }, { "epoch": 0.4573751896645699, "grad_norm": 1.302721974982491, "learning_rate": 6.588741986671253e-06, "loss": 0.3674, "step": 15524 }, { "epoch": 0.4574046521220335, "grad_norm": 1.4611776969913501, "learning_rate": 6.588254389088919e-06, "loss": 0.3554, "step": 15525 }, { "epoch": 0.45743411457949706, "grad_norm": 1.3603202308294298, "learning_rate": 6.5877667747065125e-06, "loss": 0.4259, "step": 15526 }, { "epoch": 0.45746357703696067, "grad_norm": 1.317072382560051, "learning_rate": 6.587279143529191e-06, "loss": 0.3135, "step": 15527 }, { "epoch": 0.4574930394944242, "grad_norm": 1.771530154271151, "learning_rate": 6.586791495562117e-06, "loss": 0.6415, "step": 15528 }, { "epoch": 0.4575225019518878, "grad_norm": 1.364083434668235, "learning_rate": 6.586303830810446e-06, "loss": 0.3375, "step": 15529 }, { "epoch": 0.45755196440935136, "grad_norm": 1.310407364384329, "learning_rate": 6.585816149279333e-06, "loss": 0.3959, "step": 15530 }, { "epoch": 0.45758142686681497, "grad_norm": 1.5059778352493935, "learning_rate": 6.5853284509739455e-06, "loss": 0.5184, "step": 15531 }, { "epoch": 0.4576108893242785, "grad_norm": 1.2672322043177118, "learning_rate": 6.584840735899434e-06, "loss": 0.3758, "step": 15532 }, { "epoch": 0.4576403517817421, "grad_norm": 1.5367497716721568, "learning_rate": 6.5843530040609595e-06, "loss": 0.3714, "step": 15533 }, { "epoch": 0.4576698142392057, "grad_norm": 1.5005829054507993, "learning_rate": 6.583865255463684e-06, "loss": 0.4747, "step": 15534 }, { "epoch": 0.45769927669666927, "grad_norm": 1.4241911670324383, "learning_rate": 6.583377490112762e-06, "loss": 0.4059, "step": 15535 }, { "epoch": 0.45772873915413287, "grad_norm": 1.2746540181688621, "learning_rate": 6.582889708013357e-06, "loss": 0.3927, "step": 15536 }, { "epoch": 0.4577582016115964, "grad_norm": 1.608114678952981, "learning_rate": 6.582401909170628e-06, "loss": 0.6347, "step": 15537 }, { "epoch": 0.45778766406906, "grad_norm": 1.5458351784469606, "learning_rate": 6.581914093589732e-06, "loss": 0.4403, "step": 15538 }, { "epoch": 0.45781712652652357, "grad_norm": 1.3233188647298402, "learning_rate": 6.581426261275831e-06, "loss": 0.4035, "step": 15539 }, { "epoch": 0.45784658898398717, "grad_norm": 1.5636285671782986, "learning_rate": 6.580938412234086e-06, "loss": 0.4482, "step": 15540 }, { "epoch": 0.4578760514414507, "grad_norm": 1.3966324271317698, "learning_rate": 6.580450546469656e-06, "loss": 0.4621, "step": 15541 }, { "epoch": 0.4579055138989143, "grad_norm": 1.6299540240976869, "learning_rate": 6.5799626639877e-06, "loss": 0.5785, "step": 15542 }, { "epoch": 0.45793497635637787, "grad_norm": 1.512226365003604, "learning_rate": 6.579474764793383e-06, "loss": 0.5116, "step": 15543 }, { "epoch": 0.45796443881384147, "grad_norm": 1.8859449461332427, "learning_rate": 6.578986848891862e-06, "loss": 0.6239, "step": 15544 }, { "epoch": 0.457993901271305, "grad_norm": 1.4693057945017303, "learning_rate": 6.578498916288298e-06, "loss": 0.3926, "step": 15545 }, { "epoch": 0.4580233637287686, "grad_norm": 1.6845516370222504, "learning_rate": 6.578010966987855e-06, "loss": 0.5133, "step": 15546 }, { "epoch": 0.4580528261862322, "grad_norm": 1.6743465976254666, "learning_rate": 6.5775230009956946e-06, "loss": 0.5567, "step": 15547 }, { "epoch": 0.45808228864369577, "grad_norm": 1.432163825706474, "learning_rate": 6.577035018316974e-06, "loss": 0.4582, "step": 15548 }, { "epoch": 0.45811175110115937, "grad_norm": 1.3100639120313364, "learning_rate": 6.576547018956859e-06, "loss": 0.3761, "step": 15549 }, { "epoch": 0.4581412135586229, "grad_norm": 1.5225953069991505, "learning_rate": 6.576059002920509e-06, "loss": 0.4477, "step": 15550 }, { "epoch": 0.4581706760160865, "grad_norm": 1.5521956264654164, "learning_rate": 6.575570970213089e-06, "loss": 0.5341, "step": 15551 }, { "epoch": 0.45820013847355007, "grad_norm": 1.736358534950956, "learning_rate": 6.575082920839757e-06, "loss": 0.38, "step": 15552 }, { "epoch": 0.45822960093101367, "grad_norm": 1.6571981378462837, "learning_rate": 6.574594854805681e-06, "loss": 0.5999, "step": 15553 }, { "epoch": 0.4582590633884772, "grad_norm": 1.626022294571646, "learning_rate": 6.574106772116018e-06, "loss": 0.5418, "step": 15554 }, { "epoch": 0.4582885258459408, "grad_norm": 1.5629226744976026, "learning_rate": 6.573618672775935e-06, "loss": 0.4453, "step": 15555 }, { "epoch": 0.45831798830340437, "grad_norm": 1.645580633560815, "learning_rate": 6.573130556790593e-06, "loss": 0.6806, "step": 15556 }, { "epoch": 0.45834745076086797, "grad_norm": 1.3426515690884449, "learning_rate": 6.572642424165153e-06, "loss": 0.4162, "step": 15557 }, { "epoch": 0.4583769132183315, "grad_norm": 1.5042179459922578, "learning_rate": 6.572154274904783e-06, "loss": 0.569, "step": 15558 }, { "epoch": 0.4584063756757951, "grad_norm": 1.528626394556921, "learning_rate": 6.5716661090146425e-06, "loss": 0.4121, "step": 15559 }, { "epoch": 0.4584358381332587, "grad_norm": 1.5766605897546642, "learning_rate": 6.571177926499897e-06, "loss": 0.4602, "step": 15560 }, { "epoch": 0.45846530059072227, "grad_norm": 1.5151241679740615, "learning_rate": 6.5706897273657126e-06, "loss": 0.5435, "step": 15561 }, { "epoch": 0.45849476304818587, "grad_norm": 1.706983946447744, "learning_rate": 6.57020151161725e-06, "loss": 0.5721, "step": 15562 }, { "epoch": 0.4585242255056494, "grad_norm": 1.3268594312004496, "learning_rate": 6.569713279259673e-06, "loss": 0.4638, "step": 15563 }, { "epoch": 0.458553687963113, "grad_norm": 1.8267490366393646, "learning_rate": 6.569225030298149e-06, "loss": 0.4285, "step": 15564 }, { "epoch": 0.45858315042057657, "grad_norm": 1.6137632118956313, "learning_rate": 6.56873676473784e-06, "loss": 0.603, "step": 15565 }, { "epoch": 0.45861261287804017, "grad_norm": 1.442711864921375, "learning_rate": 6.568248482583912e-06, "loss": 0.5346, "step": 15566 }, { "epoch": 0.4586420753355037, "grad_norm": 1.4495640454473893, "learning_rate": 6.567760183841528e-06, "loss": 0.415, "step": 15567 }, { "epoch": 0.4586715377929673, "grad_norm": 1.5694489301526509, "learning_rate": 6.567271868515856e-06, "loss": 0.5323, "step": 15568 }, { "epoch": 0.45870100025043087, "grad_norm": 1.6430834861709311, "learning_rate": 6.566783536612059e-06, "loss": 0.5563, "step": 15569 }, { "epoch": 0.45873046270789447, "grad_norm": 1.4608240598046844, "learning_rate": 6.566295188135304e-06, "loss": 0.3959, "step": 15570 }, { "epoch": 0.458759925165358, "grad_norm": 1.488773837315018, "learning_rate": 6.565806823090757e-06, "loss": 0.4408, "step": 15571 }, { "epoch": 0.4587893876228216, "grad_norm": 1.5467332942498722, "learning_rate": 6.56531844148358e-06, "loss": 0.4603, "step": 15572 }, { "epoch": 0.4588188500802852, "grad_norm": 1.3819669786733, "learning_rate": 6.564830043318943e-06, "loss": 0.4406, "step": 15573 }, { "epoch": 0.45884831253774877, "grad_norm": 1.4496829419245973, "learning_rate": 6.5643416286020115e-06, "loss": 0.4772, "step": 15574 }, { "epoch": 0.4588777749952124, "grad_norm": 1.3336494263074747, "learning_rate": 6.563853197337948e-06, "loss": 0.3997, "step": 15575 }, { "epoch": 0.4589072374526759, "grad_norm": 1.53817920810266, "learning_rate": 6.563364749531925e-06, "loss": 0.4273, "step": 15576 }, { "epoch": 0.4589366999101395, "grad_norm": 1.6093889792488236, "learning_rate": 6.562876285189106e-06, "loss": 0.4597, "step": 15577 }, { "epoch": 0.45896616236760307, "grad_norm": 1.4034992674795859, "learning_rate": 6.562387804314656e-06, "loss": 0.4469, "step": 15578 }, { "epoch": 0.4589956248250667, "grad_norm": 1.4168830920477762, "learning_rate": 6.561899306913745e-06, "loss": 0.4496, "step": 15579 }, { "epoch": 0.4590250872825302, "grad_norm": 1.4182084401271982, "learning_rate": 6.561410792991539e-06, "loss": 0.4575, "step": 15580 }, { "epoch": 0.4590545497399938, "grad_norm": 1.3689659926848914, "learning_rate": 6.560922262553206e-06, "loss": 0.4579, "step": 15581 }, { "epoch": 0.45908401219745737, "grad_norm": 1.3788139805197568, "learning_rate": 6.560433715603914e-06, "loss": 0.4134, "step": 15582 }, { "epoch": 0.45911347465492097, "grad_norm": 1.4430871988471947, "learning_rate": 6.559945152148828e-06, "loss": 0.3384, "step": 15583 }, { "epoch": 0.4591429371123845, "grad_norm": 1.4462881753869339, "learning_rate": 6.559456572193117e-06, "loss": 0.5112, "step": 15584 }, { "epoch": 0.4591723995698481, "grad_norm": 1.456524618987971, "learning_rate": 6.5589679757419515e-06, "loss": 0.3689, "step": 15585 }, { "epoch": 0.4592018620273117, "grad_norm": 1.4762630186584025, "learning_rate": 6.558479362800497e-06, "loss": 0.4932, "step": 15586 }, { "epoch": 0.45923132448477527, "grad_norm": 1.455627282344302, "learning_rate": 6.5579907333739226e-06, "loss": 0.4517, "step": 15587 }, { "epoch": 0.4592607869422389, "grad_norm": 1.5119210970015464, "learning_rate": 6.557502087467397e-06, "loss": 0.4613, "step": 15588 }, { "epoch": 0.4592902493997024, "grad_norm": 1.508438643121399, "learning_rate": 6.55701342508609e-06, "loss": 0.4971, "step": 15589 }, { "epoch": 0.459319711857166, "grad_norm": 1.4270103721508165, "learning_rate": 6.556524746235169e-06, "loss": 0.4245, "step": 15590 }, { "epoch": 0.45934917431462957, "grad_norm": 1.3977233001791787, "learning_rate": 6.556036050919802e-06, "loss": 0.4311, "step": 15591 }, { "epoch": 0.4593786367720932, "grad_norm": 1.5754918048241526, "learning_rate": 6.555547339145162e-06, "loss": 0.2594, "step": 15592 }, { "epoch": 0.4594080992295567, "grad_norm": 1.6373399957265478, "learning_rate": 6.555058610916414e-06, "loss": 0.7122, "step": 15593 }, { "epoch": 0.4594375616870203, "grad_norm": 1.4945479471678043, "learning_rate": 6.554569866238732e-06, "loss": 0.5943, "step": 15594 }, { "epoch": 0.45946702414448387, "grad_norm": 1.832204774490247, "learning_rate": 6.554081105117284e-06, "loss": 0.4958, "step": 15595 }, { "epoch": 0.4594964866019475, "grad_norm": 1.5585486363763732, "learning_rate": 6.5535923275572385e-06, "loss": 0.5122, "step": 15596 }, { "epoch": 0.459525949059411, "grad_norm": 1.520531381882649, "learning_rate": 6.553103533563766e-06, "loss": 0.5165, "step": 15597 }, { "epoch": 0.4595554115168746, "grad_norm": 1.3565396826353717, "learning_rate": 6.552614723142039e-06, "loss": 0.4289, "step": 15598 }, { "epoch": 0.4595848739743382, "grad_norm": 1.5251427672435198, "learning_rate": 6.552125896297227e-06, "loss": 0.3701, "step": 15599 }, { "epoch": 0.4596143364318018, "grad_norm": 1.6298116893608645, "learning_rate": 6.551637053034499e-06, "loss": 0.5355, "step": 15600 }, { "epoch": 0.4596437988892654, "grad_norm": 1.4709252651411955, "learning_rate": 6.551148193359029e-06, "loss": 0.3001, "step": 15601 }, { "epoch": 0.4596732613467289, "grad_norm": 1.490457488534359, "learning_rate": 6.550659317275985e-06, "loss": 0.4054, "step": 15602 }, { "epoch": 0.4597027238041925, "grad_norm": 1.4286670946045263, "learning_rate": 6.550170424790538e-06, "loss": 0.3021, "step": 15603 }, { "epoch": 0.4597321862616561, "grad_norm": 1.671270357547472, "learning_rate": 6.549681515907862e-06, "loss": 0.3403, "step": 15604 }, { "epoch": 0.4597616487191197, "grad_norm": 1.4590850616544941, "learning_rate": 6.549192590633129e-06, "loss": 0.3506, "step": 15605 }, { "epoch": 0.4597911111765832, "grad_norm": 1.3680897862401362, "learning_rate": 6.548703648971506e-06, "loss": 0.4289, "step": 15606 }, { "epoch": 0.4598205736340468, "grad_norm": 1.6246665257789716, "learning_rate": 6.548214690928169e-06, "loss": 0.5415, "step": 15607 }, { "epoch": 0.4598500360915104, "grad_norm": 1.4360926083267762, "learning_rate": 6.547725716508289e-06, "loss": 0.3966, "step": 15608 }, { "epoch": 0.459879498548974, "grad_norm": 1.546559593391714, "learning_rate": 6.547236725717037e-06, "loss": 0.4784, "step": 15609 }, { "epoch": 0.4599089610064375, "grad_norm": 1.5752022082028927, "learning_rate": 6.546747718559588e-06, "loss": 0.3942, "step": 15610 }, { "epoch": 0.4599384234639011, "grad_norm": 1.5843152975003063, "learning_rate": 6.5462586950411125e-06, "loss": 0.4646, "step": 15611 }, { "epoch": 0.4599678859213647, "grad_norm": 1.479679520911198, "learning_rate": 6.545769655166782e-06, "loss": 0.4721, "step": 15612 }, { "epoch": 0.4599973483788283, "grad_norm": 1.5815866139441608, "learning_rate": 6.5452805989417735e-06, "loss": 0.5041, "step": 15613 }, { "epoch": 0.4600268108362919, "grad_norm": 1.4818177927926615, "learning_rate": 6.544791526371257e-06, "loss": 0.4523, "step": 15614 }, { "epoch": 0.4600562732937554, "grad_norm": 1.551430777182387, "learning_rate": 6.544302437460406e-06, "loss": 0.4758, "step": 15615 }, { "epoch": 0.460085735751219, "grad_norm": 1.654483965396899, "learning_rate": 6.543813332214396e-06, "loss": 0.4453, "step": 15616 }, { "epoch": 0.4601151982086826, "grad_norm": 1.464232934575025, "learning_rate": 6.543324210638398e-06, "loss": 0.4701, "step": 15617 }, { "epoch": 0.4601446606661462, "grad_norm": 1.5842216293138316, "learning_rate": 6.542835072737586e-06, "loss": 0.5775, "step": 15618 }, { "epoch": 0.4601741231236097, "grad_norm": 1.688978009595263, "learning_rate": 6.5423459185171356e-06, "loss": 0.5569, "step": 15619 }, { "epoch": 0.4602035855810733, "grad_norm": 1.5473959165575548, "learning_rate": 6.54185674798222e-06, "loss": 0.5603, "step": 15620 }, { "epoch": 0.4602330480385369, "grad_norm": 1.604932924546145, "learning_rate": 6.541367561138015e-06, "loss": 0.5983, "step": 15621 }, { "epoch": 0.4602625104960005, "grad_norm": 1.3034335034603064, "learning_rate": 6.540878357989692e-06, "loss": 0.3314, "step": 15622 }, { "epoch": 0.460291972953464, "grad_norm": 1.4834870100961086, "learning_rate": 6.540389138542427e-06, "loss": 0.4499, "step": 15623 }, { "epoch": 0.4603214354109276, "grad_norm": 1.4374227665868282, "learning_rate": 6.539899902801395e-06, "loss": 0.5166, "step": 15624 }, { "epoch": 0.46035089786839123, "grad_norm": 1.4960207682001623, "learning_rate": 6.539410650771774e-06, "loss": 0.4371, "step": 15625 }, { "epoch": 0.4603803603258548, "grad_norm": 1.4973373565242079, "learning_rate": 6.538921382458734e-06, "loss": 0.4109, "step": 15626 }, { "epoch": 0.4604098227833184, "grad_norm": 1.7094371656939025, "learning_rate": 6.538432097867452e-06, "loss": 0.5173, "step": 15627 }, { "epoch": 0.4604392852407819, "grad_norm": 1.298035614445499, "learning_rate": 6.537942797003105e-06, "loss": 0.4131, "step": 15628 }, { "epoch": 0.46046874769824553, "grad_norm": 1.3508559785341667, "learning_rate": 6.537453479870868e-06, "loss": 0.41, "step": 15629 }, { "epoch": 0.4604982101557091, "grad_norm": 1.5372057799654457, "learning_rate": 6.536964146475917e-06, "loss": 0.3709, "step": 15630 }, { "epoch": 0.4605276726131727, "grad_norm": 1.4712218773478087, "learning_rate": 6.5364747968234275e-06, "loss": 0.5208, "step": 15631 }, { "epoch": 0.4605571350706362, "grad_norm": 1.4124637079116793, "learning_rate": 6.535985430918575e-06, "loss": 0.4428, "step": 15632 }, { "epoch": 0.46058659752809983, "grad_norm": 1.30607817381688, "learning_rate": 6.5354960487665364e-06, "loss": 0.4626, "step": 15633 }, { "epoch": 0.4606160599855634, "grad_norm": 1.4585053452446528, "learning_rate": 6.535006650372491e-06, "loss": 0.4476, "step": 15634 }, { "epoch": 0.460645522443027, "grad_norm": 1.7161045698191153, "learning_rate": 6.534517235741611e-06, "loss": 0.565, "step": 15635 }, { "epoch": 0.4606749849004905, "grad_norm": 1.5289294456485623, "learning_rate": 6.5340278048790755e-06, "loss": 0.3496, "step": 15636 }, { "epoch": 0.4607044473579541, "grad_norm": 1.7420335850419697, "learning_rate": 6.533538357790063e-06, "loss": 0.5289, "step": 15637 }, { "epoch": 0.46073390981541773, "grad_norm": 1.4846498144224538, "learning_rate": 6.533048894479746e-06, "loss": 0.5191, "step": 15638 }, { "epoch": 0.4607633722728813, "grad_norm": 1.7995847903550486, "learning_rate": 6.532559414953307e-06, "loss": 0.738, "step": 15639 }, { "epoch": 0.4607928347303449, "grad_norm": 1.700544595991698, "learning_rate": 6.532069919215922e-06, "loss": 0.6121, "step": 15640 }, { "epoch": 0.4608222971878084, "grad_norm": 1.5221109069260315, "learning_rate": 6.5315804072727675e-06, "loss": 0.4746, "step": 15641 }, { "epoch": 0.46085175964527203, "grad_norm": 1.3358073062102105, "learning_rate": 6.531090879129021e-06, "loss": 0.3847, "step": 15642 }, { "epoch": 0.4608812221027356, "grad_norm": 1.464096179062861, "learning_rate": 6.530601334789865e-06, "loss": 0.4104, "step": 15643 }, { "epoch": 0.4609106845601992, "grad_norm": 1.56993219254008, "learning_rate": 6.530111774260471e-06, "loss": 0.495, "step": 15644 }, { "epoch": 0.4609401470176627, "grad_norm": 1.522020689689218, "learning_rate": 6.529622197546024e-06, "loss": 0.5784, "step": 15645 }, { "epoch": 0.46096960947512633, "grad_norm": 1.5161314862209394, "learning_rate": 6.529132604651699e-06, "loss": 0.4798, "step": 15646 }, { "epoch": 0.4609990719325899, "grad_norm": 1.5143284005229927, "learning_rate": 6.528642995582674e-06, "loss": 0.5964, "step": 15647 }, { "epoch": 0.4610285343900535, "grad_norm": 1.7152481045270433, "learning_rate": 6.528153370344129e-06, "loss": 0.4335, "step": 15648 }, { "epoch": 0.461057996847517, "grad_norm": 1.649592395457, "learning_rate": 6.527663728941245e-06, "loss": 0.5054, "step": 15649 }, { "epoch": 0.46108745930498063, "grad_norm": 1.4748366872814325, "learning_rate": 6.5271740713791985e-06, "loss": 0.4886, "step": 15650 }, { "epoch": 0.46111692176244423, "grad_norm": 1.6009412372900742, "learning_rate": 6.52668439766317e-06, "loss": 0.5729, "step": 15651 }, { "epoch": 0.4611463842199078, "grad_norm": 1.5334468793381089, "learning_rate": 6.526194707798341e-06, "loss": 0.4004, "step": 15652 }, { "epoch": 0.4611758466773714, "grad_norm": 1.528472034766037, "learning_rate": 6.525705001789887e-06, "loss": 0.523, "step": 15653 }, { "epoch": 0.46120530913483493, "grad_norm": 1.6507893228687867, "learning_rate": 6.525215279642991e-06, "loss": 0.5465, "step": 15654 }, { "epoch": 0.46123477159229853, "grad_norm": 1.363883377532988, "learning_rate": 6.524725541362834e-06, "loss": 0.3779, "step": 15655 }, { "epoch": 0.4612642340497621, "grad_norm": 1.565864835759853, "learning_rate": 6.5242357869545944e-06, "loss": 0.5532, "step": 15656 }, { "epoch": 0.4612936965072257, "grad_norm": 1.4766266305559517, "learning_rate": 6.523746016423454e-06, "loss": 0.4434, "step": 15657 }, { "epoch": 0.46132315896468923, "grad_norm": 1.36335914910362, "learning_rate": 6.523256229774591e-06, "loss": 0.3615, "step": 15658 }, { "epoch": 0.46135262142215283, "grad_norm": 1.485305955433642, "learning_rate": 6.522766427013187e-06, "loss": 0.3995, "step": 15659 }, { "epoch": 0.4613820838796164, "grad_norm": 1.7420276177823033, "learning_rate": 6.522276608144427e-06, "loss": 0.3947, "step": 15660 }, { "epoch": 0.46141154633708, "grad_norm": 1.5386573580677303, "learning_rate": 6.521786773173486e-06, "loss": 0.5451, "step": 15661 }, { "epoch": 0.46144100879454353, "grad_norm": 1.5297146495991498, "learning_rate": 6.52129692210555e-06, "loss": 0.5976, "step": 15662 }, { "epoch": 0.46147047125200713, "grad_norm": 1.5212574876484304, "learning_rate": 6.520807054945797e-06, "loss": 0.4304, "step": 15663 }, { "epoch": 0.46149993370947073, "grad_norm": 1.5031992522059967, "learning_rate": 6.520317171699413e-06, "loss": 0.4765, "step": 15664 }, { "epoch": 0.4615293961669343, "grad_norm": 1.6371292013837409, "learning_rate": 6.519827272371576e-06, "loss": 0.5055, "step": 15665 }, { "epoch": 0.4615588586243979, "grad_norm": 1.2826388232298969, "learning_rate": 6.519337356967469e-06, "loss": 0.3611, "step": 15666 }, { "epoch": 0.46158832108186143, "grad_norm": 1.7019342225172862, "learning_rate": 6.518847425492274e-06, "loss": 0.5724, "step": 15667 }, { "epoch": 0.46161778353932503, "grad_norm": 1.3691932426876043, "learning_rate": 6.518357477951175e-06, "loss": 0.3061, "step": 15668 }, { "epoch": 0.4616472459967886, "grad_norm": 1.4668471912280179, "learning_rate": 6.5178675143493515e-06, "loss": 0.4958, "step": 15669 }, { "epoch": 0.4616767084542522, "grad_norm": 1.4409957230249213, "learning_rate": 6.517377534691991e-06, "loss": 0.4536, "step": 15670 }, { "epoch": 0.46170617091171573, "grad_norm": 1.4834141717517637, "learning_rate": 6.516887538984272e-06, "loss": 0.5207, "step": 15671 }, { "epoch": 0.46173563336917933, "grad_norm": 1.5199884906326564, "learning_rate": 6.516397527231378e-06, "loss": 0.5171, "step": 15672 }, { "epoch": 0.4617650958266429, "grad_norm": 1.3784373762381608, "learning_rate": 6.5159074994384945e-06, "loss": 0.4552, "step": 15673 }, { "epoch": 0.4617945582841065, "grad_norm": 1.4670196953309975, "learning_rate": 6.515417455610802e-06, "loss": 0.4201, "step": 15674 }, { "epoch": 0.46182402074157003, "grad_norm": 1.4245729813258359, "learning_rate": 6.514927395753486e-06, "loss": 0.315, "step": 15675 }, { "epoch": 0.46185348319903363, "grad_norm": 1.7146735596938143, "learning_rate": 6.51443731987173e-06, "loss": 0.4797, "step": 15676 }, { "epoch": 0.46188294565649723, "grad_norm": 1.4041733494847048, "learning_rate": 6.513947227970718e-06, "loss": 0.2744, "step": 15677 }, { "epoch": 0.4619124081139608, "grad_norm": 1.4456320519265589, "learning_rate": 6.513457120055633e-06, "loss": 0.4456, "step": 15678 }, { "epoch": 0.4619418705714244, "grad_norm": 1.5704457270338226, "learning_rate": 6.51296699613166e-06, "loss": 0.5779, "step": 15679 }, { "epoch": 0.46197133302888793, "grad_norm": 1.4799557905265568, "learning_rate": 6.512476856203984e-06, "loss": 0.3309, "step": 15680 }, { "epoch": 0.46200079548635153, "grad_norm": 1.454842459704724, "learning_rate": 6.511986700277787e-06, "loss": 0.3735, "step": 15681 }, { "epoch": 0.4620302579438151, "grad_norm": 1.6171333353865156, "learning_rate": 6.5114965283582555e-06, "loss": 0.4809, "step": 15682 }, { "epoch": 0.4620597204012787, "grad_norm": 1.514947806014638, "learning_rate": 6.511006340450576e-06, "loss": 0.4865, "step": 15683 }, { "epoch": 0.46208918285874223, "grad_norm": 1.5477227451503925, "learning_rate": 6.51051613655993e-06, "loss": 0.5309, "step": 15684 }, { "epoch": 0.46211864531620583, "grad_norm": 1.3377976164763907, "learning_rate": 6.510025916691507e-06, "loss": 0.3541, "step": 15685 }, { "epoch": 0.4621481077736694, "grad_norm": 1.5293349476842584, "learning_rate": 6.50953568085049e-06, "loss": 0.501, "step": 15686 }, { "epoch": 0.462177570231133, "grad_norm": 1.5457378886185074, "learning_rate": 6.509045429042062e-06, "loss": 0.5172, "step": 15687 }, { "epoch": 0.46220703268859653, "grad_norm": 1.6277045889614978, "learning_rate": 6.508555161271412e-06, "loss": 0.4871, "step": 15688 }, { "epoch": 0.46223649514606013, "grad_norm": 1.5002359500438953, "learning_rate": 6.508064877543727e-06, "loss": 0.5485, "step": 15689 }, { "epoch": 0.46226595760352374, "grad_norm": 1.460313279604384, "learning_rate": 6.507574577864191e-06, "loss": 0.3688, "step": 15690 }, { "epoch": 0.4622954200609873, "grad_norm": 1.4267043133131248, "learning_rate": 6.507084262237989e-06, "loss": 0.4122, "step": 15691 }, { "epoch": 0.4623248825184509, "grad_norm": 1.3381962470847466, "learning_rate": 6.506593930670311e-06, "loss": 0.2525, "step": 15692 }, { "epoch": 0.46235434497591443, "grad_norm": 1.5419257425835726, "learning_rate": 6.506103583166339e-06, "loss": 0.5054, "step": 15693 }, { "epoch": 0.46238380743337804, "grad_norm": 1.4602865950615807, "learning_rate": 6.5056132197312645e-06, "loss": 0.4916, "step": 15694 }, { "epoch": 0.4624132698908416, "grad_norm": 1.6451712540921948, "learning_rate": 6.5051228403702724e-06, "loss": 0.4729, "step": 15695 }, { "epoch": 0.4624427323483052, "grad_norm": 1.6354736734513167, "learning_rate": 6.504632445088548e-06, "loss": 0.3717, "step": 15696 }, { "epoch": 0.46247219480576873, "grad_norm": 1.6736105227432416, "learning_rate": 6.504142033891281e-06, "loss": 0.4013, "step": 15697 }, { "epoch": 0.46250165726323234, "grad_norm": 1.5751374430803944, "learning_rate": 6.503651606783657e-06, "loss": 0.3329, "step": 15698 }, { "epoch": 0.4625311197206959, "grad_norm": 1.1833199346840289, "learning_rate": 6.503161163770866e-06, "loss": 0.2611, "step": 15699 }, { "epoch": 0.4625605821781595, "grad_norm": 1.3288463220642164, "learning_rate": 6.5026707048580935e-06, "loss": 0.3728, "step": 15700 }, { "epoch": 0.46259004463562303, "grad_norm": 1.3339787444735471, "learning_rate": 6.502180230050529e-06, "loss": 0.3963, "step": 15701 }, { "epoch": 0.46261950709308663, "grad_norm": 1.6722272449518953, "learning_rate": 6.5016897393533585e-06, "loss": 0.524, "step": 15702 }, { "epoch": 0.46264896955055024, "grad_norm": 1.465812800769914, "learning_rate": 6.501199232771772e-06, "loss": 0.2847, "step": 15703 }, { "epoch": 0.4626784320080138, "grad_norm": 1.46298318750878, "learning_rate": 6.5007087103109576e-06, "loss": 0.5612, "step": 15704 }, { "epoch": 0.4627078944654774, "grad_norm": 1.6001907439680347, "learning_rate": 6.500218171976105e-06, "loss": 0.6101, "step": 15705 }, { "epoch": 0.46273735692294093, "grad_norm": 1.3379725061005132, "learning_rate": 6.4997276177724e-06, "loss": 0.3518, "step": 15706 }, { "epoch": 0.46276681938040454, "grad_norm": 1.542955364632251, "learning_rate": 6.4992370477050344e-06, "loss": 0.4031, "step": 15707 }, { "epoch": 0.4627962818378681, "grad_norm": 1.680346832194078, "learning_rate": 6.498746461779197e-06, "loss": 0.4996, "step": 15708 }, { "epoch": 0.4628257442953317, "grad_norm": 1.7266301889699902, "learning_rate": 6.498255860000073e-06, "loss": 0.3937, "step": 15709 }, { "epoch": 0.46285520675279523, "grad_norm": 1.6245409339079873, "learning_rate": 6.49776524237286e-06, "loss": 0.5466, "step": 15710 }, { "epoch": 0.46288466921025884, "grad_norm": 1.542278462295922, "learning_rate": 6.49727460890274e-06, "loss": 0.3799, "step": 15711 }, { "epoch": 0.4629141316677224, "grad_norm": 1.5890966215160887, "learning_rate": 6.496783959594904e-06, "loss": 0.4076, "step": 15712 }, { "epoch": 0.462943594125186, "grad_norm": 1.3806883588241534, "learning_rate": 6.496293294454545e-06, "loss": 0.3905, "step": 15713 }, { "epoch": 0.46297305658264953, "grad_norm": 1.5548004952579817, "learning_rate": 6.495802613486852e-06, "loss": 0.4072, "step": 15714 }, { "epoch": 0.46300251904011314, "grad_norm": 1.6074288759305888, "learning_rate": 6.495311916697015e-06, "loss": 0.4371, "step": 15715 }, { "epoch": 0.46303198149757674, "grad_norm": 1.5417934277882739, "learning_rate": 6.494821204090224e-06, "loss": 0.4785, "step": 15716 }, { "epoch": 0.4630614439550403, "grad_norm": 1.406070270222373, "learning_rate": 6.494330475671669e-06, "loss": 0.4302, "step": 15717 }, { "epoch": 0.4630909064125039, "grad_norm": 1.5043768904363073, "learning_rate": 6.493839731446542e-06, "loss": 0.4531, "step": 15718 }, { "epoch": 0.46312036886996744, "grad_norm": 1.4375321019649139, "learning_rate": 6.4933489714200335e-06, "loss": 0.344, "step": 15719 }, { "epoch": 0.46314983132743104, "grad_norm": 1.477751141144397, "learning_rate": 6.4928581955973355e-06, "loss": 0.2829, "step": 15720 }, { "epoch": 0.4631792937848946, "grad_norm": 1.7043259705732634, "learning_rate": 6.492367403983637e-06, "loss": 0.6291, "step": 15721 }, { "epoch": 0.4632087562423582, "grad_norm": 1.4870831303204723, "learning_rate": 6.491876596584132e-06, "loss": 0.5066, "step": 15722 }, { "epoch": 0.46323821869982174, "grad_norm": 1.3811169271160024, "learning_rate": 6.491385773404011e-06, "loss": 0.4352, "step": 15723 }, { "epoch": 0.46326768115728534, "grad_norm": 1.405251407547928, "learning_rate": 6.4908949344484654e-06, "loss": 0.385, "step": 15724 }, { "epoch": 0.4632971436147489, "grad_norm": 1.5669672820474683, "learning_rate": 6.49040407972269e-06, "loss": 0.5685, "step": 15725 }, { "epoch": 0.4633266060722125, "grad_norm": 1.554272674201659, "learning_rate": 6.4899132092318705e-06, "loss": 0.3978, "step": 15726 }, { "epoch": 0.46335606852967604, "grad_norm": 1.4565245139143232, "learning_rate": 6.489422322981205e-06, "loss": 0.3341, "step": 15727 }, { "epoch": 0.46338553098713964, "grad_norm": 1.6182295472716623, "learning_rate": 6.4889314209758835e-06, "loss": 0.4363, "step": 15728 }, { "epoch": 0.46341499344460324, "grad_norm": 1.4970579670951853, "learning_rate": 6.4884405032211e-06, "loss": 0.5317, "step": 15729 }, { "epoch": 0.4634444559020668, "grad_norm": 1.4792789543398859, "learning_rate": 6.487949569722046e-06, "loss": 0.4767, "step": 15730 }, { "epoch": 0.4634739183595304, "grad_norm": 1.6514514581680497, "learning_rate": 6.487458620483916e-06, "loss": 0.4583, "step": 15731 }, { "epoch": 0.46350338081699394, "grad_norm": 1.6517806748676618, "learning_rate": 6.4869676555119e-06, "loss": 0.5835, "step": 15732 }, { "epoch": 0.46353284327445754, "grad_norm": 1.40131092696113, "learning_rate": 6.486476674811194e-06, "loss": 0.3948, "step": 15733 }, { "epoch": 0.4635623057319211, "grad_norm": 1.4708340595695772, "learning_rate": 6.485985678386993e-06, "loss": 0.5146, "step": 15734 }, { "epoch": 0.4635917681893847, "grad_norm": 1.6161951201647136, "learning_rate": 6.485494666244486e-06, "loss": 0.4682, "step": 15735 }, { "epoch": 0.46362123064684824, "grad_norm": 1.7472155711049997, "learning_rate": 6.485003638388869e-06, "loss": 0.4418, "step": 15736 }, { "epoch": 0.46365069310431184, "grad_norm": 1.4300639780996853, "learning_rate": 6.484512594825337e-06, "loss": 0.3945, "step": 15737 }, { "epoch": 0.4636801555617754, "grad_norm": 1.6305084629329947, "learning_rate": 6.484021535559083e-06, "loss": 0.4416, "step": 15738 }, { "epoch": 0.463709618019239, "grad_norm": 1.463364484121385, "learning_rate": 6.4835304605953005e-06, "loss": 0.5383, "step": 15739 }, { "epoch": 0.46373908047670254, "grad_norm": 1.4496714197381684, "learning_rate": 6.483039369939188e-06, "loss": 0.4923, "step": 15740 }, { "epoch": 0.46376854293416614, "grad_norm": 1.6110971862952974, "learning_rate": 6.482548263595934e-06, "loss": 0.6706, "step": 15741 }, { "epoch": 0.46379800539162974, "grad_norm": 1.4693280759099105, "learning_rate": 6.482057141570736e-06, "loss": 0.4285, "step": 15742 }, { "epoch": 0.4638274678490933, "grad_norm": 1.4400519104109004, "learning_rate": 6.481566003868791e-06, "loss": 0.4809, "step": 15743 }, { "epoch": 0.4638569303065569, "grad_norm": 1.5137377428639531, "learning_rate": 6.481074850495291e-06, "loss": 0.5393, "step": 15744 }, { "epoch": 0.46388639276402044, "grad_norm": 1.6119736379965972, "learning_rate": 6.480583681455434e-06, "loss": 0.5124, "step": 15745 }, { "epoch": 0.46391585522148404, "grad_norm": 1.4880547126625905, "learning_rate": 6.480092496754413e-06, "loss": 0.3988, "step": 15746 }, { "epoch": 0.4639453176789476, "grad_norm": 1.5703616887418792, "learning_rate": 6.4796012963974255e-06, "loss": 0.4638, "step": 15747 }, { "epoch": 0.4639747801364112, "grad_norm": 1.3570500888244548, "learning_rate": 6.479110080389665e-06, "loss": 0.3631, "step": 15748 }, { "epoch": 0.46400424259387474, "grad_norm": 1.543778854212712, "learning_rate": 6.47861884873633e-06, "loss": 0.483, "step": 15749 }, { "epoch": 0.46403370505133834, "grad_norm": 1.5070889293866019, "learning_rate": 6.4781276014426166e-06, "loss": 0.3848, "step": 15750 }, { "epoch": 0.4640631675088019, "grad_norm": 1.4813979596318547, "learning_rate": 6.477636338513718e-06, "loss": 0.5459, "step": 15751 }, { "epoch": 0.4640926299662655, "grad_norm": 1.7459788160850618, "learning_rate": 6.477145059954833e-06, "loss": 0.5301, "step": 15752 }, { "epoch": 0.46412209242372904, "grad_norm": 1.4989292350819605, "learning_rate": 6.476653765771157e-06, "loss": 0.3343, "step": 15753 }, { "epoch": 0.46415155488119264, "grad_norm": 1.3463402685324783, "learning_rate": 6.476162455967887e-06, "loss": 0.3743, "step": 15754 }, { "epoch": 0.46418101733865624, "grad_norm": 1.5185629371844904, "learning_rate": 6.475671130550223e-06, "loss": 0.4798, "step": 15755 }, { "epoch": 0.4642104797961198, "grad_norm": 1.5379382112146112, "learning_rate": 6.4751797895233585e-06, "loss": 0.5076, "step": 15756 }, { "epoch": 0.4642399422535834, "grad_norm": 1.5083732091185493, "learning_rate": 6.474688432892491e-06, "loss": 0.5058, "step": 15757 }, { "epoch": 0.46426940471104694, "grad_norm": 1.8807812847997556, "learning_rate": 6.474197060662819e-06, "loss": 0.5363, "step": 15758 }, { "epoch": 0.46429886716851054, "grad_norm": 1.581743038022804, "learning_rate": 6.47370567283954e-06, "loss": 0.5584, "step": 15759 }, { "epoch": 0.4643283296259741, "grad_norm": 1.4163204607280377, "learning_rate": 6.473214269427851e-06, "loss": 0.3186, "step": 15760 }, { "epoch": 0.4643577920834377, "grad_norm": 1.7364471830005908, "learning_rate": 6.472722850432951e-06, "loss": 0.5217, "step": 15761 }, { "epoch": 0.46438725454090124, "grad_norm": 1.5049514081827409, "learning_rate": 6.472231415860037e-06, "loss": 0.5381, "step": 15762 }, { "epoch": 0.46441671699836484, "grad_norm": 1.371889168861635, "learning_rate": 6.471739965714307e-06, "loss": 0.4098, "step": 15763 }, { "epoch": 0.4644461794558284, "grad_norm": 1.6277339516504674, "learning_rate": 6.471248500000963e-06, "loss": 0.4722, "step": 15764 }, { "epoch": 0.464475641913292, "grad_norm": 1.40187826375782, "learning_rate": 6.470757018725199e-06, "loss": 0.4291, "step": 15765 }, { "epoch": 0.46450510437075554, "grad_norm": 1.6051417006329507, "learning_rate": 6.470265521892215e-06, "loss": 0.5696, "step": 15766 }, { "epoch": 0.46453456682821914, "grad_norm": 1.6489426893099763, "learning_rate": 6.469774009507211e-06, "loss": 0.4715, "step": 15767 }, { "epoch": 0.46456402928568274, "grad_norm": 1.5259209428710583, "learning_rate": 6.469282481575386e-06, "loss": 0.5028, "step": 15768 }, { "epoch": 0.4645934917431463, "grad_norm": 1.511977401531811, "learning_rate": 6.468790938101937e-06, "loss": 0.5438, "step": 15769 }, { "epoch": 0.4646229542006099, "grad_norm": 1.4568135005553249, "learning_rate": 6.4682993790920665e-06, "loss": 0.4528, "step": 15770 }, { "epoch": 0.46465241665807344, "grad_norm": 1.4928298805401634, "learning_rate": 6.467807804550972e-06, "loss": 0.4745, "step": 15771 }, { "epoch": 0.46468187911553704, "grad_norm": 1.5399130405130161, "learning_rate": 6.4673162144838545e-06, "loss": 0.5275, "step": 15772 }, { "epoch": 0.4647113415730006, "grad_norm": 1.4374833654501677, "learning_rate": 6.466824608895913e-06, "loss": 0.4253, "step": 15773 }, { "epoch": 0.4647408040304642, "grad_norm": 1.640213512499965, "learning_rate": 6.466332987792347e-06, "loss": 0.5149, "step": 15774 }, { "epoch": 0.46477026648792774, "grad_norm": 1.4144868330099092, "learning_rate": 6.4658413511783595e-06, "loss": 0.3865, "step": 15775 }, { "epoch": 0.46479972894539134, "grad_norm": 1.527621595897967, "learning_rate": 6.465349699059147e-06, "loss": 0.4842, "step": 15776 }, { "epoch": 0.4648291914028549, "grad_norm": 1.600396948300908, "learning_rate": 6.464858031439912e-06, "loss": 0.4471, "step": 15777 }, { "epoch": 0.4648586538603185, "grad_norm": 1.7593073313469474, "learning_rate": 6.464366348325856e-06, "loss": 0.5069, "step": 15778 }, { "epoch": 0.46488811631778204, "grad_norm": 1.3809481448590544, "learning_rate": 6.463874649722179e-06, "loss": 0.3444, "step": 15779 }, { "epoch": 0.46491757877524564, "grad_norm": 1.7951922280046602, "learning_rate": 6.463382935634082e-06, "loss": 0.4797, "step": 15780 }, { "epoch": 0.46494704123270925, "grad_norm": 1.3880992258455749, "learning_rate": 6.462891206066766e-06, "loss": 0.3962, "step": 15781 }, { "epoch": 0.4649765036901728, "grad_norm": 1.6099775024556822, "learning_rate": 6.462399461025434e-06, "loss": 0.3594, "step": 15782 }, { "epoch": 0.4650059661476364, "grad_norm": 1.7245548874697225, "learning_rate": 6.461907700515284e-06, "loss": 0.6541, "step": 15783 }, { "epoch": 0.46503542860509994, "grad_norm": 1.4355530858177186, "learning_rate": 6.461415924541521e-06, "loss": 0.44, "step": 15784 }, { "epoch": 0.46506489106256355, "grad_norm": 1.4919006568084587, "learning_rate": 6.460924133109346e-06, "loss": 0.4297, "step": 15785 }, { "epoch": 0.4650943535200271, "grad_norm": 1.912863538945862, "learning_rate": 6.4604323262239595e-06, "loss": 0.5228, "step": 15786 }, { "epoch": 0.4651238159774907, "grad_norm": 1.5538208772793416, "learning_rate": 6.459940503890566e-06, "loss": 0.5573, "step": 15787 }, { "epoch": 0.46515327843495424, "grad_norm": 1.796918329034687, "learning_rate": 6.459448666114368e-06, "loss": 0.3704, "step": 15788 }, { "epoch": 0.46518274089241785, "grad_norm": 1.57041441592199, "learning_rate": 6.4589568129005645e-06, "loss": 0.5577, "step": 15789 }, { "epoch": 0.4652122033498814, "grad_norm": 1.492972754796591, "learning_rate": 6.4584649442543614e-06, "loss": 0.5037, "step": 15790 }, { "epoch": 0.465241665807345, "grad_norm": 1.58926666412776, "learning_rate": 6.45797306018096e-06, "loss": 0.5056, "step": 15791 }, { "epoch": 0.46527112826480854, "grad_norm": 1.6400366506242114, "learning_rate": 6.457481160685565e-06, "loss": 0.4094, "step": 15792 }, { "epoch": 0.46530059072227214, "grad_norm": 1.6997863923107976, "learning_rate": 6.4569892457733775e-06, "loss": 0.3419, "step": 15793 }, { "epoch": 0.46533005317973575, "grad_norm": 1.6235015745681871, "learning_rate": 6.456497315449602e-06, "loss": 0.4729, "step": 15794 }, { "epoch": 0.4653595156371993, "grad_norm": 1.7050015352858179, "learning_rate": 6.456005369719443e-06, "loss": 0.5351, "step": 15795 }, { "epoch": 0.4653889780946629, "grad_norm": 1.5202018474438375, "learning_rate": 6.455513408588101e-06, "loss": 0.5322, "step": 15796 }, { "epoch": 0.46541844055212644, "grad_norm": 1.4761086940587282, "learning_rate": 6.455021432060782e-06, "loss": 0.4101, "step": 15797 }, { "epoch": 0.46544790300959005, "grad_norm": 1.4809049540906116, "learning_rate": 6.454529440142692e-06, "loss": 0.3343, "step": 15798 }, { "epoch": 0.4654773654670536, "grad_norm": 1.5820914032330842, "learning_rate": 6.45403743283903e-06, "loss": 0.4947, "step": 15799 }, { "epoch": 0.4655068279245172, "grad_norm": 1.58779126706607, "learning_rate": 6.453545410155004e-06, "loss": 0.5442, "step": 15800 }, { "epoch": 0.46553629038198074, "grad_norm": 1.4804239767691045, "learning_rate": 6.45305337209582e-06, "loss": 0.4795, "step": 15801 }, { "epoch": 0.46556575283944435, "grad_norm": 1.590025988821014, "learning_rate": 6.452561318666677e-06, "loss": 0.5247, "step": 15802 }, { "epoch": 0.4655952152969079, "grad_norm": 1.339668350634054, "learning_rate": 6.452069249872785e-06, "loss": 0.3986, "step": 15803 }, { "epoch": 0.4656246777543715, "grad_norm": 1.4556692272623, "learning_rate": 6.451577165719346e-06, "loss": 0.4801, "step": 15804 }, { "epoch": 0.46565414021183504, "grad_norm": 1.5290007551927065, "learning_rate": 6.451085066211567e-06, "loss": 0.5264, "step": 15805 }, { "epoch": 0.46568360266929865, "grad_norm": 1.570298210744318, "learning_rate": 6.450592951354652e-06, "loss": 0.6014, "step": 15806 }, { "epoch": 0.46571306512676225, "grad_norm": 1.4674250718816733, "learning_rate": 6.4501008211538066e-06, "loss": 0.4382, "step": 15807 }, { "epoch": 0.4657425275842258, "grad_norm": 1.5134206667796841, "learning_rate": 6.449608675614236e-06, "loss": 0.5133, "step": 15808 }, { "epoch": 0.4657719900416894, "grad_norm": 1.389006313106784, "learning_rate": 6.449116514741148e-06, "loss": 0.4618, "step": 15809 }, { "epoch": 0.46580145249915295, "grad_norm": 1.499652197561101, "learning_rate": 6.4486243385397476e-06, "loss": 0.497, "step": 15810 }, { "epoch": 0.46583091495661655, "grad_norm": 1.4157115763706198, "learning_rate": 6.448132147015238e-06, "loss": 0.4273, "step": 15811 }, { "epoch": 0.4658603774140801, "grad_norm": 1.5234577018325752, "learning_rate": 6.44763994017283e-06, "loss": 0.3501, "step": 15812 }, { "epoch": 0.4658898398715437, "grad_norm": 1.4399527803202334, "learning_rate": 6.447147718017727e-06, "loss": 0.5354, "step": 15813 }, { "epoch": 0.46591930232900725, "grad_norm": 1.6936965469569232, "learning_rate": 6.446655480555136e-06, "loss": 0.5361, "step": 15814 }, { "epoch": 0.46594876478647085, "grad_norm": 1.368959437327332, "learning_rate": 6.4461632277902656e-06, "loss": 0.4642, "step": 15815 }, { "epoch": 0.4659782272439344, "grad_norm": 1.4135949116457962, "learning_rate": 6.445670959728321e-06, "loss": 0.3974, "step": 15816 }, { "epoch": 0.466007689701398, "grad_norm": 1.574958120768611, "learning_rate": 6.445178676374509e-06, "loss": 0.4707, "step": 15817 }, { "epoch": 0.46603715215886155, "grad_norm": 1.4638971686729563, "learning_rate": 6.4446863777340366e-06, "loss": 0.4158, "step": 15818 }, { "epoch": 0.46606661461632515, "grad_norm": 1.364576831884685, "learning_rate": 6.4441940638121126e-06, "loss": 0.5108, "step": 15819 }, { "epoch": 0.46609607707378875, "grad_norm": 1.3994443385465614, "learning_rate": 6.443701734613945e-06, "loss": 0.4199, "step": 15820 }, { "epoch": 0.4661255395312523, "grad_norm": 1.7595270253047903, "learning_rate": 6.443209390144739e-06, "loss": 0.516, "step": 15821 }, { "epoch": 0.4661550019887159, "grad_norm": 1.889624843464581, "learning_rate": 6.442717030409705e-06, "loss": 0.7101, "step": 15822 }, { "epoch": 0.46618446444617945, "grad_norm": 1.6019240619069655, "learning_rate": 6.4422246554140484e-06, "loss": 0.4455, "step": 15823 }, { "epoch": 0.46621392690364305, "grad_norm": 1.6008340430742385, "learning_rate": 6.44173226516298e-06, "loss": 0.5245, "step": 15824 }, { "epoch": 0.4662433893611066, "grad_norm": 1.4460939302544968, "learning_rate": 6.441239859661709e-06, "loss": 0.3931, "step": 15825 }, { "epoch": 0.4662728518185702, "grad_norm": 1.8190960674931018, "learning_rate": 6.44074743891544e-06, "loss": 0.3948, "step": 15826 }, { "epoch": 0.46630231427603375, "grad_norm": 1.6655657276116784, "learning_rate": 6.440255002929383e-06, "loss": 0.5013, "step": 15827 }, { "epoch": 0.46633177673349735, "grad_norm": 1.4488203863157596, "learning_rate": 6.439762551708748e-06, "loss": 0.4711, "step": 15828 }, { "epoch": 0.4663612391909609, "grad_norm": 1.8344997322727836, "learning_rate": 6.439270085258744e-06, "loss": 0.4483, "step": 15829 }, { "epoch": 0.4663907016484245, "grad_norm": 1.6810211545011384, "learning_rate": 6.438777603584579e-06, "loss": 0.5479, "step": 15830 }, { "epoch": 0.46642016410588805, "grad_norm": 1.440322679826638, "learning_rate": 6.438285106691464e-06, "loss": 0.4549, "step": 15831 }, { "epoch": 0.46644962656335165, "grad_norm": 1.4144077830483608, "learning_rate": 6.4377925945846065e-06, "loss": 0.3496, "step": 15832 }, { "epoch": 0.46647908902081525, "grad_norm": 1.5080489496685237, "learning_rate": 6.437300067269217e-06, "loss": 0.3465, "step": 15833 }, { "epoch": 0.4665085514782788, "grad_norm": 1.4090746939544299, "learning_rate": 6.4368075247505065e-06, "loss": 0.391, "step": 15834 }, { "epoch": 0.4665380139357424, "grad_norm": 1.6245012550636393, "learning_rate": 6.436314967033684e-06, "loss": 0.5714, "step": 15835 }, { "epoch": 0.46656747639320595, "grad_norm": 1.3954402450370265, "learning_rate": 6.435822394123958e-06, "loss": 0.3541, "step": 15836 }, { "epoch": 0.46659693885066955, "grad_norm": 1.7414950141389698, "learning_rate": 6.4353298060265426e-06, "loss": 0.5831, "step": 15837 }, { "epoch": 0.4666264013081331, "grad_norm": 1.5390338874308336, "learning_rate": 6.434837202746643e-06, "loss": 0.4427, "step": 15838 }, { "epoch": 0.4666558637655967, "grad_norm": 1.4420040510907728, "learning_rate": 6.434344584289474e-06, "loss": 0.5052, "step": 15839 }, { "epoch": 0.46668532622306025, "grad_norm": 1.5057812847112026, "learning_rate": 6.433851950660247e-06, "loss": 0.5906, "step": 15840 }, { "epoch": 0.46671478868052385, "grad_norm": 1.5717468625580922, "learning_rate": 6.433359301864169e-06, "loss": 0.4751, "step": 15841 }, { "epoch": 0.4667442511379874, "grad_norm": 1.7755702894977625, "learning_rate": 6.432866637906453e-06, "loss": 0.5228, "step": 15842 }, { "epoch": 0.466773713595451, "grad_norm": 1.697209275286912, "learning_rate": 6.4323739587923114e-06, "loss": 0.4845, "step": 15843 }, { "epoch": 0.46680317605291455, "grad_norm": 1.4852619752593805, "learning_rate": 6.431881264526954e-06, "loss": 0.4511, "step": 15844 }, { "epoch": 0.46683263851037815, "grad_norm": 1.3141382698237987, "learning_rate": 6.431388555115593e-06, "loss": 0.328, "step": 15845 }, { "epoch": 0.46686210096784175, "grad_norm": 1.4794101556600825, "learning_rate": 6.43089583056344e-06, "loss": 0.46, "step": 15846 }, { "epoch": 0.4668915634253053, "grad_norm": 1.7060181708340412, "learning_rate": 6.430403090875708e-06, "loss": 0.5861, "step": 15847 }, { "epoch": 0.4669210258827689, "grad_norm": 1.4841326784836288, "learning_rate": 6.429910336057605e-06, "loss": 0.4012, "step": 15848 }, { "epoch": 0.46695048834023245, "grad_norm": 1.4938029164936624, "learning_rate": 6.42941756611435e-06, "loss": 0.4252, "step": 15849 }, { "epoch": 0.46697995079769605, "grad_norm": 1.5321180334182634, "learning_rate": 6.42892478105115e-06, "loss": 0.4352, "step": 15850 }, { "epoch": 0.4670094132551596, "grad_norm": 1.3747690842756115, "learning_rate": 6.428431980873219e-06, "loss": 0.434, "step": 15851 }, { "epoch": 0.4670388757126232, "grad_norm": 1.3027492599909272, "learning_rate": 6.42793916558577e-06, "loss": 0.4257, "step": 15852 }, { "epoch": 0.46706833817008675, "grad_norm": 1.7928340484411571, "learning_rate": 6.427446335194016e-06, "loss": 0.3799, "step": 15853 }, { "epoch": 0.46709780062755035, "grad_norm": 1.5479831964597721, "learning_rate": 6.426953489703169e-06, "loss": 0.498, "step": 15854 }, { "epoch": 0.4671272630850139, "grad_norm": 1.5937882445063367, "learning_rate": 6.426460629118445e-06, "loss": 0.6071, "step": 15855 }, { "epoch": 0.4671567255424775, "grad_norm": 1.293444239302475, "learning_rate": 6.4259677534450525e-06, "loss": 0.3115, "step": 15856 }, { "epoch": 0.46718618799994105, "grad_norm": 1.7151313813994136, "learning_rate": 6.425474862688208e-06, "loss": 0.4907, "step": 15857 }, { "epoch": 0.46721565045740465, "grad_norm": 1.7175708295300947, "learning_rate": 6.424981956853126e-06, "loss": 0.5567, "step": 15858 }, { "epoch": 0.46724511291486825, "grad_norm": 1.5334621962940869, "learning_rate": 6.424489035945019e-06, "loss": 0.4816, "step": 15859 }, { "epoch": 0.4672745753723318, "grad_norm": 1.4270318629544965, "learning_rate": 6.423996099969101e-06, "loss": 0.3923, "step": 15860 }, { "epoch": 0.4673040378297954, "grad_norm": 1.559221904563331, "learning_rate": 6.4235031489305855e-06, "loss": 0.4902, "step": 15861 }, { "epoch": 0.46733350028725895, "grad_norm": 1.584666761563836, "learning_rate": 6.423010182834688e-06, "loss": 0.4263, "step": 15862 }, { "epoch": 0.46736296274472255, "grad_norm": 1.334553410336392, "learning_rate": 6.422517201686622e-06, "loss": 0.3106, "step": 15863 }, { "epoch": 0.4673924252021861, "grad_norm": 1.4394232948579104, "learning_rate": 6.422024205491603e-06, "loss": 0.4612, "step": 15864 }, { "epoch": 0.4674218876596497, "grad_norm": 1.7112415647620383, "learning_rate": 6.4215311942548465e-06, "loss": 0.5636, "step": 15865 }, { "epoch": 0.46745135011711325, "grad_norm": 1.5721303861656633, "learning_rate": 6.421038167981564e-06, "loss": 0.5041, "step": 15866 }, { "epoch": 0.46748081257457685, "grad_norm": 1.407456493017177, "learning_rate": 6.420545126676974e-06, "loss": 0.2715, "step": 15867 }, { "epoch": 0.4675102750320404, "grad_norm": 1.4588492269266666, "learning_rate": 6.42005207034629e-06, "loss": 0.5178, "step": 15868 }, { "epoch": 0.467539737489504, "grad_norm": 1.5096252188329407, "learning_rate": 6.4195589989947284e-06, "loss": 0.4236, "step": 15869 }, { "epoch": 0.46756919994696755, "grad_norm": 1.3604519208811416, "learning_rate": 6.419065912627506e-06, "loss": 0.4562, "step": 15870 }, { "epoch": 0.46759866240443115, "grad_norm": 1.5640662789789122, "learning_rate": 6.4185728112498345e-06, "loss": 0.4546, "step": 15871 }, { "epoch": 0.46762812486189476, "grad_norm": 1.5480618338705865, "learning_rate": 6.418079694866932e-06, "loss": 0.3741, "step": 15872 }, { "epoch": 0.4676575873193583, "grad_norm": 1.593751212360326, "learning_rate": 6.4175865634840165e-06, "loss": 0.4281, "step": 15873 }, { "epoch": 0.4676870497768219, "grad_norm": 1.5406460338807506, "learning_rate": 6.4170934171063e-06, "loss": 0.4535, "step": 15874 }, { "epoch": 0.46771651223428545, "grad_norm": 1.6861912060496087, "learning_rate": 6.4166002557390035e-06, "loss": 0.6142, "step": 15875 }, { "epoch": 0.46774597469174906, "grad_norm": 1.3880907865761944, "learning_rate": 6.416107079387342e-06, "loss": 0.4264, "step": 15876 }, { "epoch": 0.4677754371492126, "grad_norm": 1.4407202081507187, "learning_rate": 6.415613888056529e-06, "loss": 0.4461, "step": 15877 }, { "epoch": 0.4678048996066762, "grad_norm": 1.564844225479875, "learning_rate": 6.415120681751783e-06, "loss": 0.4292, "step": 15878 }, { "epoch": 0.46783436206413975, "grad_norm": 1.5337023613639582, "learning_rate": 6.414627460478324e-06, "loss": 0.5129, "step": 15879 }, { "epoch": 0.46786382452160336, "grad_norm": 1.5875260628676053, "learning_rate": 6.414134224241366e-06, "loss": 0.6229, "step": 15880 }, { "epoch": 0.4678932869790669, "grad_norm": 1.4142089238489726, "learning_rate": 6.413640973046127e-06, "loss": 0.396, "step": 15881 }, { "epoch": 0.4679227494365305, "grad_norm": 1.6583057996408366, "learning_rate": 6.413147706897824e-06, "loss": 0.3026, "step": 15882 }, { "epoch": 0.46795221189399405, "grad_norm": 1.599003630915924, "learning_rate": 6.412654425801677e-06, "loss": 0.4479, "step": 15883 }, { "epoch": 0.46798167435145765, "grad_norm": 1.597396061832688, "learning_rate": 6.412161129762899e-06, "loss": 0.5218, "step": 15884 }, { "epoch": 0.46801113680892126, "grad_norm": 1.6560886050444799, "learning_rate": 6.411667818786714e-06, "loss": 0.6562, "step": 15885 }, { "epoch": 0.4680405992663848, "grad_norm": 1.4711410299266576, "learning_rate": 6.411174492878336e-06, "loss": 0.4171, "step": 15886 }, { "epoch": 0.4680700617238484, "grad_norm": 1.6236394085958494, "learning_rate": 6.410681152042985e-06, "loss": 0.4946, "step": 15887 }, { "epoch": 0.46809952418131195, "grad_norm": 1.5558988646948877, "learning_rate": 6.410187796285878e-06, "loss": 0.4929, "step": 15888 }, { "epoch": 0.46812898663877556, "grad_norm": 1.5414736102657975, "learning_rate": 6.409694425612233e-06, "loss": 0.3846, "step": 15889 }, { "epoch": 0.4681584490962391, "grad_norm": 1.4783821751045885, "learning_rate": 6.409201040027272e-06, "loss": 0.4773, "step": 15890 }, { "epoch": 0.4681879115537027, "grad_norm": 1.5719992225205195, "learning_rate": 6.40870763953621e-06, "loss": 0.5354, "step": 15891 }, { "epoch": 0.46821737401116625, "grad_norm": 1.6004587440311593, "learning_rate": 6.408214224144269e-06, "loss": 0.4958, "step": 15892 }, { "epoch": 0.46824683646862986, "grad_norm": 1.7832812522657642, "learning_rate": 6.407720793856666e-06, "loss": 0.6893, "step": 15893 }, { "epoch": 0.4682762989260934, "grad_norm": 1.7637499071565783, "learning_rate": 6.407227348678624e-06, "loss": 0.6932, "step": 15894 }, { "epoch": 0.468305761383557, "grad_norm": 1.4665695784769033, "learning_rate": 6.406733888615359e-06, "loss": 0.5424, "step": 15895 }, { "epoch": 0.46833522384102055, "grad_norm": 1.6206352186512978, "learning_rate": 6.4062404136720916e-06, "loss": 0.5063, "step": 15896 }, { "epoch": 0.46836468629848416, "grad_norm": 1.4227431930057317, "learning_rate": 6.40574692385404e-06, "loss": 0.3588, "step": 15897 }, { "epoch": 0.46839414875594776, "grad_norm": 1.5751868031659055, "learning_rate": 6.405253419166428e-06, "loss": 0.4564, "step": 15898 }, { "epoch": 0.4684236112134113, "grad_norm": 1.4935696811590766, "learning_rate": 6.404759899614471e-06, "loss": 0.4849, "step": 15899 }, { "epoch": 0.4684530736708749, "grad_norm": 1.6277097197314119, "learning_rate": 6.404266365203394e-06, "loss": 0.4969, "step": 15900 }, { "epoch": 0.46848253612833846, "grad_norm": 1.6880733301882844, "learning_rate": 6.403772815938416e-06, "loss": 0.5454, "step": 15901 }, { "epoch": 0.46851199858580206, "grad_norm": 1.3895669071950707, "learning_rate": 6.403279251824755e-06, "loss": 0.412, "step": 15902 }, { "epoch": 0.4685414610432656, "grad_norm": 1.703294075583747, "learning_rate": 6.402785672867635e-06, "loss": 0.5873, "step": 15903 }, { "epoch": 0.4685709235007292, "grad_norm": 1.4601047545924295, "learning_rate": 6.402292079072274e-06, "loss": 0.471, "step": 15904 }, { "epoch": 0.46860038595819276, "grad_norm": 1.4632802305266674, "learning_rate": 6.401798470443897e-06, "loss": 0.3713, "step": 15905 }, { "epoch": 0.46862984841565636, "grad_norm": 1.785793536130074, "learning_rate": 6.401304846987722e-06, "loss": 0.465, "step": 15906 }, { "epoch": 0.4686593108731199, "grad_norm": 1.5783350825455262, "learning_rate": 6.4008112087089714e-06, "loss": 0.4755, "step": 15907 }, { "epoch": 0.4686887733305835, "grad_norm": 1.5693963986894908, "learning_rate": 6.400317555612866e-06, "loss": 0.4751, "step": 15908 }, { "epoch": 0.46871823578804706, "grad_norm": 1.540703272372131, "learning_rate": 6.399823887704631e-06, "loss": 0.4917, "step": 15909 }, { "epoch": 0.46874769824551066, "grad_norm": 1.7155758305408033, "learning_rate": 6.3993302049894825e-06, "loss": 0.5787, "step": 15910 }, { "epoch": 0.46877716070297426, "grad_norm": 1.3858655597445413, "learning_rate": 6.398836507472645e-06, "loss": 0.39, "step": 15911 }, { "epoch": 0.4688066231604378, "grad_norm": 1.6293180644459033, "learning_rate": 6.3983427951593435e-06, "loss": 0.5025, "step": 15912 }, { "epoch": 0.4688360856179014, "grad_norm": 1.6808452133066543, "learning_rate": 6.397849068054798e-06, "loss": 0.5605, "step": 15913 }, { "epoch": 0.46886554807536496, "grad_norm": 1.3943213429935277, "learning_rate": 6.397355326164231e-06, "loss": 0.3862, "step": 15914 }, { "epoch": 0.46889501053282856, "grad_norm": 1.4644524813192148, "learning_rate": 6.396861569492863e-06, "loss": 0.4726, "step": 15915 }, { "epoch": 0.4689244729902921, "grad_norm": 1.3799944802680646, "learning_rate": 6.396367798045922e-06, "loss": 0.3546, "step": 15916 }, { "epoch": 0.4689539354477557, "grad_norm": 1.6263925616706156, "learning_rate": 6.395874011828627e-06, "loss": 0.5676, "step": 15917 }, { "epoch": 0.46898339790521926, "grad_norm": 1.5591622713712219, "learning_rate": 6.395380210846202e-06, "loss": 0.4821, "step": 15918 }, { "epoch": 0.46901286036268286, "grad_norm": 1.577429173249419, "learning_rate": 6.394886395103869e-06, "loss": 0.4247, "step": 15919 }, { "epoch": 0.4690423228201464, "grad_norm": 1.4540463126177863, "learning_rate": 6.394392564606855e-06, "loss": 0.3212, "step": 15920 }, { "epoch": 0.46907178527761, "grad_norm": 1.367783382158684, "learning_rate": 6.393898719360379e-06, "loss": 0.4649, "step": 15921 }, { "epoch": 0.46910124773507356, "grad_norm": 1.3926933862511248, "learning_rate": 6.393404859369669e-06, "loss": 0.4407, "step": 15922 }, { "epoch": 0.46913071019253716, "grad_norm": 1.4915807309691822, "learning_rate": 6.392910984639946e-06, "loss": 0.4506, "step": 15923 }, { "epoch": 0.46916017265000076, "grad_norm": 1.7526176935419515, "learning_rate": 6.392417095176436e-06, "loss": 0.5611, "step": 15924 }, { "epoch": 0.4691896351074643, "grad_norm": 1.4712768153921363, "learning_rate": 6.391923190984363e-06, "loss": 0.4858, "step": 15925 }, { "epoch": 0.4692190975649279, "grad_norm": 1.5328122970753149, "learning_rate": 6.391429272068948e-06, "loss": 0.4403, "step": 15926 }, { "epoch": 0.46924856002239146, "grad_norm": 1.3064045175236116, "learning_rate": 6.39093533843542e-06, "loss": 0.4551, "step": 15927 }, { "epoch": 0.46927802247985506, "grad_norm": 1.3823360639050177, "learning_rate": 6.390441390089003e-06, "loss": 0.4746, "step": 15928 }, { "epoch": 0.4693074849373186, "grad_norm": 1.5215111687696208, "learning_rate": 6.3899474270349196e-06, "loss": 0.4903, "step": 15929 }, { "epoch": 0.4693369473947822, "grad_norm": 1.5377625898302616, "learning_rate": 6.389453449278396e-06, "loss": 0.4132, "step": 15930 }, { "epoch": 0.46936640985224576, "grad_norm": 1.4887820500959972, "learning_rate": 6.388959456824657e-06, "loss": 0.4571, "step": 15931 }, { "epoch": 0.46939587230970936, "grad_norm": 1.7043674587329225, "learning_rate": 6.388465449678928e-06, "loss": 0.465, "step": 15932 }, { "epoch": 0.4694253347671729, "grad_norm": 1.4009168057015995, "learning_rate": 6.3879714278464335e-06, "loss": 0.4988, "step": 15933 }, { "epoch": 0.4694547972246365, "grad_norm": 1.4809735560349284, "learning_rate": 6.387477391332404e-06, "loss": 0.4443, "step": 15934 }, { "epoch": 0.46948425968210006, "grad_norm": 1.897251908286231, "learning_rate": 6.386983340142058e-06, "loss": 0.5309, "step": 15935 }, { "epoch": 0.46951372213956366, "grad_norm": 1.670249290617399, "learning_rate": 6.386489274280625e-06, "loss": 0.5469, "step": 15936 }, { "epoch": 0.46954318459702726, "grad_norm": 1.6832257893326275, "learning_rate": 6.3859951937533335e-06, "loss": 0.6188, "step": 15937 }, { "epoch": 0.4695726470544908, "grad_norm": 1.5170288237089946, "learning_rate": 6.385501098565405e-06, "loss": 0.5135, "step": 15938 }, { "epoch": 0.4696021095119544, "grad_norm": 1.8953947991672158, "learning_rate": 6.385006988722069e-06, "loss": 0.621, "step": 15939 }, { "epoch": 0.46963157196941796, "grad_norm": 1.5001396620896477, "learning_rate": 6.384512864228551e-06, "loss": 0.5367, "step": 15940 }, { "epoch": 0.46966103442688156, "grad_norm": 1.4734166720672472, "learning_rate": 6.384018725090079e-06, "loss": 0.4133, "step": 15941 }, { "epoch": 0.4696904968843451, "grad_norm": 1.4737058876020988, "learning_rate": 6.383524571311877e-06, "loss": 0.4749, "step": 15942 }, { "epoch": 0.4697199593418087, "grad_norm": 1.6653581640393607, "learning_rate": 6.383030402899174e-06, "loss": 0.506, "step": 15943 }, { "epoch": 0.46974942179927226, "grad_norm": 1.2985539818931389, "learning_rate": 6.382536219857199e-06, "loss": 0.3529, "step": 15944 }, { "epoch": 0.46977888425673586, "grad_norm": 1.4331042543582224, "learning_rate": 6.382042022191174e-06, "loss": 0.4305, "step": 15945 }, { "epoch": 0.4698083467141994, "grad_norm": 1.5468942226648765, "learning_rate": 6.381547809906332e-06, "loss": 0.3904, "step": 15946 }, { "epoch": 0.469837809171663, "grad_norm": 1.6708366319675143, "learning_rate": 6.3810535830078965e-06, "loss": 0.4832, "step": 15947 }, { "epoch": 0.46986727162912656, "grad_norm": 1.6625855892987456, "learning_rate": 6.380559341501098e-06, "loss": 0.4334, "step": 15948 }, { "epoch": 0.46989673408659016, "grad_norm": 1.71969346086713, "learning_rate": 6.380065085391165e-06, "loss": 0.5278, "step": 15949 }, { "epoch": 0.46992619654405376, "grad_norm": 1.6329502472797002, "learning_rate": 6.379570814683323e-06, "loss": 0.5316, "step": 15950 }, { "epoch": 0.4699556590015173, "grad_norm": 1.4528243590676762, "learning_rate": 6.379076529382799e-06, "loss": 0.4611, "step": 15951 }, { "epoch": 0.4699851214589809, "grad_norm": 1.4686687504044638, "learning_rate": 6.378582229494827e-06, "loss": 0.4206, "step": 15952 }, { "epoch": 0.47001458391644446, "grad_norm": 1.6810411344986613, "learning_rate": 6.37808791502463e-06, "loss": 0.3403, "step": 15953 }, { "epoch": 0.47004404637390806, "grad_norm": 1.5577716118327922, "learning_rate": 6.377593585977441e-06, "loss": 0.4297, "step": 15954 }, { "epoch": 0.4700735088313716, "grad_norm": 1.3486894999892383, "learning_rate": 6.377099242358486e-06, "loss": 0.3445, "step": 15955 }, { "epoch": 0.4701029712888352, "grad_norm": 1.5195795079619114, "learning_rate": 6.376604884172994e-06, "loss": 0.4326, "step": 15956 }, { "epoch": 0.47013243374629876, "grad_norm": 1.3704537465064386, "learning_rate": 6.376110511426197e-06, "loss": 0.4017, "step": 15957 }, { "epoch": 0.47016189620376236, "grad_norm": 1.6198441155477386, "learning_rate": 6.375616124123321e-06, "loss": 0.4248, "step": 15958 }, { "epoch": 0.4701913586612259, "grad_norm": 1.6186151911862996, "learning_rate": 6.375121722269598e-06, "loss": 0.4739, "step": 15959 }, { "epoch": 0.4702208211186895, "grad_norm": 1.5205807602682528, "learning_rate": 6.374627305870255e-06, "loss": 0.5829, "step": 15960 }, { "epoch": 0.47025028357615306, "grad_norm": 1.4697676335649201, "learning_rate": 6.374132874930525e-06, "loss": 0.3677, "step": 15961 }, { "epoch": 0.47027974603361666, "grad_norm": 1.502906442199206, "learning_rate": 6.373638429455634e-06, "loss": 0.4216, "step": 15962 }, { "epoch": 0.47030920849108027, "grad_norm": 1.404334409712838, "learning_rate": 6.373143969450816e-06, "loss": 0.3098, "step": 15963 }, { "epoch": 0.4703386709485438, "grad_norm": 1.4726870391060332, "learning_rate": 6.3726494949213e-06, "loss": 0.4762, "step": 15964 }, { "epoch": 0.4703681334060074, "grad_norm": 1.3122423793701317, "learning_rate": 6.372155005872315e-06, "loss": 0.3366, "step": 15965 }, { "epoch": 0.47039759586347096, "grad_norm": 1.4912199450011032, "learning_rate": 6.371660502309091e-06, "loss": 0.4751, "step": 15966 }, { "epoch": 0.47042705832093457, "grad_norm": 1.4522311261905425, "learning_rate": 6.371165984236864e-06, "loss": 0.2889, "step": 15967 }, { "epoch": 0.4704565207783981, "grad_norm": 1.622004662845823, "learning_rate": 6.370671451660858e-06, "loss": 0.5134, "step": 15968 }, { "epoch": 0.4704859832358617, "grad_norm": 1.2869314114955894, "learning_rate": 6.370176904586308e-06, "loss": 0.3685, "step": 15969 }, { "epoch": 0.47051544569332526, "grad_norm": 1.3227364911276045, "learning_rate": 6.369682343018445e-06, "loss": 0.3595, "step": 15970 }, { "epoch": 0.47054490815078887, "grad_norm": 1.4375685071868798, "learning_rate": 6.3691877669624994e-06, "loss": 0.4649, "step": 15971 }, { "epoch": 0.4705743706082524, "grad_norm": 1.4318467443065352, "learning_rate": 6.3686931764237015e-06, "loss": 0.3557, "step": 15972 }, { "epoch": 0.470603833065716, "grad_norm": 1.7554336375291955, "learning_rate": 6.368198571407286e-06, "loss": 0.5075, "step": 15973 }, { "epoch": 0.47063329552317956, "grad_norm": 1.4212086901842986, "learning_rate": 6.367703951918482e-06, "loss": 0.408, "step": 15974 }, { "epoch": 0.47066275798064316, "grad_norm": 1.6637551457514774, "learning_rate": 6.367209317962523e-06, "loss": 0.6008, "step": 15975 }, { "epoch": 0.47069222043810677, "grad_norm": 1.5799122915346777, "learning_rate": 6.366714669544641e-06, "loss": 0.3728, "step": 15976 }, { "epoch": 0.4707216828955703, "grad_norm": 1.715974468182793, "learning_rate": 6.366220006670065e-06, "loss": 0.5517, "step": 15977 }, { "epoch": 0.4707511453530339, "grad_norm": 1.5110346663880105, "learning_rate": 6.365725329344032e-06, "loss": 0.3154, "step": 15978 }, { "epoch": 0.47078060781049746, "grad_norm": 1.5252884632861328, "learning_rate": 6.365230637571773e-06, "loss": 0.4418, "step": 15979 }, { "epoch": 0.47081007026796107, "grad_norm": 1.379314340084784, "learning_rate": 6.364735931358521e-06, "loss": 0.3375, "step": 15980 }, { "epoch": 0.4708395327254246, "grad_norm": 1.4329201466035202, "learning_rate": 6.364241210709507e-06, "loss": 0.433, "step": 15981 }, { "epoch": 0.4708689951828882, "grad_norm": 1.5640718346466151, "learning_rate": 6.363746475629966e-06, "loss": 0.5491, "step": 15982 }, { "epoch": 0.47089845764035176, "grad_norm": 1.6210472753070357, "learning_rate": 6.363251726125129e-06, "loss": 0.533, "step": 15983 }, { "epoch": 0.47092792009781537, "grad_norm": 1.590631767522253, "learning_rate": 6.3627569622002325e-06, "loss": 0.4778, "step": 15984 }, { "epoch": 0.4709573825552789, "grad_norm": 1.5472094707681192, "learning_rate": 6.362262183860508e-06, "loss": 0.4494, "step": 15985 }, { "epoch": 0.4709868450127425, "grad_norm": 1.5264425745368884, "learning_rate": 6.361767391111189e-06, "loss": 0.4766, "step": 15986 }, { "epoch": 0.47101630747020606, "grad_norm": 1.399295094203507, "learning_rate": 6.361272583957507e-06, "loss": 0.439, "step": 15987 }, { "epoch": 0.47104576992766967, "grad_norm": 1.4548037846126423, "learning_rate": 6.360777762404702e-06, "loss": 0.4999, "step": 15988 }, { "epoch": 0.47107523238513327, "grad_norm": 1.4187746798410932, "learning_rate": 6.3602829264580034e-06, "loss": 0.3927, "step": 15989 }, { "epoch": 0.4711046948425968, "grad_norm": 1.4001666777187445, "learning_rate": 6.359788076122646e-06, "loss": 0.4311, "step": 15990 }, { "epoch": 0.4711341573000604, "grad_norm": 1.489880437623432, "learning_rate": 6.359293211403866e-06, "loss": 0.5325, "step": 15991 }, { "epoch": 0.47116361975752397, "grad_norm": 1.3641687608692767, "learning_rate": 6.358798332306895e-06, "loss": 0.3544, "step": 15992 }, { "epoch": 0.47119308221498757, "grad_norm": 1.4252163841393304, "learning_rate": 6.35830343883697e-06, "loss": 0.3256, "step": 15993 }, { "epoch": 0.4712225446724511, "grad_norm": 1.3983464138559523, "learning_rate": 6.357808530999325e-06, "loss": 0.4334, "step": 15994 }, { "epoch": 0.4712520071299147, "grad_norm": 1.2776460707117565, "learning_rate": 6.357313608799196e-06, "loss": 0.274, "step": 15995 }, { "epoch": 0.47128146958737827, "grad_norm": 1.4585755346190115, "learning_rate": 6.356818672241817e-06, "loss": 0.4157, "step": 15996 }, { "epoch": 0.47131093204484187, "grad_norm": 1.5714259378177275, "learning_rate": 6.356323721332424e-06, "loss": 0.536, "step": 15997 }, { "epoch": 0.4713403945023054, "grad_norm": 1.32185699681353, "learning_rate": 6.35582875607625e-06, "loss": 0.3966, "step": 15998 }, { "epoch": 0.471369856959769, "grad_norm": 1.5012116504437991, "learning_rate": 6.355333776478535e-06, "loss": 0.4571, "step": 15999 }, { "epoch": 0.47139931941723257, "grad_norm": 1.5781275925001772, "learning_rate": 6.35483878254451e-06, "loss": 0.5416, "step": 16000 }, { "epoch": 0.47142878187469617, "grad_norm": 1.5040731169531552, "learning_rate": 6.354343774279416e-06, "loss": 0.482, "step": 16001 }, { "epoch": 0.47145824433215977, "grad_norm": 1.537678098790389, "learning_rate": 6.353848751688484e-06, "loss": 0.4698, "step": 16002 }, { "epoch": 0.4714877067896233, "grad_norm": 1.5312375893802206, "learning_rate": 6.353353714776954e-06, "loss": 0.4056, "step": 16003 }, { "epoch": 0.4715171692470869, "grad_norm": 1.468920470983182, "learning_rate": 6.352858663550061e-06, "loss": 0.5476, "step": 16004 }, { "epoch": 0.47154663170455047, "grad_norm": 1.5163109482912518, "learning_rate": 6.352363598013041e-06, "loss": 0.5958, "step": 16005 }, { "epoch": 0.47157609416201407, "grad_norm": 1.4944507459386585, "learning_rate": 6.351868518171131e-06, "loss": 0.4778, "step": 16006 }, { "epoch": 0.4716055566194776, "grad_norm": 1.5218534454626593, "learning_rate": 6.3513734240295666e-06, "loss": 0.5061, "step": 16007 }, { "epoch": 0.4716350190769412, "grad_norm": 1.393688742919379, "learning_rate": 6.350878315593586e-06, "loss": 0.4313, "step": 16008 }, { "epoch": 0.47166448153440477, "grad_norm": 1.4425989064720661, "learning_rate": 6.3503831928684275e-06, "loss": 0.4646, "step": 16009 }, { "epoch": 0.47169394399186837, "grad_norm": 1.5126964039522466, "learning_rate": 6.349888055859328e-06, "loss": 0.5408, "step": 16010 }, { "epoch": 0.4717234064493319, "grad_norm": 1.5338950865781091, "learning_rate": 6.3493929045715205e-06, "loss": 0.478, "step": 16011 }, { "epoch": 0.4717528689067955, "grad_norm": 1.3139101786293665, "learning_rate": 6.34889773901025e-06, "loss": 0.3334, "step": 16012 }, { "epoch": 0.47178233136425907, "grad_norm": 1.4556423851247828, "learning_rate": 6.348402559180749e-06, "loss": 0.4117, "step": 16013 }, { "epoch": 0.47181179382172267, "grad_norm": 1.5622018144413743, "learning_rate": 6.347907365088256e-06, "loss": 0.4978, "step": 16014 }, { "epoch": 0.47184125627918627, "grad_norm": 1.4290610143153542, "learning_rate": 6.347412156738009e-06, "loss": 0.5089, "step": 16015 }, { "epoch": 0.4718707187366498, "grad_norm": 1.4602992072015661, "learning_rate": 6.346916934135249e-06, "loss": 0.3835, "step": 16016 }, { "epoch": 0.4719001811941134, "grad_norm": 1.3652394253189999, "learning_rate": 6.34642169728521e-06, "loss": 0.325, "step": 16017 }, { "epoch": 0.47192964365157697, "grad_norm": 1.4492893051934344, "learning_rate": 6.345926446193135e-06, "loss": 0.4262, "step": 16018 }, { "epoch": 0.47195910610904057, "grad_norm": 1.393418743684426, "learning_rate": 6.3454311808642585e-06, "loss": 0.3493, "step": 16019 }, { "epoch": 0.4719885685665041, "grad_norm": 1.5719898586742984, "learning_rate": 6.344935901303821e-06, "loss": 0.501, "step": 16020 }, { "epoch": 0.4720180310239677, "grad_norm": 1.4266794959665106, "learning_rate": 6.344440607517061e-06, "loss": 0.4262, "step": 16021 }, { "epoch": 0.47204749348143127, "grad_norm": 1.6070147366981702, "learning_rate": 6.34394529950922e-06, "loss": 0.4593, "step": 16022 }, { "epoch": 0.47207695593889487, "grad_norm": 1.5544556155833276, "learning_rate": 6.343449977285533e-06, "loss": 0.4341, "step": 16023 }, { "epoch": 0.4721064183963584, "grad_norm": 1.595564595278355, "learning_rate": 6.342954640851243e-06, "loss": 0.4024, "step": 16024 }, { "epoch": 0.472135880853822, "grad_norm": 1.7611631554594591, "learning_rate": 6.342459290211588e-06, "loss": 0.3808, "step": 16025 }, { "epoch": 0.47216534331128557, "grad_norm": 1.6231117487794, "learning_rate": 6.341963925371807e-06, "loss": 0.4649, "step": 16026 }, { "epoch": 0.47219480576874917, "grad_norm": 1.4151479249080579, "learning_rate": 6.3414685463371405e-06, "loss": 0.4778, "step": 16027 }, { "epoch": 0.4722242682262128, "grad_norm": 1.616964644353992, "learning_rate": 6.340973153112829e-06, "loss": 0.4539, "step": 16028 }, { "epoch": 0.4722537306836763, "grad_norm": 1.5501542000774626, "learning_rate": 6.340477745704114e-06, "loss": 0.558, "step": 16029 }, { "epoch": 0.4722831931411399, "grad_norm": 1.644151519021662, "learning_rate": 6.339982324116231e-06, "loss": 0.4379, "step": 16030 }, { "epoch": 0.47231265559860347, "grad_norm": 1.5909131998751331, "learning_rate": 6.339486888354425e-06, "loss": 0.4773, "step": 16031 }, { "epoch": 0.4723421180560671, "grad_norm": 1.5674569297030956, "learning_rate": 6.3389914384239335e-06, "loss": 0.6077, "step": 16032 }, { "epoch": 0.4723715805135306, "grad_norm": 1.4138428183318121, "learning_rate": 6.33849597433e-06, "loss": 0.3739, "step": 16033 }, { "epoch": 0.4724010429709942, "grad_norm": 1.4321698828818814, "learning_rate": 6.338000496077864e-06, "loss": 0.5308, "step": 16034 }, { "epoch": 0.47243050542845777, "grad_norm": 1.4424684072386016, "learning_rate": 6.337505003672766e-06, "loss": 0.4492, "step": 16035 }, { "epoch": 0.4724599678859214, "grad_norm": 1.7034472837536132, "learning_rate": 6.337009497119947e-06, "loss": 0.5175, "step": 16036 }, { "epoch": 0.4724894303433849, "grad_norm": 1.430832017718732, "learning_rate": 6.33651397642465e-06, "loss": 0.4704, "step": 16037 }, { "epoch": 0.4725188928008485, "grad_norm": 1.7996873732306324, "learning_rate": 6.3360184415921144e-06, "loss": 0.4872, "step": 16038 }, { "epoch": 0.47254835525831207, "grad_norm": 1.3487700849086321, "learning_rate": 6.335522892627585e-06, "loss": 0.4875, "step": 16039 }, { "epoch": 0.47257781771577567, "grad_norm": 1.5097963961298648, "learning_rate": 6.3350273295363004e-06, "loss": 0.4254, "step": 16040 }, { "epoch": 0.4726072801732393, "grad_norm": 1.4686103402148771, "learning_rate": 6.334531752323503e-06, "loss": 0.4411, "step": 16041 }, { "epoch": 0.4726367426307028, "grad_norm": 1.540369227967157, "learning_rate": 6.334036160994434e-06, "loss": 0.4591, "step": 16042 }, { "epoch": 0.4726662050881664, "grad_norm": 1.6259437943564186, "learning_rate": 6.33354055555434e-06, "loss": 0.599, "step": 16043 }, { "epoch": 0.47269566754562997, "grad_norm": 1.4931840693997893, "learning_rate": 6.33304493600846e-06, "loss": 0.4172, "step": 16044 }, { "epoch": 0.4727251300030936, "grad_norm": 1.718460129612649, "learning_rate": 6.332549302362036e-06, "loss": 0.3804, "step": 16045 }, { "epoch": 0.4727545924605571, "grad_norm": 1.3449035075015132, "learning_rate": 6.332053654620312e-06, "loss": 0.3592, "step": 16046 }, { "epoch": 0.4727840549180207, "grad_norm": 1.3584126629211222, "learning_rate": 6.331557992788528e-06, "loss": 0.4167, "step": 16047 }, { "epoch": 0.47281351737548427, "grad_norm": 1.4221475324819028, "learning_rate": 6.331062316871932e-06, "loss": 0.4567, "step": 16048 }, { "epoch": 0.4728429798329479, "grad_norm": 1.539389435627999, "learning_rate": 6.330566626875766e-06, "loss": 0.486, "step": 16049 }, { "epoch": 0.4728724422904114, "grad_norm": 1.5884269307781205, "learning_rate": 6.330070922805269e-06, "loss": 0.5402, "step": 16050 }, { "epoch": 0.472901904747875, "grad_norm": 1.5771382353027623, "learning_rate": 6.329575204665686e-06, "loss": 0.5484, "step": 16051 }, { "epoch": 0.47293136720533857, "grad_norm": 1.4380302043335589, "learning_rate": 6.329079472462265e-06, "loss": 0.4669, "step": 16052 }, { "epoch": 0.4729608296628022, "grad_norm": 1.5431590833629352, "learning_rate": 6.328583726200244e-06, "loss": 0.4578, "step": 16053 }, { "epoch": 0.4729902921202658, "grad_norm": 1.3578443343798772, "learning_rate": 6.328087965884871e-06, "loss": 0.323, "step": 16054 }, { "epoch": 0.4730197545777293, "grad_norm": 1.32887611472778, "learning_rate": 6.327592191521388e-06, "loss": 0.43, "step": 16055 }, { "epoch": 0.4730492170351929, "grad_norm": 1.3891760174793224, "learning_rate": 6.327096403115039e-06, "loss": 0.3726, "step": 16056 }, { "epoch": 0.4730786794926565, "grad_norm": 1.507797261585317, "learning_rate": 6.326600600671067e-06, "loss": 0.3831, "step": 16057 }, { "epoch": 0.4731081419501201, "grad_norm": 1.2574317461466868, "learning_rate": 6.32610478419472e-06, "loss": 0.3906, "step": 16058 }, { "epoch": 0.4731376044075836, "grad_norm": 1.4609625451415722, "learning_rate": 6.325608953691241e-06, "loss": 0.4104, "step": 16059 }, { "epoch": 0.4731670668650472, "grad_norm": 1.4495266740761967, "learning_rate": 6.3251131091658734e-06, "loss": 0.543, "step": 16060 }, { "epoch": 0.4731965293225108, "grad_norm": 1.623624686876173, "learning_rate": 6.3246172506238636e-06, "loss": 0.4251, "step": 16061 }, { "epoch": 0.4732259917799744, "grad_norm": 1.6750382878510661, "learning_rate": 6.324121378070456e-06, "loss": 0.5598, "step": 16062 }, { "epoch": 0.4732554542374379, "grad_norm": 1.7922927044955894, "learning_rate": 6.323625491510895e-06, "loss": 0.568, "step": 16063 }, { "epoch": 0.4732849166949015, "grad_norm": 1.4820693647216525, "learning_rate": 6.32312959095043e-06, "loss": 0.5117, "step": 16064 }, { "epoch": 0.4733143791523651, "grad_norm": 1.8839994314155561, "learning_rate": 6.322633676394301e-06, "loss": 0.4636, "step": 16065 }, { "epoch": 0.4733438416098287, "grad_norm": 1.6959135224339519, "learning_rate": 6.322137747847756e-06, "loss": 0.4314, "step": 16066 }, { "epoch": 0.4733733040672923, "grad_norm": 1.6021400805680859, "learning_rate": 6.321641805316042e-06, "loss": 0.489, "step": 16067 }, { "epoch": 0.4734027665247558, "grad_norm": 1.7132553442606784, "learning_rate": 6.321145848804403e-06, "loss": 0.5309, "step": 16068 }, { "epoch": 0.4734322289822194, "grad_norm": 1.4748221759552138, "learning_rate": 6.320649878318087e-06, "loss": 0.5272, "step": 16069 }, { "epoch": 0.473461691439683, "grad_norm": 1.3687875604307105, "learning_rate": 6.320153893862338e-06, "loss": 0.361, "step": 16070 }, { "epoch": 0.4734911538971466, "grad_norm": 1.7040324268847669, "learning_rate": 6.319657895442404e-06, "loss": 0.4813, "step": 16071 }, { "epoch": 0.4735206163546101, "grad_norm": 1.4632446996338477, "learning_rate": 6.3191618830635295e-06, "loss": 0.4189, "step": 16072 }, { "epoch": 0.4735500788120737, "grad_norm": 1.634482974891764, "learning_rate": 6.318665856730963e-06, "loss": 0.441, "step": 16073 }, { "epoch": 0.4735795412695373, "grad_norm": 1.7346190480255692, "learning_rate": 6.318169816449953e-06, "loss": 0.5071, "step": 16074 }, { "epoch": 0.4736090037270009, "grad_norm": 1.6912929016251872, "learning_rate": 6.317673762225742e-06, "loss": 0.4668, "step": 16075 }, { "epoch": 0.4736384661844644, "grad_norm": 1.4411641356682339, "learning_rate": 6.317177694063581e-06, "loss": 0.395, "step": 16076 }, { "epoch": 0.473667928641928, "grad_norm": 1.509903268478855, "learning_rate": 6.3166816119687155e-06, "loss": 0.3699, "step": 16077 }, { "epoch": 0.4736973910993916, "grad_norm": 1.4788818632609175, "learning_rate": 6.316185515946392e-06, "loss": 0.3013, "step": 16078 }, { "epoch": 0.4737268535568552, "grad_norm": 1.638827163965898, "learning_rate": 6.315689406001861e-06, "loss": 0.4518, "step": 16079 }, { "epoch": 0.4737563160143188, "grad_norm": 1.4327742318809102, "learning_rate": 6.315193282140369e-06, "loss": 0.3559, "step": 16080 }, { "epoch": 0.4737857784717823, "grad_norm": 1.4903648426085088, "learning_rate": 6.314697144367161e-06, "loss": 0.5364, "step": 16081 }, { "epoch": 0.47381524092924593, "grad_norm": 1.44055542393129, "learning_rate": 6.314200992687488e-06, "loss": 0.3522, "step": 16082 }, { "epoch": 0.4738447033867095, "grad_norm": 1.42156526694372, "learning_rate": 6.3137048271065974e-06, "loss": 0.458, "step": 16083 }, { "epoch": 0.4738741658441731, "grad_norm": 1.656719813793921, "learning_rate": 6.313208647629738e-06, "loss": 0.5514, "step": 16084 }, { "epoch": 0.4739036283016366, "grad_norm": 1.423927694481581, "learning_rate": 6.312712454262158e-06, "loss": 0.4902, "step": 16085 }, { "epoch": 0.47393309075910023, "grad_norm": 1.5117312053164411, "learning_rate": 6.312216247009104e-06, "loss": 0.4047, "step": 16086 }, { "epoch": 0.4739625532165638, "grad_norm": 1.3937281279659133, "learning_rate": 6.311720025875828e-06, "loss": 0.4359, "step": 16087 }, { "epoch": 0.4739920156740274, "grad_norm": 1.3924326070288362, "learning_rate": 6.311223790867578e-06, "loss": 0.3566, "step": 16088 }, { "epoch": 0.4740214781314909, "grad_norm": 1.5642366556422005, "learning_rate": 6.310727541989601e-06, "loss": 0.6501, "step": 16089 }, { "epoch": 0.47405094058895453, "grad_norm": 1.5316204633647532, "learning_rate": 6.310231279247148e-06, "loss": 0.4244, "step": 16090 }, { "epoch": 0.4740804030464181, "grad_norm": 1.5689560014162347, "learning_rate": 6.3097350026454675e-06, "loss": 0.5756, "step": 16091 }, { "epoch": 0.4741098655038817, "grad_norm": 1.4663637627532569, "learning_rate": 6.309238712189809e-06, "loss": 0.4037, "step": 16092 }, { "epoch": 0.4741393279613453, "grad_norm": 1.4650644741414702, "learning_rate": 6.308742407885422e-06, "loss": 0.4272, "step": 16093 }, { "epoch": 0.4741687904188088, "grad_norm": 1.4922138735806607, "learning_rate": 6.30824608973756e-06, "loss": 0.4957, "step": 16094 }, { "epoch": 0.47419825287627243, "grad_norm": 1.6189245080285224, "learning_rate": 6.307749757751465e-06, "loss": 0.4919, "step": 16095 }, { "epoch": 0.474227715333736, "grad_norm": 1.5354912146762074, "learning_rate": 6.307253411932395e-06, "loss": 0.4847, "step": 16096 }, { "epoch": 0.4742571777911996, "grad_norm": 1.2690003649000632, "learning_rate": 6.306757052285595e-06, "loss": 0.3509, "step": 16097 }, { "epoch": 0.4742866402486631, "grad_norm": 1.409392396641673, "learning_rate": 6.306260678816317e-06, "loss": 0.453, "step": 16098 }, { "epoch": 0.47431610270612673, "grad_norm": 1.546734330097003, "learning_rate": 6.305764291529813e-06, "loss": 0.4186, "step": 16099 }, { "epoch": 0.4743455651635903, "grad_norm": 1.594798893782361, "learning_rate": 6.305267890431333e-06, "loss": 0.4888, "step": 16100 }, { "epoch": 0.4743750276210539, "grad_norm": 1.5165934712097129, "learning_rate": 6.304771475526124e-06, "loss": 0.5205, "step": 16101 }, { "epoch": 0.4744044900785174, "grad_norm": 1.522954730314941, "learning_rate": 6.304275046819441e-06, "loss": 0.4803, "step": 16102 }, { "epoch": 0.47443395253598103, "grad_norm": 1.4044646546154618, "learning_rate": 6.303778604316535e-06, "loss": 0.4054, "step": 16103 }, { "epoch": 0.4744634149934446, "grad_norm": 1.5835058470010364, "learning_rate": 6.303282148022657e-06, "loss": 0.4457, "step": 16104 }, { "epoch": 0.4744928774509082, "grad_norm": 1.5885419151702005, "learning_rate": 6.302785677943055e-06, "loss": 0.5284, "step": 16105 }, { "epoch": 0.4745223399083718, "grad_norm": 1.4176139728646, "learning_rate": 6.302289194082986e-06, "loss": 0.4694, "step": 16106 }, { "epoch": 0.47455180236583533, "grad_norm": 1.382596921475747, "learning_rate": 6.301792696447697e-06, "loss": 0.3549, "step": 16107 }, { "epoch": 0.47458126482329893, "grad_norm": 1.5136041430039149, "learning_rate": 6.301296185042442e-06, "loss": 0.498, "step": 16108 }, { "epoch": 0.4746107272807625, "grad_norm": 1.5376435239054325, "learning_rate": 6.3007996598724715e-06, "loss": 0.5058, "step": 16109 }, { "epoch": 0.4746401897382261, "grad_norm": 1.7267865299991467, "learning_rate": 6.300303120943041e-06, "loss": 0.2532, "step": 16110 }, { "epoch": 0.47466965219568963, "grad_norm": 1.5507588677561426, "learning_rate": 6.299806568259399e-06, "loss": 0.5112, "step": 16111 }, { "epoch": 0.47469911465315323, "grad_norm": 1.4539393177297313, "learning_rate": 6.299310001826799e-06, "loss": 0.4036, "step": 16112 }, { "epoch": 0.4747285771106168, "grad_norm": 1.525310652391368, "learning_rate": 6.298813421650493e-06, "loss": 0.5536, "step": 16113 }, { "epoch": 0.4747580395680804, "grad_norm": 1.6155129896920728, "learning_rate": 6.2983168277357375e-06, "loss": 0.5077, "step": 16114 }, { "epoch": 0.47478750202554393, "grad_norm": 1.46627835946472, "learning_rate": 6.297820220087779e-06, "loss": 0.3729, "step": 16115 }, { "epoch": 0.47481696448300753, "grad_norm": 1.5860159165583938, "learning_rate": 6.297323598711876e-06, "loss": 0.3973, "step": 16116 }, { "epoch": 0.4748464269404711, "grad_norm": 1.6389384351301701, "learning_rate": 6.296826963613278e-06, "loss": 0.5051, "step": 16117 }, { "epoch": 0.4748758893979347, "grad_norm": 1.5516696377786665, "learning_rate": 6.296330314797241e-06, "loss": 0.3958, "step": 16118 }, { "epoch": 0.4749053518553983, "grad_norm": 1.51131581689719, "learning_rate": 6.2958336522690156e-06, "loss": 0.4176, "step": 16119 }, { "epoch": 0.47493481431286183, "grad_norm": 1.4288332526797893, "learning_rate": 6.295336976033857e-06, "loss": 0.4061, "step": 16120 }, { "epoch": 0.47496427677032543, "grad_norm": 1.4549334859439373, "learning_rate": 6.294840286097019e-06, "loss": 0.457, "step": 16121 }, { "epoch": 0.474993739227789, "grad_norm": 1.3985075058752603, "learning_rate": 6.294343582463755e-06, "loss": 0.4023, "step": 16122 }, { "epoch": 0.4750232016852526, "grad_norm": 1.4916134256624614, "learning_rate": 6.293846865139318e-06, "loss": 0.3365, "step": 16123 }, { "epoch": 0.47505266414271613, "grad_norm": 1.4549881854845417, "learning_rate": 6.293350134128964e-06, "loss": 0.343, "step": 16124 }, { "epoch": 0.47508212660017973, "grad_norm": 1.520924173186421, "learning_rate": 6.292853389437947e-06, "loss": 0.5162, "step": 16125 }, { "epoch": 0.4751115890576433, "grad_norm": 1.5746897976244423, "learning_rate": 6.29235663107152e-06, "loss": 0.438, "step": 16126 }, { "epoch": 0.4751410515151069, "grad_norm": 1.7523359577307696, "learning_rate": 6.291859859034939e-06, "loss": 0.5838, "step": 16127 }, { "epoch": 0.47517051397257043, "grad_norm": 1.5586175440309789, "learning_rate": 6.291363073333459e-06, "loss": 0.5262, "step": 16128 }, { "epoch": 0.47519997643003403, "grad_norm": 1.4918886085407477, "learning_rate": 6.290866273972332e-06, "loss": 0.4321, "step": 16129 }, { "epoch": 0.4752294388874976, "grad_norm": 1.6132674091575965, "learning_rate": 6.290369460956816e-06, "loss": 0.357, "step": 16130 }, { "epoch": 0.4752589013449612, "grad_norm": 1.6136598973253147, "learning_rate": 6.289872634292165e-06, "loss": 0.6297, "step": 16131 }, { "epoch": 0.4752883638024248, "grad_norm": 1.4353042013962805, "learning_rate": 6.289375793983634e-06, "loss": 0.3048, "step": 16132 }, { "epoch": 0.47531782625988833, "grad_norm": 1.4463833985480843, "learning_rate": 6.2888789400364795e-06, "loss": 0.515, "step": 16133 }, { "epoch": 0.47534728871735193, "grad_norm": 1.451665813887847, "learning_rate": 6.288382072455956e-06, "loss": 0.4744, "step": 16134 }, { "epoch": 0.4753767511748155, "grad_norm": 1.6762423061944802, "learning_rate": 6.287885191247319e-06, "loss": 0.4988, "step": 16135 }, { "epoch": 0.4754062136322791, "grad_norm": 1.5076862006190386, "learning_rate": 6.2873882964158255e-06, "loss": 0.4106, "step": 16136 }, { "epoch": 0.47543567608974263, "grad_norm": 1.4979677196784658, "learning_rate": 6.2868913879667315e-06, "loss": 0.3598, "step": 16137 }, { "epoch": 0.47546513854720623, "grad_norm": 1.6502056697827026, "learning_rate": 6.286394465905291e-06, "loss": 0.6282, "step": 16138 }, { "epoch": 0.4754946010046698, "grad_norm": 1.3352800956125983, "learning_rate": 6.285897530236764e-06, "loss": 0.42, "step": 16139 }, { "epoch": 0.4755240634621334, "grad_norm": 1.3530159622045679, "learning_rate": 6.285400580966403e-06, "loss": 0.4003, "step": 16140 }, { "epoch": 0.47555352591959693, "grad_norm": 1.6309217072538755, "learning_rate": 6.284903618099467e-06, "loss": 0.4627, "step": 16141 }, { "epoch": 0.47558298837706053, "grad_norm": 1.6765352166776597, "learning_rate": 6.284406641641212e-06, "loss": 0.5069, "step": 16142 }, { "epoch": 0.4756124508345241, "grad_norm": 1.4324965183817926, "learning_rate": 6.283909651596895e-06, "loss": 0.4212, "step": 16143 }, { "epoch": 0.4756419132919877, "grad_norm": 1.507174497892062, "learning_rate": 6.2834126479717735e-06, "loss": 0.403, "step": 16144 }, { "epoch": 0.4756713757494513, "grad_norm": 1.2604944550845743, "learning_rate": 6.282915630771105e-06, "loss": 0.3257, "step": 16145 }, { "epoch": 0.47570083820691483, "grad_norm": 1.5431698495083581, "learning_rate": 6.282418600000144e-06, "loss": 0.5433, "step": 16146 }, { "epoch": 0.47573030066437844, "grad_norm": 1.639383855858048, "learning_rate": 6.281921555664151e-06, "loss": 0.5942, "step": 16147 }, { "epoch": 0.475759763121842, "grad_norm": 1.3478671566812137, "learning_rate": 6.2814244977683825e-06, "loss": 0.3518, "step": 16148 }, { "epoch": 0.4757892255793056, "grad_norm": 1.5185245827030767, "learning_rate": 6.280927426318096e-06, "loss": 0.4092, "step": 16149 }, { "epoch": 0.47581868803676913, "grad_norm": 1.364894834606441, "learning_rate": 6.280430341318549e-06, "loss": 0.3406, "step": 16150 }, { "epoch": 0.47584815049423274, "grad_norm": 1.3696438195272016, "learning_rate": 6.279933242775e-06, "loss": 0.449, "step": 16151 }, { "epoch": 0.4758776129516963, "grad_norm": 1.3694791433820643, "learning_rate": 6.279436130692709e-06, "loss": 0.4301, "step": 16152 }, { "epoch": 0.4759070754091599, "grad_norm": 1.5137580600216005, "learning_rate": 6.278939005076929e-06, "loss": 0.4946, "step": 16153 }, { "epoch": 0.47593653786662343, "grad_norm": 1.5304669924448455, "learning_rate": 6.278441865932925e-06, "loss": 0.4033, "step": 16154 }, { "epoch": 0.47596600032408704, "grad_norm": 2.0865017555643606, "learning_rate": 6.2779447132659535e-06, "loss": 0.4736, "step": 16155 }, { "epoch": 0.4759954627815506, "grad_norm": 1.3660012653477889, "learning_rate": 6.277447547081269e-06, "loss": 0.3569, "step": 16156 }, { "epoch": 0.4760249252390142, "grad_norm": 1.5389061719719748, "learning_rate": 6.276950367384135e-06, "loss": 0.5309, "step": 16157 }, { "epoch": 0.4760543876964778, "grad_norm": 1.4171402542583127, "learning_rate": 6.2764531741798096e-06, "loss": 0.3681, "step": 16158 }, { "epoch": 0.47608385015394133, "grad_norm": 1.5268921255746115, "learning_rate": 6.275955967473553e-06, "loss": 0.4491, "step": 16159 }, { "epoch": 0.47611331261140494, "grad_norm": 1.4516396248971246, "learning_rate": 6.275458747270619e-06, "loss": 0.5049, "step": 16160 }, { "epoch": 0.4761427750688685, "grad_norm": 1.5338497551732728, "learning_rate": 6.274961513576274e-06, "loss": 0.5281, "step": 16161 }, { "epoch": 0.4761722375263321, "grad_norm": 1.7374553218210544, "learning_rate": 6.274464266395774e-06, "loss": 0.4707, "step": 16162 }, { "epoch": 0.47620169998379563, "grad_norm": 1.364387878576633, "learning_rate": 6.273967005734379e-06, "loss": 0.385, "step": 16163 }, { "epoch": 0.47623116244125924, "grad_norm": 1.582390841798002, "learning_rate": 6.273469731597351e-06, "loss": 0.536, "step": 16164 }, { "epoch": 0.4762606248987228, "grad_norm": 1.581786532017169, "learning_rate": 6.272972443989946e-06, "loss": 0.5404, "step": 16165 }, { "epoch": 0.4762900873561864, "grad_norm": 1.3172957275055903, "learning_rate": 6.272475142917428e-06, "loss": 0.3158, "step": 16166 }, { "epoch": 0.47631954981364993, "grad_norm": 1.5858113350531602, "learning_rate": 6.271977828385056e-06, "loss": 0.4833, "step": 16167 }, { "epoch": 0.47634901227111354, "grad_norm": 1.6141767196827081, "learning_rate": 6.2714805003980885e-06, "loss": 0.3991, "step": 16168 }, { "epoch": 0.4763784747285771, "grad_norm": 1.5540241491660733, "learning_rate": 6.27098315896179e-06, "loss": 0.3748, "step": 16169 }, { "epoch": 0.4764079371860407, "grad_norm": 1.665064741357689, "learning_rate": 6.270485804081418e-06, "loss": 0.4628, "step": 16170 }, { "epoch": 0.4764373996435043, "grad_norm": 1.408807766882895, "learning_rate": 6.269988435762233e-06, "loss": 0.3775, "step": 16171 }, { "epoch": 0.47646686210096784, "grad_norm": 1.7830983210272267, "learning_rate": 6.269491054009498e-06, "loss": 0.4734, "step": 16172 }, { "epoch": 0.47649632455843144, "grad_norm": 1.4696104018904526, "learning_rate": 6.268993658828474e-06, "loss": 0.3999, "step": 16173 }, { "epoch": 0.476525787015895, "grad_norm": 1.5118851565637945, "learning_rate": 6.268496250224423e-06, "loss": 0.4979, "step": 16174 }, { "epoch": 0.4765552494733586, "grad_norm": 1.4790874904517584, "learning_rate": 6.267998828202603e-06, "loss": 0.4818, "step": 16175 }, { "epoch": 0.47658471193082214, "grad_norm": 1.4586462427329838, "learning_rate": 6.267501392768279e-06, "loss": 0.5383, "step": 16176 }, { "epoch": 0.47661417438828574, "grad_norm": 1.511021615173576, "learning_rate": 6.267003943926711e-06, "loss": 0.4321, "step": 16177 }, { "epoch": 0.4766436368457493, "grad_norm": 1.5508627548663303, "learning_rate": 6.266506481683161e-06, "loss": 0.3618, "step": 16178 }, { "epoch": 0.4766730993032129, "grad_norm": 1.6642526022757782, "learning_rate": 6.266009006042893e-06, "loss": 0.5246, "step": 16179 }, { "epoch": 0.47670256176067644, "grad_norm": 1.4540976607938605, "learning_rate": 6.265511517011167e-06, "loss": 0.4411, "step": 16180 }, { "epoch": 0.47673202421814004, "grad_norm": 1.5187078691651046, "learning_rate": 6.2650140145932445e-06, "loss": 0.4232, "step": 16181 }, { "epoch": 0.4767614866756036, "grad_norm": 1.6453376078952848, "learning_rate": 6.264516498794391e-06, "loss": 0.4792, "step": 16182 }, { "epoch": 0.4767909491330672, "grad_norm": 1.5338614543475697, "learning_rate": 6.2640189696198665e-06, "loss": 0.4001, "step": 16183 }, { "epoch": 0.4768204115905308, "grad_norm": 1.4228693201091156, "learning_rate": 6.2635214270749355e-06, "loss": 0.436, "step": 16184 }, { "epoch": 0.47684987404799434, "grad_norm": 1.5335969781653658, "learning_rate": 6.26302387116486e-06, "loss": 0.3646, "step": 16185 }, { "epoch": 0.47687933650545794, "grad_norm": 1.3829346149215618, "learning_rate": 6.2625263018949015e-06, "loss": 0.4756, "step": 16186 }, { "epoch": 0.4769087989629215, "grad_norm": 1.7473989798349394, "learning_rate": 6.262028719270325e-06, "loss": 0.5028, "step": 16187 }, { "epoch": 0.4769382614203851, "grad_norm": 1.556418039600522, "learning_rate": 6.2615311232963925e-06, "loss": 0.5014, "step": 16188 }, { "epoch": 0.47696772387784864, "grad_norm": 1.38557225368253, "learning_rate": 6.261033513978369e-06, "loss": 0.3502, "step": 16189 }, { "epoch": 0.47699718633531224, "grad_norm": 1.384090317721425, "learning_rate": 6.260535891321518e-06, "loss": 0.4679, "step": 16190 }, { "epoch": 0.4770266487927758, "grad_norm": 1.5167692493805927, "learning_rate": 6.260038255331102e-06, "loss": 0.4714, "step": 16191 }, { "epoch": 0.4770561112502394, "grad_norm": 1.8279496411062195, "learning_rate": 6.259540606012385e-06, "loss": 0.6247, "step": 16192 }, { "epoch": 0.47708557370770294, "grad_norm": 1.4239847870693696, "learning_rate": 6.259042943370631e-06, "loss": 0.4177, "step": 16193 }, { "epoch": 0.47711503616516654, "grad_norm": 1.4643383101555945, "learning_rate": 6.258545267411107e-06, "loss": 0.3526, "step": 16194 }, { "epoch": 0.4771444986226301, "grad_norm": 1.474220881029066, "learning_rate": 6.258047578139072e-06, "loss": 0.3658, "step": 16195 }, { "epoch": 0.4771739610800937, "grad_norm": 1.3462130829492085, "learning_rate": 6.257549875559794e-06, "loss": 0.4168, "step": 16196 }, { "epoch": 0.4772034235375573, "grad_norm": 1.5424601907716662, "learning_rate": 6.257052159678537e-06, "loss": 0.2823, "step": 16197 }, { "epoch": 0.47723288599502084, "grad_norm": 1.5721567437819537, "learning_rate": 6.256554430500565e-06, "loss": 0.4578, "step": 16198 }, { "epoch": 0.47726234845248444, "grad_norm": 1.659397660165793, "learning_rate": 6.256056688031143e-06, "loss": 0.3603, "step": 16199 }, { "epoch": 0.477291810909948, "grad_norm": 1.5506450260833147, "learning_rate": 6.255558932275537e-06, "loss": 0.3531, "step": 16200 }, { "epoch": 0.4773212733674116, "grad_norm": 1.613773722062454, "learning_rate": 6.25506116323901e-06, "loss": 0.3903, "step": 16201 }, { "epoch": 0.47735073582487514, "grad_norm": 1.508106498279453, "learning_rate": 6.254563380926829e-06, "loss": 0.5687, "step": 16202 }, { "epoch": 0.47738019828233874, "grad_norm": 1.5717762463377094, "learning_rate": 6.254065585344259e-06, "loss": 0.411, "step": 16203 }, { "epoch": 0.4774096607398023, "grad_norm": 1.6805866362206856, "learning_rate": 6.253567776496566e-06, "loss": 0.504, "step": 16204 }, { "epoch": 0.4774391231972659, "grad_norm": 1.3319142027384814, "learning_rate": 6.253069954389014e-06, "loss": 0.4041, "step": 16205 }, { "epoch": 0.47746858565472944, "grad_norm": 1.7540784903995614, "learning_rate": 6.252572119026871e-06, "loss": 0.5692, "step": 16206 }, { "epoch": 0.47749804811219304, "grad_norm": 1.2082367765773883, "learning_rate": 6.2520742704154e-06, "loss": 0.2981, "step": 16207 }, { "epoch": 0.4775275105696566, "grad_norm": 1.672422495342442, "learning_rate": 6.2515764085598705e-06, "loss": 0.5042, "step": 16208 }, { "epoch": 0.4775569730271202, "grad_norm": 1.3817199975958268, "learning_rate": 6.251078533465549e-06, "loss": 0.2324, "step": 16209 }, { "epoch": 0.4775864354845838, "grad_norm": 1.7014960710021385, "learning_rate": 6.250580645137697e-06, "loss": 0.6784, "step": 16210 }, { "epoch": 0.47761589794204734, "grad_norm": 1.5534390932063158, "learning_rate": 6.250082743581585e-06, "loss": 0.5376, "step": 16211 }, { "epoch": 0.47764536039951094, "grad_norm": 1.4641663176470416, "learning_rate": 6.249584828802479e-06, "loss": 0.546, "step": 16212 }, { "epoch": 0.4776748228569745, "grad_norm": 1.5007221268719522, "learning_rate": 6.249086900805644e-06, "loss": 0.3501, "step": 16213 }, { "epoch": 0.4777042853144381, "grad_norm": 1.6158348369321716, "learning_rate": 6.24858895959635e-06, "loss": 0.471, "step": 16214 }, { "epoch": 0.47773374777190164, "grad_norm": 1.882487998641765, "learning_rate": 6.248091005179863e-06, "loss": 0.2961, "step": 16215 }, { "epoch": 0.47776321022936524, "grad_norm": 1.5052894561319503, "learning_rate": 6.247593037561448e-06, "loss": 0.4073, "step": 16216 }, { "epoch": 0.4777926726868288, "grad_norm": 1.3662606088008378, "learning_rate": 6.247095056746375e-06, "loss": 0.3417, "step": 16217 }, { "epoch": 0.4778221351442924, "grad_norm": 1.6709451132717048, "learning_rate": 6.2465970627399106e-06, "loss": 0.4993, "step": 16218 }, { "epoch": 0.47785159760175594, "grad_norm": 1.394354221440138, "learning_rate": 6.246099055547323e-06, "loss": 0.3076, "step": 16219 }, { "epoch": 0.47788106005921954, "grad_norm": 1.5980399905990466, "learning_rate": 6.2456010351738785e-06, "loss": 0.5505, "step": 16220 }, { "epoch": 0.4779105225166831, "grad_norm": 1.4094078708853452, "learning_rate": 6.245103001624846e-06, "loss": 0.4285, "step": 16221 }, { "epoch": 0.4779399849741467, "grad_norm": 1.4272702079602457, "learning_rate": 6.244604954905493e-06, "loss": 0.5322, "step": 16222 }, { "epoch": 0.4779694474316103, "grad_norm": 1.570160974154289, "learning_rate": 6.244106895021088e-06, "loss": 0.4951, "step": 16223 }, { "epoch": 0.47799890988907384, "grad_norm": 1.5309607468802116, "learning_rate": 6.243608821976901e-06, "loss": 0.396, "step": 16224 }, { "epoch": 0.47802837234653744, "grad_norm": 1.4414257773294283, "learning_rate": 6.243110735778197e-06, "loss": 0.4367, "step": 16225 }, { "epoch": 0.478057834804001, "grad_norm": 1.521008917868392, "learning_rate": 6.242612636430248e-06, "loss": 0.3864, "step": 16226 }, { "epoch": 0.4780872972614646, "grad_norm": 1.5356593686976114, "learning_rate": 6.24211452393832e-06, "loss": 0.4747, "step": 16227 }, { "epoch": 0.47811675971892814, "grad_norm": 1.3972227710153509, "learning_rate": 6.241616398307684e-06, "loss": 0.4187, "step": 16228 }, { "epoch": 0.47814622217639174, "grad_norm": 1.4171474318484167, "learning_rate": 6.2411182595436085e-06, "loss": 0.3564, "step": 16229 }, { "epoch": 0.4781756846338553, "grad_norm": 1.3309755098495775, "learning_rate": 6.240620107651361e-06, "loss": 0.3844, "step": 16230 }, { "epoch": 0.4782051470913189, "grad_norm": 1.385130584247373, "learning_rate": 6.240121942636212e-06, "loss": 0.4893, "step": 16231 }, { "epoch": 0.47823460954878244, "grad_norm": 1.3785532849086886, "learning_rate": 6.239623764503432e-06, "loss": 0.4124, "step": 16232 }, { "epoch": 0.47826407200624604, "grad_norm": 1.448836023188293, "learning_rate": 6.239125573258289e-06, "loss": 0.4653, "step": 16233 }, { "epoch": 0.4782935344637096, "grad_norm": 1.4127346631263238, "learning_rate": 6.238627368906055e-06, "loss": 0.4502, "step": 16234 }, { "epoch": 0.4783229969211732, "grad_norm": 1.647515146715115, "learning_rate": 6.238129151451996e-06, "loss": 0.4979, "step": 16235 }, { "epoch": 0.4783524593786368, "grad_norm": 1.628328010650699, "learning_rate": 6.237630920901385e-06, "loss": 0.512, "step": 16236 }, { "epoch": 0.47838192183610034, "grad_norm": 1.9773969999541994, "learning_rate": 6.237132677259491e-06, "loss": 0.6321, "step": 16237 }, { "epoch": 0.47841138429356395, "grad_norm": 1.4685982689069639, "learning_rate": 6.236634420531586e-06, "loss": 0.3938, "step": 16238 }, { "epoch": 0.4784408467510275, "grad_norm": 1.3959224960745393, "learning_rate": 6.236136150722937e-06, "loss": 0.4738, "step": 16239 }, { "epoch": 0.4784703092084911, "grad_norm": 1.5495005927253223, "learning_rate": 6.235637867838817e-06, "loss": 0.3836, "step": 16240 }, { "epoch": 0.47849977166595464, "grad_norm": 1.4475133915875498, "learning_rate": 6.235139571884497e-06, "loss": 0.3367, "step": 16241 }, { "epoch": 0.47852923412341825, "grad_norm": 1.6150209713719863, "learning_rate": 6.234641262865246e-06, "loss": 0.6028, "step": 16242 }, { "epoch": 0.4785586965808818, "grad_norm": 1.4918305570864852, "learning_rate": 6.234142940786338e-06, "loss": 0.4486, "step": 16243 }, { "epoch": 0.4785881590383454, "grad_norm": 1.7004169229917028, "learning_rate": 6.233644605653039e-06, "loss": 0.6583, "step": 16244 }, { "epoch": 0.47861762149580894, "grad_norm": 1.6133563839329883, "learning_rate": 6.233146257470625e-06, "loss": 0.3811, "step": 16245 }, { "epoch": 0.47864708395327255, "grad_norm": 1.7056192765575042, "learning_rate": 6.232647896244366e-06, "loss": 0.4622, "step": 16246 }, { "epoch": 0.4786765464107361, "grad_norm": 1.697916220084315, "learning_rate": 6.232149521979532e-06, "loss": 0.4225, "step": 16247 }, { "epoch": 0.4787060088681997, "grad_norm": 1.4615024257330334, "learning_rate": 6.231651134681398e-06, "loss": 0.4595, "step": 16248 }, { "epoch": 0.4787354713256633, "grad_norm": 1.4010107535535041, "learning_rate": 6.2311527343552325e-06, "loss": 0.4828, "step": 16249 }, { "epoch": 0.47876493378312684, "grad_norm": 1.8446622775116173, "learning_rate": 6.230654321006309e-06, "loss": 0.4701, "step": 16250 }, { "epoch": 0.47879439624059045, "grad_norm": 1.449750375999064, "learning_rate": 6.230155894639897e-06, "loss": 0.3711, "step": 16251 }, { "epoch": 0.478823858698054, "grad_norm": 1.3933212126261574, "learning_rate": 6.2296574552612734e-06, "loss": 0.444, "step": 16252 }, { "epoch": 0.4788533211555176, "grad_norm": 1.6308705754732695, "learning_rate": 6.229159002875707e-06, "loss": 0.3982, "step": 16253 }, { "epoch": 0.47888278361298114, "grad_norm": 1.6106964800198045, "learning_rate": 6.228660537488471e-06, "loss": 0.5052, "step": 16254 }, { "epoch": 0.47891224607044475, "grad_norm": 1.5951202574108994, "learning_rate": 6.2281620591048394e-06, "loss": 0.4712, "step": 16255 }, { "epoch": 0.4789417085279083, "grad_norm": 1.6877601854766076, "learning_rate": 6.2276635677300825e-06, "loss": 0.3586, "step": 16256 }, { "epoch": 0.4789711709853719, "grad_norm": 1.3911390887252646, "learning_rate": 6.227165063369476e-06, "loss": 0.422, "step": 16257 }, { "epoch": 0.47900063344283544, "grad_norm": 1.6438748192208918, "learning_rate": 6.2266665460282905e-06, "loss": 0.5081, "step": 16258 }, { "epoch": 0.47903009590029905, "grad_norm": 1.4142688972810704, "learning_rate": 6.2261680157118e-06, "loss": 0.5344, "step": 16259 }, { "epoch": 0.4790595583577626, "grad_norm": 1.5727699577279344, "learning_rate": 6.225669472425277e-06, "loss": 0.4355, "step": 16260 }, { "epoch": 0.4790890208152262, "grad_norm": 1.6281726152789127, "learning_rate": 6.225170916173997e-06, "loss": 0.4723, "step": 16261 }, { "epoch": 0.4791184832726898, "grad_norm": 1.6153978234488116, "learning_rate": 6.224672346963232e-06, "loss": 0.4575, "step": 16262 }, { "epoch": 0.47914794573015335, "grad_norm": 1.7119622285860174, "learning_rate": 6.224173764798257e-06, "loss": 0.5494, "step": 16263 }, { "epoch": 0.47917740818761695, "grad_norm": 1.6862086064213688, "learning_rate": 6.223675169684345e-06, "loss": 0.5923, "step": 16264 }, { "epoch": 0.4792068706450805, "grad_norm": 1.6343854025171543, "learning_rate": 6.223176561626769e-06, "loss": 0.4054, "step": 16265 }, { "epoch": 0.4792363331025441, "grad_norm": 1.6063749269388607, "learning_rate": 6.222677940630805e-06, "loss": 0.4515, "step": 16266 }, { "epoch": 0.47926579556000765, "grad_norm": 1.5112850467853165, "learning_rate": 6.222179306701726e-06, "loss": 0.463, "step": 16267 }, { "epoch": 0.47929525801747125, "grad_norm": 1.2450745371450271, "learning_rate": 6.2216806598448064e-06, "loss": 0.3607, "step": 16268 }, { "epoch": 0.4793247204749348, "grad_norm": 1.2749452863643023, "learning_rate": 6.221182000065321e-06, "loss": 0.3915, "step": 16269 }, { "epoch": 0.4793541829323984, "grad_norm": 1.4773195338461795, "learning_rate": 6.220683327368546e-06, "loss": 0.5305, "step": 16270 }, { "epoch": 0.47938364538986195, "grad_norm": 1.753039411564543, "learning_rate": 6.220184641759754e-06, "loss": 0.6062, "step": 16271 }, { "epoch": 0.47941310784732555, "grad_norm": 1.3272182942354391, "learning_rate": 6.219685943244219e-06, "loss": 0.377, "step": 16272 }, { "epoch": 0.4794425703047891, "grad_norm": 1.766118506651802, "learning_rate": 6.2191872318272205e-06, "loss": 0.6625, "step": 16273 }, { "epoch": 0.4794720327622527, "grad_norm": 1.5963004055474037, "learning_rate": 6.218688507514031e-06, "loss": 0.5593, "step": 16274 }, { "epoch": 0.4795014952197163, "grad_norm": 1.5807292145038274, "learning_rate": 6.2181897703099226e-06, "loss": 0.4024, "step": 16275 }, { "epoch": 0.47953095767717985, "grad_norm": 1.665903923026107, "learning_rate": 6.217691020220177e-06, "loss": 0.4414, "step": 16276 }, { "epoch": 0.47956042013464345, "grad_norm": 1.5408500469911304, "learning_rate": 6.217192257250065e-06, "loss": 0.3844, "step": 16277 }, { "epoch": 0.479589882592107, "grad_norm": 1.4923459962067978, "learning_rate": 6.216693481404866e-06, "loss": 0.4775, "step": 16278 }, { "epoch": 0.4796193450495706, "grad_norm": 1.4286670587523427, "learning_rate": 6.216194692689854e-06, "loss": 0.4357, "step": 16279 }, { "epoch": 0.47964880750703415, "grad_norm": 1.64698401950548, "learning_rate": 6.215695891110304e-06, "loss": 0.5829, "step": 16280 }, { "epoch": 0.47967826996449775, "grad_norm": 1.5124483730671208, "learning_rate": 6.215197076671493e-06, "loss": 0.4272, "step": 16281 }, { "epoch": 0.4797077324219613, "grad_norm": 1.6151204754591109, "learning_rate": 6.2146982493787e-06, "loss": 0.4318, "step": 16282 }, { "epoch": 0.4797371948794249, "grad_norm": 1.5458162795897132, "learning_rate": 6.2141994092371985e-06, "loss": 0.4246, "step": 16283 }, { "epoch": 0.47976665733688845, "grad_norm": 1.7000274319060185, "learning_rate": 6.213700556252264e-06, "loss": 0.3958, "step": 16284 }, { "epoch": 0.47979611979435205, "grad_norm": 1.5495886812551325, "learning_rate": 6.2132016904291756e-06, "loss": 0.4843, "step": 16285 }, { "epoch": 0.4798255822518156, "grad_norm": 1.2953289585291983, "learning_rate": 6.212702811773211e-06, "loss": 0.4149, "step": 16286 }, { "epoch": 0.4798550447092792, "grad_norm": 1.365879218906375, "learning_rate": 6.212203920289642e-06, "loss": 0.3319, "step": 16287 }, { "epoch": 0.4798845071667428, "grad_norm": 1.835156542289687, "learning_rate": 6.211705015983754e-06, "loss": 0.5431, "step": 16288 }, { "epoch": 0.47991396962420635, "grad_norm": 1.488125758428184, "learning_rate": 6.2112060988608165e-06, "loss": 0.4163, "step": 16289 }, { "epoch": 0.47994343208166995, "grad_norm": 1.5478595717467205, "learning_rate": 6.21070716892611e-06, "loss": 0.4964, "step": 16290 }, { "epoch": 0.4799728945391335, "grad_norm": 1.5029256340587842, "learning_rate": 6.210208226184914e-06, "loss": 0.4329, "step": 16291 }, { "epoch": 0.4800023569965971, "grad_norm": 1.460551758622381, "learning_rate": 6.2097092706425024e-06, "loss": 0.4276, "step": 16292 }, { "epoch": 0.48003181945406065, "grad_norm": 1.71481544060379, "learning_rate": 6.209210302304156e-06, "loss": 0.4964, "step": 16293 }, { "epoch": 0.48006128191152425, "grad_norm": 1.4864997905618322, "learning_rate": 6.208711321175151e-06, "loss": 0.33, "step": 16294 }, { "epoch": 0.4800907443689878, "grad_norm": 1.4102003149352504, "learning_rate": 6.208212327260766e-06, "loss": 0.3766, "step": 16295 }, { "epoch": 0.4801202068264514, "grad_norm": 1.5357693744105692, "learning_rate": 6.207713320566279e-06, "loss": 0.4017, "step": 16296 }, { "epoch": 0.48014966928391495, "grad_norm": 1.4885567379553457, "learning_rate": 6.207214301096969e-06, "loss": 0.3957, "step": 16297 }, { "epoch": 0.48017913174137855, "grad_norm": 1.74623187978866, "learning_rate": 6.206715268858115e-06, "loss": 0.5413, "step": 16298 }, { "epoch": 0.4802085941988421, "grad_norm": 1.716435867818935, "learning_rate": 6.2062162238549926e-06, "loss": 0.5135, "step": 16299 }, { "epoch": 0.4802380566563057, "grad_norm": 1.546342658785309, "learning_rate": 6.2057171660928835e-06, "loss": 0.4456, "step": 16300 }, { "epoch": 0.4802675191137693, "grad_norm": 1.4978022128695094, "learning_rate": 6.205218095577066e-06, "loss": 0.4366, "step": 16301 }, { "epoch": 0.48029698157123285, "grad_norm": 1.4872489810833087, "learning_rate": 6.204719012312818e-06, "loss": 0.3562, "step": 16302 }, { "epoch": 0.48032644402869645, "grad_norm": 1.5620214581664051, "learning_rate": 6.20421991630542e-06, "loss": 0.4597, "step": 16303 }, { "epoch": 0.48035590648616, "grad_norm": 1.3549004927956576, "learning_rate": 6.203720807560151e-06, "loss": 0.3539, "step": 16304 }, { "epoch": 0.4803853689436236, "grad_norm": 1.4248063078547721, "learning_rate": 6.2032216860822895e-06, "loss": 0.4184, "step": 16305 }, { "epoch": 0.48041483140108715, "grad_norm": 1.3639859376923524, "learning_rate": 6.202722551877116e-06, "loss": 0.3014, "step": 16306 }, { "epoch": 0.48044429385855075, "grad_norm": 1.2913969629191064, "learning_rate": 6.20222340494991e-06, "loss": 0.4762, "step": 16307 }, { "epoch": 0.4804737563160143, "grad_norm": 1.5538972537285514, "learning_rate": 6.201724245305951e-06, "loss": 0.4911, "step": 16308 }, { "epoch": 0.4805032187734779, "grad_norm": 1.5309646114371998, "learning_rate": 6.2012250729505195e-06, "loss": 0.4731, "step": 16309 }, { "epoch": 0.48053268123094145, "grad_norm": 1.395824041751005, "learning_rate": 6.200725887888894e-06, "loss": 0.3619, "step": 16310 }, { "epoch": 0.48056214368840505, "grad_norm": 1.4621157369793143, "learning_rate": 6.2002266901263566e-06, "loss": 0.5007, "step": 16311 }, { "epoch": 0.4805916061458686, "grad_norm": 1.519350693581158, "learning_rate": 6.199727479668188e-06, "loss": 0.5547, "step": 16312 }, { "epoch": 0.4806210686033322, "grad_norm": 1.6813438978985924, "learning_rate": 6.199228256519668e-06, "loss": 0.5183, "step": 16313 }, { "epoch": 0.4806505310607958, "grad_norm": 1.6641035964552942, "learning_rate": 6.198729020686076e-06, "loss": 0.464, "step": 16314 }, { "epoch": 0.48067999351825935, "grad_norm": 1.4898845745925686, "learning_rate": 6.198229772172693e-06, "loss": 0.4249, "step": 16315 }, { "epoch": 0.48070945597572295, "grad_norm": 1.3349736119242508, "learning_rate": 6.197730510984802e-06, "loss": 0.3697, "step": 16316 }, { "epoch": 0.4807389184331865, "grad_norm": 1.5561662920963706, "learning_rate": 6.197231237127681e-06, "loss": 0.4534, "step": 16317 }, { "epoch": 0.4807683808906501, "grad_norm": 1.3846249893532474, "learning_rate": 6.196731950606615e-06, "loss": 0.4997, "step": 16318 }, { "epoch": 0.48079784334811365, "grad_norm": 1.401956198967344, "learning_rate": 6.1962326514268835e-06, "loss": 0.4117, "step": 16319 }, { "epoch": 0.48082730580557725, "grad_norm": 1.681243907728219, "learning_rate": 6.195733339593766e-06, "loss": 0.4764, "step": 16320 }, { "epoch": 0.4808567682630408, "grad_norm": 1.271102350876118, "learning_rate": 6.195234015112546e-06, "loss": 0.3866, "step": 16321 }, { "epoch": 0.4808862307205044, "grad_norm": 1.3364671168356879, "learning_rate": 6.194734677988504e-06, "loss": 0.4281, "step": 16322 }, { "epoch": 0.48091569317796795, "grad_norm": 1.548030759541546, "learning_rate": 6.194235328226924e-06, "loss": 0.4634, "step": 16323 }, { "epoch": 0.48094515563543155, "grad_norm": 1.6058499121138183, "learning_rate": 6.193735965833086e-06, "loss": 0.4755, "step": 16324 }, { "epoch": 0.4809746180928951, "grad_norm": 1.582300182037114, "learning_rate": 6.193236590812273e-06, "loss": 0.3521, "step": 16325 }, { "epoch": 0.4810040805503587, "grad_norm": 1.4849703342662197, "learning_rate": 6.192737203169766e-06, "loss": 0.5437, "step": 16326 }, { "epoch": 0.4810335430078223, "grad_norm": 1.4747980132835172, "learning_rate": 6.1922378029108496e-06, "loss": 0.4496, "step": 16327 }, { "epoch": 0.48106300546528585, "grad_norm": 1.4310922116995282, "learning_rate": 6.191738390040806e-06, "loss": 0.4802, "step": 16328 }, { "epoch": 0.48109246792274946, "grad_norm": 1.3556166667193532, "learning_rate": 6.191238964564914e-06, "loss": 0.3683, "step": 16329 }, { "epoch": 0.481121930380213, "grad_norm": 1.525974646394576, "learning_rate": 6.190739526488462e-06, "loss": 0.4487, "step": 16330 }, { "epoch": 0.4811513928376766, "grad_norm": 1.5121841557589693, "learning_rate": 6.190240075816728e-06, "loss": 0.4934, "step": 16331 }, { "epoch": 0.48118085529514015, "grad_norm": 1.6129983102301788, "learning_rate": 6.189740612554998e-06, "loss": 0.4643, "step": 16332 }, { "epoch": 0.48121031775260376, "grad_norm": 1.4831700920668398, "learning_rate": 6.189241136708555e-06, "loss": 0.5584, "step": 16333 }, { "epoch": 0.4812397802100673, "grad_norm": 1.5550726348420256, "learning_rate": 6.188741648282682e-06, "loss": 0.5598, "step": 16334 }, { "epoch": 0.4812692426675309, "grad_norm": 1.4683803625679663, "learning_rate": 6.188242147282662e-06, "loss": 0.4782, "step": 16335 }, { "epoch": 0.48129870512499445, "grad_norm": 1.5370411985336982, "learning_rate": 6.187742633713778e-06, "loss": 0.4681, "step": 16336 }, { "epoch": 0.48132816758245806, "grad_norm": 1.6333184651328703, "learning_rate": 6.187243107581313e-06, "loss": 0.4854, "step": 16337 }, { "epoch": 0.4813576300399216, "grad_norm": 1.4704722247414137, "learning_rate": 6.186743568890555e-06, "loss": 0.473, "step": 16338 }, { "epoch": 0.4813870924973852, "grad_norm": 1.5848347140194745, "learning_rate": 6.186244017646783e-06, "loss": 0.4036, "step": 16339 }, { "epoch": 0.4814165549548488, "grad_norm": 1.526695435831085, "learning_rate": 6.185744453855285e-06, "loss": 0.5443, "step": 16340 }, { "epoch": 0.48144601741231235, "grad_norm": 1.4354324317608105, "learning_rate": 6.1852448775213426e-06, "loss": 0.4232, "step": 16341 }, { "epoch": 0.48147547986977596, "grad_norm": 1.677801170677574, "learning_rate": 6.184745288650241e-06, "loss": 0.5592, "step": 16342 }, { "epoch": 0.4815049423272395, "grad_norm": 1.7514374039155438, "learning_rate": 6.184245687247265e-06, "loss": 0.5146, "step": 16343 }, { "epoch": 0.4815344047847031, "grad_norm": 1.33248459797547, "learning_rate": 6.1837460733177e-06, "loss": 0.3188, "step": 16344 }, { "epoch": 0.48156386724216665, "grad_norm": 1.6878511533000786, "learning_rate": 6.1832464468668295e-06, "loss": 0.6122, "step": 16345 }, { "epoch": 0.48159332969963026, "grad_norm": 1.7432186421096023, "learning_rate": 6.182746807899937e-06, "loss": 0.4001, "step": 16346 }, { "epoch": 0.4816227921570938, "grad_norm": 1.403100752198649, "learning_rate": 6.18224715642231e-06, "loss": 0.3496, "step": 16347 }, { "epoch": 0.4816522546145574, "grad_norm": 1.4576746768371223, "learning_rate": 6.181747492439234e-06, "loss": 0.4675, "step": 16348 }, { "epoch": 0.48168171707202095, "grad_norm": 1.570534190764904, "learning_rate": 6.181247815955993e-06, "loss": 0.6412, "step": 16349 }, { "epoch": 0.48171117952948456, "grad_norm": 1.576131736096431, "learning_rate": 6.180748126977871e-06, "loss": 0.3527, "step": 16350 }, { "epoch": 0.4817406419869481, "grad_norm": 1.5303021608281606, "learning_rate": 6.180248425510157e-06, "loss": 0.481, "step": 16351 }, { "epoch": 0.4817701044444117, "grad_norm": 1.6071579486182843, "learning_rate": 6.179748711558133e-06, "loss": 0.6324, "step": 16352 }, { "epoch": 0.4817995669018753, "grad_norm": 1.2773989642251014, "learning_rate": 6.179248985127089e-06, "loss": 0.2956, "step": 16353 }, { "epoch": 0.48182902935933886, "grad_norm": 1.4986872733363714, "learning_rate": 6.178749246222306e-06, "loss": 0.3401, "step": 16354 }, { "epoch": 0.48185849181680246, "grad_norm": 1.5048384187591215, "learning_rate": 6.178249494849075e-06, "loss": 0.4034, "step": 16355 }, { "epoch": 0.481887954274266, "grad_norm": 1.6621063593733147, "learning_rate": 6.177749731012679e-06, "loss": 0.4765, "step": 16356 }, { "epoch": 0.4819174167317296, "grad_norm": 1.4738558992901911, "learning_rate": 6.177249954718405e-06, "loss": 0.332, "step": 16357 }, { "epoch": 0.48194687918919316, "grad_norm": 1.5162982866805212, "learning_rate": 6.17675016597154e-06, "loss": 0.5351, "step": 16358 }, { "epoch": 0.48197634164665676, "grad_norm": 1.6169824335750975, "learning_rate": 6.17625036477737e-06, "loss": 0.5157, "step": 16359 }, { "epoch": 0.4820058041041203, "grad_norm": 1.3692687554083924, "learning_rate": 6.175750551141182e-06, "loss": 0.3106, "step": 16360 }, { "epoch": 0.4820352665615839, "grad_norm": 1.5563956258805625, "learning_rate": 6.175250725068265e-06, "loss": 0.3852, "step": 16361 }, { "epoch": 0.48206472901904746, "grad_norm": 1.6668116620730606, "learning_rate": 6.174750886563901e-06, "loss": 0.5966, "step": 16362 }, { "epoch": 0.48209419147651106, "grad_norm": 1.45114459313069, "learning_rate": 6.1742510356333814e-06, "loss": 0.5739, "step": 16363 }, { "epoch": 0.4821236539339746, "grad_norm": 1.3429049235353185, "learning_rate": 6.173751172281995e-06, "loss": 0.3044, "step": 16364 }, { "epoch": 0.4821531163914382, "grad_norm": 1.6141976168714935, "learning_rate": 6.173251296515022e-06, "loss": 0.433, "step": 16365 }, { "epoch": 0.4821825788489018, "grad_norm": 1.9297534669911371, "learning_rate": 6.172751408337756e-06, "loss": 0.4634, "step": 16366 }, { "epoch": 0.48221204130636536, "grad_norm": 1.5323848907123783, "learning_rate": 6.172251507755484e-06, "loss": 0.5391, "step": 16367 }, { "epoch": 0.48224150376382896, "grad_norm": 1.4421213876619006, "learning_rate": 6.171751594773492e-06, "loss": 0.5401, "step": 16368 }, { "epoch": 0.4822709662212925, "grad_norm": 1.6344751044657204, "learning_rate": 6.171251669397068e-06, "loss": 0.5522, "step": 16369 }, { "epoch": 0.4823004286787561, "grad_norm": 1.637659966178491, "learning_rate": 6.170751731631501e-06, "loss": 0.4815, "step": 16370 }, { "epoch": 0.48232989113621966, "grad_norm": 1.4847829271925144, "learning_rate": 6.17025178148208e-06, "loss": 0.3644, "step": 16371 }, { "epoch": 0.48235935359368326, "grad_norm": 1.5377336903433718, "learning_rate": 6.169751818954092e-06, "loss": 0.4985, "step": 16372 }, { "epoch": 0.4823888160511468, "grad_norm": 1.459673605338989, "learning_rate": 6.169251844052826e-06, "loss": 0.4626, "step": 16373 }, { "epoch": 0.4824182785086104, "grad_norm": 1.5570004821730785, "learning_rate": 6.168751856783569e-06, "loss": 0.3173, "step": 16374 }, { "epoch": 0.48244774096607396, "grad_norm": 1.5378252973507731, "learning_rate": 6.168251857151611e-06, "loss": 0.4664, "step": 16375 }, { "epoch": 0.48247720342353756, "grad_norm": 1.432487960786993, "learning_rate": 6.167751845162243e-06, "loss": 0.3977, "step": 16376 }, { "epoch": 0.4825066658810011, "grad_norm": 1.6187835787043447, "learning_rate": 6.16725182082075e-06, "loss": 0.3491, "step": 16377 }, { "epoch": 0.4825361283384647, "grad_norm": 1.6034541416559827, "learning_rate": 6.166751784132424e-06, "loss": 0.4294, "step": 16378 }, { "epoch": 0.4825655907959283, "grad_norm": 1.5843323210821834, "learning_rate": 6.166251735102554e-06, "loss": 0.4074, "step": 16379 }, { "epoch": 0.48259505325339186, "grad_norm": 1.751563564109666, "learning_rate": 6.165751673736426e-06, "loss": 0.3594, "step": 16380 }, { "epoch": 0.48262451571085546, "grad_norm": 1.4612655212421648, "learning_rate": 6.1652516000393335e-06, "loss": 0.2893, "step": 16381 }, { "epoch": 0.482653978168319, "grad_norm": 1.4149893725939195, "learning_rate": 6.1647515140165645e-06, "loss": 0.4824, "step": 16382 }, { "epoch": 0.4826834406257826, "grad_norm": 1.6225117116855925, "learning_rate": 6.16425141567341e-06, "loss": 0.5527, "step": 16383 }, { "epoch": 0.48271290308324616, "grad_norm": 1.4988009132524118, "learning_rate": 6.163751305015158e-06, "loss": 0.5108, "step": 16384 }, { "epoch": 0.48274236554070976, "grad_norm": 1.4607945098924005, "learning_rate": 6.163251182047099e-06, "loss": 0.3866, "step": 16385 }, { "epoch": 0.4827718279981733, "grad_norm": 1.3384165067920828, "learning_rate": 6.1627510467745246e-06, "loss": 0.3317, "step": 16386 }, { "epoch": 0.4828012904556369, "grad_norm": 1.3599514042821754, "learning_rate": 6.162250899202722e-06, "loss": 0.4788, "step": 16387 }, { "epoch": 0.48283075291310046, "grad_norm": 1.8725951076515825, "learning_rate": 6.161750739336987e-06, "loss": 0.5732, "step": 16388 }, { "epoch": 0.48286021537056406, "grad_norm": 1.4758127075807825, "learning_rate": 6.161250567182604e-06, "loss": 0.3962, "step": 16389 }, { "epoch": 0.4828896778280276, "grad_norm": 1.4926535340621223, "learning_rate": 6.160750382744866e-06, "loss": 0.5135, "step": 16390 }, { "epoch": 0.4829191402854912, "grad_norm": 1.646592633509327, "learning_rate": 6.1602501860290656e-06, "loss": 0.3931, "step": 16391 }, { "epoch": 0.4829486027429548, "grad_norm": 1.5286064521503089, "learning_rate": 6.1597499770404916e-06, "loss": 0.4545, "step": 16392 }, { "epoch": 0.48297806520041836, "grad_norm": 1.3678314307538908, "learning_rate": 6.159249755784437e-06, "loss": 0.4027, "step": 16393 }, { "epoch": 0.48300752765788196, "grad_norm": 1.3958768905806955, "learning_rate": 6.158749522266191e-06, "loss": 0.3597, "step": 16394 }, { "epoch": 0.4830369901153455, "grad_norm": 1.6028184042374514, "learning_rate": 6.158249276491045e-06, "loss": 0.4377, "step": 16395 }, { "epoch": 0.4830664525728091, "grad_norm": 1.5061388266221067, "learning_rate": 6.157749018464291e-06, "loss": 0.5333, "step": 16396 }, { "epoch": 0.48309591503027266, "grad_norm": 1.4959536151014197, "learning_rate": 6.157248748191222e-06, "loss": 0.5103, "step": 16397 }, { "epoch": 0.48312537748773626, "grad_norm": 1.3498134779707358, "learning_rate": 6.156748465677128e-06, "loss": 0.4477, "step": 16398 }, { "epoch": 0.4831548399451998, "grad_norm": 1.404614817863891, "learning_rate": 6.156248170927302e-06, "loss": 0.3423, "step": 16399 }, { "epoch": 0.4831843024026634, "grad_norm": 1.430156439317098, "learning_rate": 6.1557478639470335e-06, "loss": 0.4066, "step": 16400 }, { "epoch": 0.48321376486012696, "grad_norm": 1.4622212986890224, "learning_rate": 6.155247544741617e-06, "loss": 0.4467, "step": 16401 }, { "epoch": 0.48324322731759056, "grad_norm": 1.5249283234985624, "learning_rate": 6.154747213316344e-06, "loss": 0.4742, "step": 16402 }, { "epoch": 0.4832726897750541, "grad_norm": 1.5127278798477153, "learning_rate": 6.154246869676508e-06, "loss": 0.514, "step": 16403 }, { "epoch": 0.4833021522325177, "grad_norm": 1.5501129719814297, "learning_rate": 6.153746513827399e-06, "loss": 0.4985, "step": 16404 }, { "epoch": 0.4833316146899813, "grad_norm": 1.4579983648324801, "learning_rate": 6.153246145774311e-06, "loss": 0.4117, "step": 16405 }, { "epoch": 0.48336107714744486, "grad_norm": 1.5506959417597674, "learning_rate": 6.1527457655225375e-06, "loss": 0.4967, "step": 16406 }, { "epoch": 0.48339053960490846, "grad_norm": 1.4841128603277702, "learning_rate": 6.15224537307737e-06, "loss": 0.3845, "step": 16407 }, { "epoch": 0.483420002062372, "grad_norm": 1.5235274935912653, "learning_rate": 6.151744968444103e-06, "loss": 0.4424, "step": 16408 }, { "epoch": 0.4834494645198356, "grad_norm": 1.2729919068753714, "learning_rate": 6.151244551628028e-06, "loss": 0.4322, "step": 16409 }, { "epoch": 0.48347892697729916, "grad_norm": 1.4331627999688397, "learning_rate": 6.150744122634439e-06, "loss": 0.4958, "step": 16410 }, { "epoch": 0.48350838943476276, "grad_norm": 1.482291997698605, "learning_rate": 6.15024368146863e-06, "loss": 0.4411, "step": 16411 }, { "epoch": 0.4835378518922263, "grad_norm": 1.5126477767295106, "learning_rate": 6.149743228135892e-06, "loss": 0.4306, "step": 16412 }, { "epoch": 0.4835673143496899, "grad_norm": 1.6214608095723722, "learning_rate": 6.149242762641523e-06, "loss": 0.3564, "step": 16413 }, { "epoch": 0.48359677680715346, "grad_norm": 1.6067458783131787, "learning_rate": 6.148742284990813e-06, "loss": 0.5932, "step": 16414 }, { "epoch": 0.48362623926461706, "grad_norm": 1.302742519560185, "learning_rate": 6.1482417951890586e-06, "loss": 0.3625, "step": 16415 }, { "epoch": 0.48365570172208067, "grad_norm": 1.404737494852862, "learning_rate": 6.14774129324155e-06, "loss": 0.3419, "step": 16416 }, { "epoch": 0.4836851641795442, "grad_norm": 1.4782057737418275, "learning_rate": 6.147240779153585e-06, "loss": 0.4977, "step": 16417 }, { "epoch": 0.4837146266370078, "grad_norm": 1.5485958448903403, "learning_rate": 6.146740252930458e-06, "loss": 0.4769, "step": 16418 }, { "epoch": 0.48374408909447136, "grad_norm": 1.4792423369115417, "learning_rate": 6.14623971457746e-06, "loss": 0.5157, "step": 16419 }, { "epoch": 0.48377355155193497, "grad_norm": 1.4847051824271407, "learning_rate": 6.145739164099888e-06, "loss": 0.5213, "step": 16420 }, { "epoch": 0.4838030140093985, "grad_norm": 1.3233567249171576, "learning_rate": 6.145238601503037e-06, "loss": 0.3591, "step": 16421 }, { "epoch": 0.4838324764668621, "grad_norm": 1.6604397135253584, "learning_rate": 6.144738026792201e-06, "loss": 0.4877, "step": 16422 }, { "epoch": 0.48386193892432566, "grad_norm": 1.3966461068503297, "learning_rate": 6.1442374399726746e-06, "loss": 0.4235, "step": 16423 }, { "epoch": 0.48389140138178927, "grad_norm": 1.7728593508287194, "learning_rate": 6.143736841049754e-06, "loss": 0.5836, "step": 16424 }, { "epoch": 0.4839208638392528, "grad_norm": 1.7061863643146766, "learning_rate": 6.143236230028733e-06, "loss": 0.5805, "step": 16425 }, { "epoch": 0.4839503262967164, "grad_norm": 1.358372785372998, "learning_rate": 6.142735606914907e-06, "loss": 0.4787, "step": 16426 }, { "epoch": 0.48397978875417996, "grad_norm": 1.4940462385801883, "learning_rate": 6.142234971713573e-06, "loss": 0.4394, "step": 16427 }, { "epoch": 0.48400925121164357, "grad_norm": 1.691805339345208, "learning_rate": 6.141734324430026e-06, "loss": 0.541, "step": 16428 }, { "epoch": 0.48403871366910717, "grad_norm": 1.4344492501940735, "learning_rate": 6.14123366506956e-06, "loss": 0.4768, "step": 16429 }, { "epoch": 0.4840681761265707, "grad_norm": 1.6401460053313517, "learning_rate": 6.140732993637473e-06, "loss": 0.5044, "step": 16430 }, { "epoch": 0.4840976385840343, "grad_norm": 1.630989953232586, "learning_rate": 6.14023231013906e-06, "loss": 0.5824, "step": 16431 }, { "epoch": 0.48412710104149786, "grad_norm": 1.4520713084783832, "learning_rate": 6.139731614579616e-06, "loss": 0.4535, "step": 16432 }, { "epoch": 0.48415656349896147, "grad_norm": 1.6431621724711414, "learning_rate": 6.139230906964441e-06, "loss": 0.4868, "step": 16433 }, { "epoch": 0.484186025956425, "grad_norm": 1.3674454358640806, "learning_rate": 6.138730187298826e-06, "loss": 0.3046, "step": 16434 }, { "epoch": 0.4842154884138886, "grad_norm": 1.3962446213436666, "learning_rate": 6.138229455588069e-06, "loss": 0.327, "step": 16435 }, { "epoch": 0.48424495087135216, "grad_norm": 1.4188829669617968, "learning_rate": 6.137728711837472e-06, "loss": 0.4317, "step": 16436 }, { "epoch": 0.48427441332881577, "grad_norm": 1.3476751335504529, "learning_rate": 6.137227956052324e-06, "loss": 0.3346, "step": 16437 }, { "epoch": 0.4843038757862793, "grad_norm": 1.567569613995268, "learning_rate": 6.1367271882379265e-06, "loss": 0.3898, "step": 16438 }, { "epoch": 0.4843333382437429, "grad_norm": 1.4670436761472931, "learning_rate": 6.136226408399576e-06, "loss": 0.4471, "step": 16439 }, { "epoch": 0.48436280070120646, "grad_norm": 1.4406995797642614, "learning_rate": 6.135725616542567e-06, "loss": 0.3915, "step": 16440 }, { "epoch": 0.48439226315867007, "grad_norm": 1.5316253695034354, "learning_rate": 6.135224812672199e-06, "loss": 0.4559, "step": 16441 }, { "epoch": 0.48442172561613367, "grad_norm": 1.5500044658748446, "learning_rate": 6.13472399679377e-06, "loss": 0.4117, "step": 16442 }, { "epoch": 0.4844511880735972, "grad_norm": 1.5598813844949424, "learning_rate": 6.134223168912576e-06, "loss": 0.3651, "step": 16443 }, { "epoch": 0.4844806505310608, "grad_norm": 1.562327913869632, "learning_rate": 6.133722329033915e-06, "loss": 0.5068, "step": 16444 }, { "epoch": 0.48451011298852437, "grad_norm": 1.563686914855504, "learning_rate": 6.133221477163084e-06, "loss": 0.499, "step": 16445 }, { "epoch": 0.48453957544598797, "grad_norm": 1.68551692523637, "learning_rate": 6.1327206133053815e-06, "loss": 0.377, "step": 16446 }, { "epoch": 0.4845690379034515, "grad_norm": 1.5234519238902964, "learning_rate": 6.1322197374661055e-06, "loss": 0.4711, "step": 16447 }, { "epoch": 0.4845985003609151, "grad_norm": 1.6447042375795589, "learning_rate": 6.131718849650555e-06, "loss": 0.5959, "step": 16448 }, { "epoch": 0.48462796281837867, "grad_norm": 1.5331232249546731, "learning_rate": 6.1312179498640274e-06, "loss": 0.5329, "step": 16449 }, { "epoch": 0.48465742527584227, "grad_norm": 1.5728986069175672, "learning_rate": 6.130717038111821e-06, "loss": 0.4581, "step": 16450 }, { "epoch": 0.4846868877333058, "grad_norm": 1.4770479946359554, "learning_rate": 6.1302161143992346e-06, "loss": 0.3404, "step": 16451 }, { "epoch": 0.4847163501907694, "grad_norm": 1.424713182901018, "learning_rate": 6.129715178731566e-06, "loss": 0.3451, "step": 16452 }, { "epoch": 0.48474581264823297, "grad_norm": 1.6575233006662842, "learning_rate": 6.129214231114114e-06, "loss": 0.5456, "step": 16453 }, { "epoch": 0.48477527510569657, "grad_norm": 1.6406075513651592, "learning_rate": 6.128713271552179e-06, "loss": 0.5648, "step": 16454 }, { "epoch": 0.48480473756316017, "grad_norm": 1.5154165323122621, "learning_rate": 6.128212300051059e-06, "loss": 0.4757, "step": 16455 }, { "epoch": 0.4848342000206237, "grad_norm": 1.520289575959705, "learning_rate": 6.127711316616053e-06, "loss": 0.5367, "step": 16456 }, { "epoch": 0.4848636624780873, "grad_norm": 1.466344084033164, "learning_rate": 6.12721032125246e-06, "loss": 0.3983, "step": 16457 }, { "epoch": 0.48489312493555087, "grad_norm": 1.6073353322190294, "learning_rate": 6.126709313965581e-06, "loss": 0.5146, "step": 16458 }, { "epoch": 0.48492258739301447, "grad_norm": 1.3356865023308584, "learning_rate": 6.126208294760712e-06, "loss": 0.3262, "step": 16459 }, { "epoch": 0.484952049850478, "grad_norm": 1.2953410359003665, "learning_rate": 6.125707263643157e-06, "loss": 0.4093, "step": 16460 }, { "epoch": 0.4849815123079416, "grad_norm": 1.4725372652417472, "learning_rate": 6.125206220618212e-06, "loss": 0.319, "step": 16461 }, { "epoch": 0.48501097476540517, "grad_norm": 1.5247060465667557, "learning_rate": 6.12470516569118e-06, "loss": 0.4018, "step": 16462 }, { "epoch": 0.48504043722286877, "grad_norm": 1.6591086012232634, "learning_rate": 6.1242040988673575e-06, "loss": 0.4229, "step": 16463 }, { "epoch": 0.4850698996803323, "grad_norm": 1.619085104421112, "learning_rate": 6.123703020152049e-06, "loss": 0.578, "step": 16464 }, { "epoch": 0.4850993621377959, "grad_norm": 1.5594515238880131, "learning_rate": 6.123201929550551e-06, "loss": 0.4774, "step": 16465 }, { "epoch": 0.48512882459525947, "grad_norm": 1.4264333573982473, "learning_rate": 6.122700827068166e-06, "loss": 0.3932, "step": 16466 }, { "epoch": 0.48515828705272307, "grad_norm": 1.4982366896995063, "learning_rate": 6.1221997127101925e-06, "loss": 0.4579, "step": 16467 }, { "epoch": 0.48518774951018667, "grad_norm": 1.5932857451197093, "learning_rate": 6.1216985864819346e-06, "loss": 0.3717, "step": 16468 }, { "epoch": 0.4852172119676502, "grad_norm": 1.5734918898509829, "learning_rate": 6.121197448388689e-06, "loss": 0.5831, "step": 16469 }, { "epoch": 0.4852466744251138, "grad_norm": 1.6008968501128165, "learning_rate": 6.12069629843576e-06, "loss": 0.4684, "step": 16470 }, { "epoch": 0.48527613688257737, "grad_norm": 1.5147118378478197, "learning_rate": 6.1201951366284455e-06, "loss": 0.4895, "step": 16471 }, { "epoch": 0.48530559934004097, "grad_norm": 1.5222968364424527, "learning_rate": 6.119693962972049e-06, "loss": 0.2826, "step": 16472 }, { "epoch": 0.4853350617975045, "grad_norm": 1.6803827859818128, "learning_rate": 6.119192777471872e-06, "loss": 0.36, "step": 16473 }, { "epoch": 0.4853645242549681, "grad_norm": 1.5496558815865324, "learning_rate": 6.118691580133214e-06, "loss": 0.5719, "step": 16474 }, { "epoch": 0.48539398671243167, "grad_norm": 1.48678563336606, "learning_rate": 6.118190370961375e-06, "loss": 0.41, "step": 16475 }, { "epoch": 0.48542344916989527, "grad_norm": 1.5322821116988237, "learning_rate": 6.117689149961663e-06, "loss": 0.5179, "step": 16476 }, { "epoch": 0.4854529116273588, "grad_norm": 1.4010488486620452, "learning_rate": 6.117187917139373e-06, "loss": 0.3605, "step": 16477 }, { "epoch": 0.4854823740848224, "grad_norm": 1.4448115493467473, "learning_rate": 6.116686672499812e-06, "loss": 0.3524, "step": 16478 }, { "epoch": 0.48551183654228597, "grad_norm": 1.5411954234152627, "learning_rate": 6.116185416048278e-06, "loss": 0.4251, "step": 16479 }, { "epoch": 0.48554129899974957, "grad_norm": 1.3355091539846249, "learning_rate": 6.115684147790075e-06, "loss": 0.3858, "step": 16480 }, { "epoch": 0.4855707614572132, "grad_norm": 1.5031238884726836, "learning_rate": 6.115182867730505e-06, "loss": 0.415, "step": 16481 }, { "epoch": 0.4856002239146767, "grad_norm": 1.7468680412134563, "learning_rate": 6.114681575874871e-06, "loss": 0.511, "step": 16482 }, { "epoch": 0.4856296863721403, "grad_norm": 1.5819482662019761, "learning_rate": 6.114180272228476e-06, "loss": 0.4245, "step": 16483 }, { "epoch": 0.48565914882960387, "grad_norm": 1.8440161612070909, "learning_rate": 6.11367895679662e-06, "loss": 0.637, "step": 16484 }, { "epoch": 0.4856886112870675, "grad_norm": 1.6202288046018676, "learning_rate": 6.113177629584609e-06, "loss": 0.5386, "step": 16485 }, { "epoch": 0.485718073744531, "grad_norm": 1.4030560270627062, "learning_rate": 6.112676290597742e-06, "loss": 0.3764, "step": 16486 }, { "epoch": 0.4857475362019946, "grad_norm": 1.4392595617148816, "learning_rate": 6.112174939841327e-06, "loss": 0.4134, "step": 16487 }, { "epoch": 0.48577699865945817, "grad_norm": 1.6195125572086548, "learning_rate": 6.111673577320663e-06, "loss": 0.4324, "step": 16488 }, { "epoch": 0.4858064611169218, "grad_norm": 1.509932553501398, "learning_rate": 6.111172203041053e-06, "loss": 0.3821, "step": 16489 }, { "epoch": 0.4858359235743853, "grad_norm": 1.367280391532742, "learning_rate": 6.110670817007805e-06, "loss": 0.3436, "step": 16490 }, { "epoch": 0.4858653860318489, "grad_norm": 1.6338003589703434, "learning_rate": 6.1101694192262185e-06, "loss": 0.5471, "step": 16491 }, { "epoch": 0.48589484848931247, "grad_norm": 1.538312844838481, "learning_rate": 6.109668009701597e-06, "loss": 0.4257, "step": 16492 }, { "epoch": 0.4859243109467761, "grad_norm": 1.8577357632524365, "learning_rate": 6.109166588439247e-06, "loss": 0.4619, "step": 16493 }, { "epoch": 0.4859537734042397, "grad_norm": 1.429834888929399, "learning_rate": 6.108665155444471e-06, "loss": 0.396, "step": 16494 }, { "epoch": 0.4859832358617032, "grad_norm": 1.3615787005552256, "learning_rate": 6.1081637107225726e-06, "loss": 0.3192, "step": 16495 }, { "epoch": 0.4860126983191668, "grad_norm": 1.6237292013393672, "learning_rate": 6.107662254278855e-06, "loss": 0.519, "step": 16496 }, { "epoch": 0.48604216077663037, "grad_norm": 1.6781338314922394, "learning_rate": 6.107160786118624e-06, "loss": 0.4789, "step": 16497 }, { "epoch": 0.486071623234094, "grad_norm": 1.5252825008620408, "learning_rate": 6.1066593062471844e-06, "loss": 0.4557, "step": 16498 }, { "epoch": 0.4861010856915575, "grad_norm": 1.3812873394318408, "learning_rate": 6.106157814669839e-06, "loss": 0.3747, "step": 16499 }, { "epoch": 0.4861305481490211, "grad_norm": 1.3991267220659, "learning_rate": 6.105656311391895e-06, "loss": 0.4044, "step": 16500 }, { "epoch": 0.48616001060648467, "grad_norm": 1.5683316464352353, "learning_rate": 6.105154796418654e-06, "loss": 0.4122, "step": 16501 }, { "epoch": 0.4861894730639483, "grad_norm": 1.667893285763687, "learning_rate": 6.104653269755423e-06, "loss": 0.5353, "step": 16502 }, { "epoch": 0.4862189355214118, "grad_norm": 1.6362382061837437, "learning_rate": 6.104151731407507e-06, "loss": 0.4662, "step": 16503 }, { "epoch": 0.4862483979788754, "grad_norm": 1.5825977010092118, "learning_rate": 6.1036501813802095e-06, "loss": 0.4771, "step": 16504 }, { "epoch": 0.48627786043633897, "grad_norm": 1.2526833685641656, "learning_rate": 6.103148619678836e-06, "loss": 0.3191, "step": 16505 }, { "epoch": 0.4863073228938026, "grad_norm": 1.5378782440794043, "learning_rate": 6.102647046308694e-06, "loss": 0.4144, "step": 16506 }, { "epoch": 0.4863367853512662, "grad_norm": 1.6931594143595865, "learning_rate": 6.102145461275087e-06, "loss": 0.537, "step": 16507 }, { "epoch": 0.4863662478087297, "grad_norm": 1.6880966346040305, "learning_rate": 6.101643864583322e-06, "loss": 0.5662, "step": 16508 }, { "epoch": 0.4863957102661933, "grad_norm": 1.5060826696571346, "learning_rate": 6.101142256238703e-06, "loss": 0.5379, "step": 16509 }, { "epoch": 0.4864251727236569, "grad_norm": 1.327829617295031, "learning_rate": 6.100640636246537e-06, "loss": 0.3946, "step": 16510 }, { "epoch": 0.4864546351811205, "grad_norm": 1.3054315815322852, "learning_rate": 6.1001390046121286e-06, "loss": 0.3415, "step": 16511 }, { "epoch": 0.486484097638584, "grad_norm": 1.646447892141121, "learning_rate": 6.099637361340788e-06, "loss": 0.5972, "step": 16512 }, { "epoch": 0.4865135600960476, "grad_norm": 1.5720241912244275, "learning_rate": 6.099135706437816e-06, "loss": 0.5954, "step": 16513 }, { "epoch": 0.4865430225535112, "grad_norm": 1.4038325960917795, "learning_rate": 6.098634039908522e-06, "loss": 0.4157, "step": 16514 }, { "epoch": 0.4865724850109748, "grad_norm": 1.5948736292635524, "learning_rate": 6.098132361758213e-06, "loss": 0.5927, "step": 16515 }, { "epoch": 0.4866019474684383, "grad_norm": 1.556752896412333, "learning_rate": 6.097630671992194e-06, "loss": 0.6148, "step": 16516 }, { "epoch": 0.4866314099259019, "grad_norm": 1.389797224456664, "learning_rate": 6.097128970615771e-06, "loss": 0.4262, "step": 16517 }, { "epoch": 0.4866608723833655, "grad_norm": 1.417252125694945, "learning_rate": 6.096627257634254e-06, "loss": 0.4247, "step": 16518 }, { "epoch": 0.4866903348408291, "grad_norm": 1.639426550883949, "learning_rate": 6.096125533052948e-06, "loss": 0.478, "step": 16519 }, { "epoch": 0.4867197972982927, "grad_norm": 1.5441080113613679, "learning_rate": 6.095623796877158e-06, "loss": 0.483, "step": 16520 }, { "epoch": 0.4867492597557562, "grad_norm": 1.5788389891145747, "learning_rate": 6.095122049112196e-06, "loss": 0.5203, "step": 16521 }, { "epoch": 0.4867787222132198, "grad_norm": 1.5592404835461036, "learning_rate": 6.094620289763364e-06, "loss": 0.3719, "step": 16522 }, { "epoch": 0.4868081846706834, "grad_norm": 1.5907194367958553, "learning_rate": 6.094118518835974e-06, "loss": 0.4477, "step": 16523 }, { "epoch": 0.486837647128147, "grad_norm": 1.58203344472705, "learning_rate": 6.093616736335333e-06, "loss": 0.4729, "step": 16524 }, { "epoch": 0.4868671095856105, "grad_norm": 1.3376159317559335, "learning_rate": 6.093114942266745e-06, "loss": 0.4007, "step": 16525 }, { "epoch": 0.4868965720430741, "grad_norm": 1.5815262732717401, "learning_rate": 6.0926131366355205e-06, "loss": 0.5882, "step": 16526 }, { "epoch": 0.4869260345005377, "grad_norm": 1.3668994757601556, "learning_rate": 6.092111319446968e-06, "loss": 0.4125, "step": 16527 }, { "epoch": 0.4869554969580013, "grad_norm": 1.3994383783832354, "learning_rate": 6.091609490706396e-06, "loss": 0.3486, "step": 16528 }, { "epoch": 0.4869849594154648, "grad_norm": 1.592404805373184, "learning_rate": 6.091107650419109e-06, "loss": 0.5256, "step": 16529 }, { "epoch": 0.4870144218729284, "grad_norm": 1.3897720193550633, "learning_rate": 6.09060579859042e-06, "loss": 0.4119, "step": 16530 }, { "epoch": 0.487043884330392, "grad_norm": 1.2885595955396973, "learning_rate": 6.090103935225633e-06, "loss": 0.2562, "step": 16531 }, { "epoch": 0.4870733467878556, "grad_norm": 1.5534518246838702, "learning_rate": 6.0896020603300595e-06, "loss": 0.6422, "step": 16532 }, { "epoch": 0.4871028092453192, "grad_norm": 1.505428247970893, "learning_rate": 6.089100173909011e-06, "loss": 0.3603, "step": 16533 }, { "epoch": 0.4871322717027827, "grad_norm": 1.5458152051947793, "learning_rate": 6.088598275967788e-06, "loss": 0.3335, "step": 16534 }, { "epoch": 0.48716173416024633, "grad_norm": 1.464437787219395, "learning_rate": 6.088096366511705e-06, "loss": 0.4436, "step": 16535 }, { "epoch": 0.4871911966177099, "grad_norm": 1.7117752676951072, "learning_rate": 6.087594445546072e-06, "loss": 0.4948, "step": 16536 }, { "epoch": 0.4872206590751735, "grad_norm": 1.3973119699050056, "learning_rate": 6.087092513076195e-06, "loss": 0.4348, "step": 16537 }, { "epoch": 0.487250121532637, "grad_norm": 1.4075444670178876, "learning_rate": 6.0865905691073854e-06, "loss": 0.2977, "step": 16538 }, { "epoch": 0.48727958399010063, "grad_norm": 1.4349586999328772, "learning_rate": 6.086088613644952e-06, "loss": 0.5121, "step": 16539 }, { "epoch": 0.4873090464475642, "grad_norm": 1.4880076302289205, "learning_rate": 6.085586646694205e-06, "loss": 0.5313, "step": 16540 }, { "epoch": 0.4873385089050278, "grad_norm": 1.454194787749398, "learning_rate": 6.085084668260451e-06, "loss": 0.4378, "step": 16541 }, { "epoch": 0.4873679713624913, "grad_norm": 1.326014201577433, "learning_rate": 6.084582678349005e-06, "loss": 0.2547, "step": 16542 }, { "epoch": 0.48739743381995493, "grad_norm": 1.5633079733123478, "learning_rate": 6.084080676965172e-06, "loss": 0.3997, "step": 16543 }, { "epoch": 0.4874268962774185, "grad_norm": 1.6936072865501743, "learning_rate": 6.083578664114263e-06, "loss": 0.5646, "step": 16544 }, { "epoch": 0.4874563587348821, "grad_norm": 1.22495377438036, "learning_rate": 6.083076639801592e-06, "loss": 0.3172, "step": 16545 }, { "epoch": 0.4874858211923457, "grad_norm": 1.5143441337836365, "learning_rate": 6.082574604032464e-06, "loss": 0.44, "step": 16546 }, { "epoch": 0.48751528364980923, "grad_norm": 1.3019822036943276, "learning_rate": 6.082072556812192e-06, "loss": 0.252, "step": 16547 }, { "epoch": 0.48754474610727283, "grad_norm": 1.5810905386730907, "learning_rate": 6.0815704981460885e-06, "loss": 0.445, "step": 16548 }, { "epoch": 0.4875742085647364, "grad_norm": 1.3521347008242972, "learning_rate": 6.081068428039459e-06, "loss": 0.3422, "step": 16549 }, { "epoch": 0.4876036710222, "grad_norm": 1.657386821613206, "learning_rate": 6.080566346497619e-06, "loss": 0.2945, "step": 16550 }, { "epoch": 0.4876331334796635, "grad_norm": 1.416043734823282, "learning_rate": 6.080064253525877e-06, "loss": 0.4558, "step": 16551 }, { "epoch": 0.48766259593712713, "grad_norm": 1.5816550623081234, "learning_rate": 6.079562149129544e-06, "loss": 0.3829, "step": 16552 }, { "epoch": 0.4876920583945907, "grad_norm": 1.6934184705213464, "learning_rate": 6.0790600333139326e-06, "loss": 0.4969, "step": 16553 }, { "epoch": 0.4877215208520543, "grad_norm": 1.4268480470420564, "learning_rate": 6.078557906084353e-06, "loss": 0.445, "step": 16554 }, { "epoch": 0.4877509833095178, "grad_norm": 1.4639763188726425, "learning_rate": 6.078055767446115e-06, "loss": 0.4775, "step": 16555 }, { "epoch": 0.48778044576698143, "grad_norm": 1.554591039012911, "learning_rate": 6.077553617404533e-06, "loss": 0.3518, "step": 16556 }, { "epoch": 0.487809908224445, "grad_norm": 1.5648602748287288, "learning_rate": 6.077051455964917e-06, "loss": 0.3339, "step": 16557 }, { "epoch": 0.4878393706819086, "grad_norm": 1.4100729276909532, "learning_rate": 6.076549283132579e-06, "loss": 0.497, "step": 16558 }, { "epoch": 0.4878688331393722, "grad_norm": 1.5750770808960186, "learning_rate": 6.07604709891283e-06, "loss": 0.5444, "step": 16559 }, { "epoch": 0.48789829559683573, "grad_norm": 1.5235552052250472, "learning_rate": 6.0755449033109835e-06, "loss": 0.47, "step": 16560 }, { "epoch": 0.48792775805429933, "grad_norm": 1.4570319741318962, "learning_rate": 6.0750426963323505e-06, "loss": 0.3428, "step": 16561 }, { "epoch": 0.4879572205117629, "grad_norm": 1.7940397220081639, "learning_rate": 6.074540477982243e-06, "loss": 0.5491, "step": 16562 }, { "epoch": 0.4879866829692265, "grad_norm": 1.500572946581355, "learning_rate": 6.074038248265975e-06, "loss": 0.4968, "step": 16563 }, { "epoch": 0.48801614542669003, "grad_norm": 1.5685889038981329, "learning_rate": 6.073536007188857e-06, "loss": 0.6258, "step": 16564 }, { "epoch": 0.48804560788415363, "grad_norm": 1.366412374581905, "learning_rate": 6.073033754756203e-06, "loss": 0.4133, "step": 16565 }, { "epoch": 0.4880750703416172, "grad_norm": 1.207567907650776, "learning_rate": 6.072531490973323e-06, "loss": 0.3771, "step": 16566 }, { "epoch": 0.4881045327990808, "grad_norm": 1.4042609949241125, "learning_rate": 6.072029215845535e-06, "loss": 0.3987, "step": 16567 }, { "epoch": 0.48813399525654433, "grad_norm": 1.6816663276215151, "learning_rate": 6.071526929378147e-06, "loss": 0.6635, "step": 16568 }, { "epoch": 0.48816345771400793, "grad_norm": 1.5383376153348032, "learning_rate": 6.071024631576472e-06, "loss": 0.4912, "step": 16569 }, { "epoch": 0.4881929201714715, "grad_norm": 1.4374345939153268, "learning_rate": 6.0705223224458265e-06, "loss": 0.5192, "step": 16570 }, { "epoch": 0.4882223826289351, "grad_norm": 1.4608021008095617, "learning_rate": 6.070020001991521e-06, "loss": 0.4283, "step": 16571 }, { "epoch": 0.4882518450863987, "grad_norm": 1.5220004729267926, "learning_rate": 6.069517670218872e-06, "loss": 0.4034, "step": 16572 }, { "epoch": 0.48828130754386223, "grad_norm": 1.493348055287371, "learning_rate": 6.069015327133191e-06, "loss": 0.4951, "step": 16573 }, { "epoch": 0.48831077000132583, "grad_norm": 1.4958783795527064, "learning_rate": 6.06851297273979e-06, "loss": 0.4095, "step": 16574 }, { "epoch": 0.4883402324587894, "grad_norm": 1.4300712438333198, "learning_rate": 6.068010607043986e-06, "loss": 0.5071, "step": 16575 }, { "epoch": 0.488369694916253, "grad_norm": 1.5552606078213478, "learning_rate": 6.067508230051089e-06, "loss": 0.3554, "step": 16576 }, { "epoch": 0.48839915737371653, "grad_norm": 1.6895886934209143, "learning_rate": 6.067005841766417e-06, "loss": 0.5394, "step": 16577 }, { "epoch": 0.48842861983118013, "grad_norm": 1.7026407444574623, "learning_rate": 6.066503442195282e-06, "loss": 0.4386, "step": 16578 }, { "epoch": 0.4884580822886437, "grad_norm": 1.8701980422674367, "learning_rate": 6.0660010313429984e-06, "loss": 0.4255, "step": 16579 }, { "epoch": 0.4884875447461073, "grad_norm": 1.4416706016932292, "learning_rate": 6.065498609214881e-06, "loss": 0.3735, "step": 16580 }, { "epoch": 0.48851700720357083, "grad_norm": 1.5343213607643316, "learning_rate": 6.064996175816245e-06, "loss": 0.5029, "step": 16581 }, { "epoch": 0.48854646966103443, "grad_norm": 1.4642320612837698, "learning_rate": 6.064493731152404e-06, "loss": 0.4764, "step": 16582 }, { "epoch": 0.488575932118498, "grad_norm": 1.6049585476906114, "learning_rate": 6.06399127522867e-06, "loss": 0.5648, "step": 16583 }, { "epoch": 0.4886053945759616, "grad_norm": 1.4760356968789836, "learning_rate": 6.063488808050362e-06, "loss": 0.4441, "step": 16584 }, { "epoch": 0.4886348570334252, "grad_norm": 1.5113105405907539, "learning_rate": 6.062986329622795e-06, "loss": 0.5461, "step": 16585 }, { "epoch": 0.48866431949088873, "grad_norm": 1.428181406164759, "learning_rate": 6.062483839951281e-06, "loss": 0.4204, "step": 16586 }, { "epoch": 0.48869378194835233, "grad_norm": 1.4056699344320964, "learning_rate": 6.061981339041136e-06, "loss": 0.4582, "step": 16587 }, { "epoch": 0.4887232444058159, "grad_norm": 1.5768909527197636, "learning_rate": 6.061478826897679e-06, "loss": 0.3827, "step": 16588 }, { "epoch": 0.4887527068632795, "grad_norm": 1.5862157220150306, "learning_rate": 6.06097630352622e-06, "loss": 0.551, "step": 16589 }, { "epoch": 0.48878216932074303, "grad_norm": 1.4570832347655707, "learning_rate": 6.060473768932077e-06, "loss": 0.4108, "step": 16590 }, { "epoch": 0.48881163177820663, "grad_norm": 1.4420959038210472, "learning_rate": 6.0599712231205666e-06, "loss": 0.4011, "step": 16591 }, { "epoch": 0.4888410942356702, "grad_norm": 1.4634096192666926, "learning_rate": 6.059468666097004e-06, "loss": 0.3809, "step": 16592 }, { "epoch": 0.4888705566931338, "grad_norm": 1.4563299955114337, "learning_rate": 6.058966097866703e-06, "loss": 0.4726, "step": 16593 }, { "epoch": 0.48890001915059733, "grad_norm": 1.6648097764101757, "learning_rate": 6.058463518434984e-06, "loss": 0.4657, "step": 16594 }, { "epoch": 0.48892948160806093, "grad_norm": 1.4332772528553617, "learning_rate": 6.0579609278071585e-06, "loss": 0.3922, "step": 16595 }, { "epoch": 0.4889589440655245, "grad_norm": 1.6259128683530741, "learning_rate": 6.057458325988545e-06, "loss": 0.4237, "step": 16596 }, { "epoch": 0.4889884065229881, "grad_norm": 1.3858003001684518, "learning_rate": 6.05695571298446e-06, "loss": 0.4657, "step": 16597 }, { "epoch": 0.4890178689804517, "grad_norm": 1.4805562424867982, "learning_rate": 6.05645308880022e-06, "loss": 0.5047, "step": 16598 }, { "epoch": 0.48904733143791523, "grad_norm": 1.573678557936295, "learning_rate": 6.05595045344114e-06, "loss": 0.4685, "step": 16599 }, { "epoch": 0.48907679389537884, "grad_norm": 1.4877906899291753, "learning_rate": 6.055447806912538e-06, "loss": 0.4809, "step": 16600 }, { "epoch": 0.4891062563528424, "grad_norm": 1.5127618237216687, "learning_rate": 6.054945149219731e-06, "loss": 0.4143, "step": 16601 }, { "epoch": 0.489135718810306, "grad_norm": 1.3670951336916406, "learning_rate": 6.054442480368036e-06, "loss": 0.4377, "step": 16602 }, { "epoch": 0.48916518126776953, "grad_norm": 1.4063735335900127, "learning_rate": 6.05393980036277e-06, "loss": 0.4359, "step": 16603 }, { "epoch": 0.48919464372523314, "grad_norm": 1.6174534590576928, "learning_rate": 6.0534371092092495e-06, "loss": 0.5014, "step": 16604 }, { "epoch": 0.4892241061826967, "grad_norm": 1.490534138816971, "learning_rate": 6.052934406912791e-06, "loss": 0.4479, "step": 16605 }, { "epoch": 0.4892535686401603, "grad_norm": 1.3630249473760283, "learning_rate": 6.052431693478716e-06, "loss": 0.3465, "step": 16606 }, { "epoch": 0.48928303109762383, "grad_norm": 1.601968058424346, "learning_rate": 6.051928968912338e-06, "loss": 0.5634, "step": 16607 }, { "epoch": 0.48931249355508744, "grad_norm": 1.3604889991883202, "learning_rate": 6.051426233218976e-06, "loss": 0.4451, "step": 16608 }, { "epoch": 0.489341956012551, "grad_norm": 1.3785233127488037, "learning_rate": 6.050923486403948e-06, "loss": 0.3607, "step": 16609 }, { "epoch": 0.4893714184700146, "grad_norm": 1.4585343407278457, "learning_rate": 6.050420728472571e-06, "loss": 0.4578, "step": 16610 }, { "epoch": 0.4894008809274782, "grad_norm": 1.532528921578764, "learning_rate": 6.049917959430164e-06, "loss": 0.4382, "step": 16611 }, { "epoch": 0.48943034338494174, "grad_norm": 1.629062375541133, "learning_rate": 6.049415179282045e-06, "loss": 0.5948, "step": 16612 }, { "epoch": 0.48945980584240534, "grad_norm": 1.6126853919978401, "learning_rate": 6.048912388033532e-06, "loss": 0.3943, "step": 16613 }, { "epoch": 0.4894892682998689, "grad_norm": 1.4497495752919118, "learning_rate": 6.048409585689943e-06, "loss": 0.4284, "step": 16614 }, { "epoch": 0.4895187307573325, "grad_norm": 1.575052072155166, "learning_rate": 6.047906772256597e-06, "loss": 0.5536, "step": 16615 }, { "epoch": 0.48954819321479603, "grad_norm": 1.4030435956027236, "learning_rate": 6.0474039477388126e-06, "loss": 0.5005, "step": 16616 }, { "epoch": 0.48957765567225964, "grad_norm": 1.4972773112198334, "learning_rate": 6.04690111214191e-06, "loss": 0.5383, "step": 16617 }, { "epoch": 0.4896071181297232, "grad_norm": 1.7357814567788747, "learning_rate": 6.046398265471205e-06, "loss": 0.4313, "step": 16618 }, { "epoch": 0.4896365805871868, "grad_norm": 1.4058845905586759, "learning_rate": 6.045895407732018e-06, "loss": 0.5406, "step": 16619 }, { "epoch": 0.48966604304465033, "grad_norm": 1.425034799750176, "learning_rate": 6.045392538929667e-06, "loss": 0.4763, "step": 16620 }, { "epoch": 0.48969550550211394, "grad_norm": 1.3567408565647912, "learning_rate": 6.044889659069474e-06, "loss": 0.4691, "step": 16621 }, { "epoch": 0.4897249679595775, "grad_norm": 1.3735510851342034, "learning_rate": 6.0443867681567565e-06, "loss": 0.4086, "step": 16622 }, { "epoch": 0.4897544304170411, "grad_norm": 1.5208539790958988, "learning_rate": 6.043883866196834e-06, "loss": 0.5374, "step": 16623 }, { "epoch": 0.4897838928745047, "grad_norm": 1.445606173386887, "learning_rate": 6.043380953195026e-06, "loss": 0.4631, "step": 16624 }, { "epoch": 0.48981335533196824, "grad_norm": 1.350430074793338, "learning_rate": 6.042878029156651e-06, "loss": 0.4808, "step": 16625 }, { "epoch": 0.48984281778943184, "grad_norm": 1.5559483654899755, "learning_rate": 6.0423750940870305e-06, "loss": 0.3469, "step": 16626 }, { "epoch": 0.4898722802468954, "grad_norm": 1.4424731357344287, "learning_rate": 6.041872147991486e-06, "loss": 0.5559, "step": 16627 }, { "epoch": 0.489901742704359, "grad_norm": 1.6088502190183902, "learning_rate": 6.041369190875334e-06, "loss": 0.4245, "step": 16628 }, { "epoch": 0.48993120516182254, "grad_norm": 1.677261951450317, "learning_rate": 6.040866222743895e-06, "loss": 0.5026, "step": 16629 }, { "epoch": 0.48996066761928614, "grad_norm": 1.6656914848469637, "learning_rate": 6.040363243602493e-06, "loss": 0.5724, "step": 16630 }, { "epoch": 0.4899901300767497, "grad_norm": 1.4949469902583243, "learning_rate": 6.039860253456443e-06, "loss": 0.4653, "step": 16631 }, { "epoch": 0.4900195925342133, "grad_norm": 1.473210096373654, "learning_rate": 6.03935725231107e-06, "loss": 0.4464, "step": 16632 }, { "epoch": 0.49004905499167684, "grad_norm": 1.417892797900303, "learning_rate": 6.038854240171692e-06, "loss": 0.4426, "step": 16633 }, { "epoch": 0.49007851744914044, "grad_norm": 1.2732283940187996, "learning_rate": 6.03835121704363e-06, "loss": 0.4187, "step": 16634 }, { "epoch": 0.490107979906604, "grad_norm": 1.4514917365313105, "learning_rate": 6.037848182932204e-06, "loss": 0.4056, "step": 16635 }, { "epoch": 0.4901374423640676, "grad_norm": 1.5938477891536948, "learning_rate": 6.03734513784274e-06, "loss": 0.3634, "step": 16636 }, { "epoch": 0.4901669048215312, "grad_norm": 1.4031496961068972, "learning_rate": 6.036842081780553e-06, "loss": 0.3779, "step": 16637 }, { "epoch": 0.49019636727899474, "grad_norm": 1.4877623599813485, "learning_rate": 6.036339014750966e-06, "loss": 0.4153, "step": 16638 }, { "epoch": 0.49022582973645834, "grad_norm": 1.459924692631354, "learning_rate": 6.035835936759302e-06, "loss": 0.3429, "step": 16639 }, { "epoch": 0.4902552921939219, "grad_norm": 1.6768020027250834, "learning_rate": 6.03533284781088e-06, "loss": 0.5166, "step": 16640 }, { "epoch": 0.4902847546513855, "grad_norm": 1.4021980959897467, "learning_rate": 6.034829747911022e-06, "loss": 0.4174, "step": 16641 }, { "epoch": 0.49031421710884904, "grad_norm": 1.511606393546441, "learning_rate": 6.034326637065053e-06, "loss": 0.5229, "step": 16642 }, { "epoch": 0.49034367956631264, "grad_norm": 1.4001698193214354, "learning_rate": 6.03382351527829e-06, "loss": 0.3796, "step": 16643 }, { "epoch": 0.4903731420237762, "grad_norm": 1.5893451208977047, "learning_rate": 6.033320382556057e-06, "loss": 0.5227, "step": 16644 }, { "epoch": 0.4904026044812398, "grad_norm": 1.5388229385643677, "learning_rate": 6.032817238903677e-06, "loss": 0.5428, "step": 16645 }, { "epoch": 0.49043206693870334, "grad_norm": 1.5307667411391213, "learning_rate": 6.032314084326469e-06, "loss": 0.4889, "step": 16646 }, { "epoch": 0.49046152939616694, "grad_norm": 1.5151730553102811, "learning_rate": 6.031810918829759e-06, "loss": 0.3843, "step": 16647 }, { "epoch": 0.4904909918536305, "grad_norm": 1.5083620871283812, "learning_rate": 6.0313077424188685e-06, "loss": 0.5589, "step": 16648 }, { "epoch": 0.4905204543110941, "grad_norm": 1.4468530615961277, "learning_rate": 6.030804555099117e-06, "loss": 0.4602, "step": 16649 }, { "epoch": 0.4905499167685577, "grad_norm": 1.3703341726772162, "learning_rate": 6.0303013568758295e-06, "loss": 0.4417, "step": 16650 }, { "epoch": 0.49057937922602124, "grad_norm": 1.41126430091036, "learning_rate": 6.029798147754329e-06, "loss": 0.3954, "step": 16651 }, { "epoch": 0.49060884168348484, "grad_norm": 1.3992775950772167, "learning_rate": 6.0292949277399374e-06, "loss": 0.3233, "step": 16652 }, { "epoch": 0.4906383041409484, "grad_norm": 1.6825791008546662, "learning_rate": 6.028791696837977e-06, "loss": 0.5274, "step": 16653 }, { "epoch": 0.490667766598412, "grad_norm": 1.6870327484009615, "learning_rate": 6.028288455053771e-06, "loss": 0.3881, "step": 16654 }, { "epoch": 0.49069722905587554, "grad_norm": 1.3307069187048628, "learning_rate": 6.027785202392644e-06, "loss": 0.4259, "step": 16655 }, { "epoch": 0.49072669151333914, "grad_norm": 1.7465271627479926, "learning_rate": 6.027281938859918e-06, "loss": 0.4104, "step": 16656 }, { "epoch": 0.4907561539708027, "grad_norm": 1.5660873357479832, "learning_rate": 6.026778664460917e-06, "loss": 0.5748, "step": 16657 }, { "epoch": 0.4907856164282663, "grad_norm": 1.4526872894710543, "learning_rate": 6.026275379200965e-06, "loss": 0.3395, "step": 16658 }, { "epoch": 0.49081507888572984, "grad_norm": 1.5603603059665154, "learning_rate": 6.025772083085384e-06, "loss": 0.4052, "step": 16659 }, { "epoch": 0.49084454134319344, "grad_norm": 1.5541299240496858, "learning_rate": 6.0252687761194985e-06, "loss": 0.5509, "step": 16660 }, { "epoch": 0.490874003800657, "grad_norm": 1.298253864181199, "learning_rate": 6.024765458308633e-06, "loss": 0.401, "step": 16661 }, { "epoch": 0.4909034662581206, "grad_norm": 1.5183992726452806, "learning_rate": 6.024262129658109e-06, "loss": 0.3906, "step": 16662 }, { "epoch": 0.4909329287155842, "grad_norm": 1.542813578521627, "learning_rate": 6.023758790173255e-06, "loss": 0.4652, "step": 16663 }, { "epoch": 0.49096239117304774, "grad_norm": 1.5411360729422137, "learning_rate": 6.02325543985939e-06, "loss": 0.3757, "step": 16664 }, { "epoch": 0.49099185363051134, "grad_norm": 1.768419214464211, "learning_rate": 6.022752078721842e-06, "loss": 0.5922, "step": 16665 }, { "epoch": 0.4910213160879749, "grad_norm": 1.382317583080198, "learning_rate": 6.022248706765934e-06, "loss": 0.3708, "step": 16666 }, { "epoch": 0.4910507785454385, "grad_norm": 1.4746755438818209, "learning_rate": 6.021745323996991e-06, "loss": 0.4435, "step": 16667 }, { "epoch": 0.49108024100290204, "grad_norm": 1.6308623746936843, "learning_rate": 6.021241930420337e-06, "loss": 0.4746, "step": 16668 }, { "epoch": 0.49110970346036564, "grad_norm": 1.4120368877367826, "learning_rate": 6.020738526041298e-06, "loss": 0.4246, "step": 16669 }, { "epoch": 0.4911391659178292, "grad_norm": 1.4266226912718045, "learning_rate": 6.020235110865196e-06, "loss": 0.3368, "step": 16670 }, { "epoch": 0.4911686283752928, "grad_norm": 1.5338363408977231, "learning_rate": 6.019731684897359e-06, "loss": 0.4546, "step": 16671 }, { "epoch": 0.49119809083275634, "grad_norm": 1.7334049667866935, "learning_rate": 6.019228248143112e-06, "loss": 0.6789, "step": 16672 }, { "epoch": 0.49122755329021994, "grad_norm": 1.3041247882226588, "learning_rate": 6.018724800607778e-06, "loss": 0.2989, "step": 16673 }, { "epoch": 0.4912570157476835, "grad_norm": 1.3178066670390909, "learning_rate": 6.018221342296684e-06, "loss": 0.364, "step": 16674 }, { "epoch": 0.4912864782051471, "grad_norm": 1.7330176130689554, "learning_rate": 6.017717873215156e-06, "loss": 0.5329, "step": 16675 }, { "epoch": 0.4913159406626107, "grad_norm": 1.4867540516626634, "learning_rate": 6.017214393368516e-06, "loss": 0.3832, "step": 16676 }, { "epoch": 0.49134540312007424, "grad_norm": 1.7407269790638196, "learning_rate": 6.016710902762094e-06, "loss": 0.428, "step": 16677 }, { "epoch": 0.49137486557753784, "grad_norm": 1.5975612906358885, "learning_rate": 6.0162074014012126e-06, "loss": 0.5891, "step": 16678 }, { "epoch": 0.4914043280350014, "grad_norm": 1.7041764890874944, "learning_rate": 6.0157038892912005e-06, "loss": 0.4526, "step": 16679 }, { "epoch": 0.491433790492465, "grad_norm": 1.4765490375808183, "learning_rate": 6.01520036643738e-06, "loss": 0.397, "step": 16680 }, { "epoch": 0.49146325294992854, "grad_norm": 1.4598269092598712, "learning_rate": 6.014696832845082e-06, "loss": 0.4278, "step": 16681 }, { "epoch": 0.49149271540739214, "grad_norm": 1.3625851009761603, "learning_rate": 6.0141932885196295e-06, "loss": 0.4671, "step": 16682 }, { "epoch": 0.4915221778648557, "grad_norm": 1.3485663853797099, "learning_rate": 6.013689733466349e-06, "loss": 0.4234, "step": 16683 }, { "epoch": 0.4915516403223193, "grad_norm": 1.4813739362089957, "learning_rate": 6.013186167690568e-06, "loss": 0.469, "step": 16684 }, { "epoch": 0.49158110277978284, "grad_norm": 1.460926938680384, "learning_rate": 6.012682591197612e-06, "loss": 0.4708, "step": 16685 }, { "epoch": 0.49161056523724644, "grad_norm": 1.4898758949531365, "learning_rate": 6.0121790039928064e-06, "loss": 0.4771, "step": 16686 }, { "epoch": 0.49164002769471, "grad_norm": 1.4780444084284514, "learning_rate": 6.0116754060814815e-06, "loss": 0.4122, "step": 16687 }, { "epoch": 0.4916694901521736, "grad_norm": 1.5530850308676085, "learning_rate": 6.011171797468964e-06, "loss": 0.4541, "step": 16688 }, { "epoch": 0.4916989526096372, "grad_norm": 1.455754607141708, "learning_rate": 6.0106681781605766e-06, "loss": 0.3821, "step": 16689 }, { "epoch": 0.49172841506710074, "grad_norm": 1.4689016144125409, "learning_rate": 6.010164548161651e-06, "loss": 0.4243, "step": 16690 }, { "epoch": 0.49175787752456435, "grad_norm": 1.4152179764291541, "learning_rate": 6.009660907477512e-06, "loss": 0.4303, "step": 16691 }, { "epoch": 0.4917873399820279, "grad_norm": 1.6119163598666066, "learning_rate": 6.009157256113488e-06, "loss": 0.5847, "step": 16692 }, { "epoch": 0.4918168024394915, "grad_norm": 1.5554656312045603, "learning_rate": 6.008653594074905e-06, "loss": 0.5889, "step": 16693 }, { "epoch": 0.49184626489695504, "grad_norm": 1.3510138275286099, "learning_rate": 6.008149921367093e-06, "loss": 0.3927, "step": 16694 }, { "epoch": 0.49187572735441865, "grad_norm": 1.5260079608584762, "learning_rate": 6.007646237995378e-06, "loss": 0.4934, "step": 16695 }, { "epoch": 0.4919051898118822, "grad_norm": 1.4774373323205536, "learning_rate": 6.0071425439650885e-06, "loss": 0.475, "step": 16696 }, { "epoch": 0.4919346522693458, "grad_norm": 1.387876242065673, "learning_rate": 6.006638839281552e-06, "loss": 0.3511, "step": 16697 }, { "epoch": 0.49196411472680934, "grad_norm": 1.5552786675534531, "learning_rate": 6.006135123950096e-06, "loss": 0.3721, "step": 16698 }, { "epoch": 0.49199357718427295, "grad_norm": 1.301037907542273, "learning_rate": 6.00563139797605e-06, "loss": 0.3748, "step": 16699 }, { "epoch": 0.4920230396417365, "grad_norm": 1.4647403828659735, "learning_rate": 6.0051276613647415e-06, "loss": 0.3544, "step": 16700 }, { "epoch": 0.4920525020992001, "grad_norm": 1.5075626111637361, "learning_rate": 6.004623914121498e-06, "loss": 0.3734, "step": 16701 }, { "epoch": 0.4920819645566637, "grad_norm": 1.8153273035451465, "learning_rate": 6.004120156251649e-06, "loss": 0.6615, "step": 16702 }, { "epoch": 0.49211142701412725, "grad_norm": 1.4651186884352918, "learning_rate": 6.0036163877605245e-06, "loss": 0.342, "step": 16703 }, { "epoch": 0.49214088947159085, "grad_norm": 1.8167263847361363, "learning_rate": 6.0031126086534505e-06, "loss": 0.6388, "step": 16704 }, { "epoch": 0.4921703519290544, "grad_norm": 1.5390628347660635, "learning_rate": 6.002608818935757e-06, "loss": 0.3001, "step": 16705 }, { "epoch": 0.492199814386518, "grad_norm": 1.4992004921413005, "learning_rate": 6.002105018612774e-06, "loss": 0.4537, "step": 16706 }, { "epoch": 0.49222927684398154, "grad_norm": 1.7107846687244883, "learning_rate": 6.001601207689829e-06, "loss": 0.5784, "step": 16707 }, { "epoch": 0.49225873930144515, "grad_norm": 1.614688632068327, "learning_rate": 6.001097386172251e-06, "loss": 0.4625, "step": 16708 }, { "epoch": 0.4922882017589087, "grad_norm": 1.539807291379138, "learning_rate": 6.0005935540653705e-06, "loss": 0.5689, "step": 16709 }, { "epoch": 0.4923176642163723, "grad_norm": 1.5218916439667276, "learning_rate": 6.000089711374516e-06, "loss": 0.55, "step": 16710 }, { "epoch": 0.49234712667383584, "grad_norm": 1.439263829988848, "learning_rate": 5.999585858105018e-06, "loss": 0.515, "step": 16711 }, { "epoch": 0.49237658913129945, "grad_norm": 1.7430746515755777, "learning_rate": 5.999081994262206e-06, "loss": 0.5045, "step": 16712 }, { "epoch": 0.492406051588763, "grad_norm": 1.4242599024381788, "learning_rate": 5.9985781198514074e-06, "loss": 0.391, "step": 16713 }, { "epoch": 0.4924355140462266, "grad_norm": 1.3747240203208817, "learning_rate": 5.998074234877953e-06, "loss": 0.4379, "step": 16714 }, { "epoch": 0.4924649765036902, "grad_norm": 1.649284769717189, "learning_rate": 5.997570339347176e-06, "loss": 0.4754, "step": 16715 }, { "epoch": 0.49249443896115375, "grad_norm": 1.4889312179409102, "learning_rate": 5.9970664332644024e-06, "loss": 0.4269, "step": 16716 }, { "epoch": 0.49252390141861735, "grad_norm": 1.4430533690657268, "learning_rate": 5.996562516634965e-06, "loss": 0.4366, "step": 16717 }, { "epoch": 0.4925533638760809, "grad_norm": 1.4986290694707383, "learning_rate": 5.996058589464193e-06, "loss": 0.4073, "step": 16718 }, { "epoch": 0.4925828263335445, "grad_norm": 1.4603447068801985, "learning_rate": 5.995554651757416e-06, "loss": 0.4077, "step": 16719 }, { "epoch": 0.49261228879100805, "grad_norm": 1.4170382723598498, "learning_rate": 5.995050703519965e-06, "loss": 0.4551, "step": 16720 }, { "epoch": 0.49264175124847165, "grad_norm": 1.5113785350611686, "learning_rate": 5.994546744757171e-06, "loss": 0.4688, "step": 16721 }, { "epoch": 0.4926712137059352, "grad_norm": 1.6356396632840902, "learning_rate": 5.994042775474367e-06, "loss": 0.5129, "step": 16722 }, { "epoch": 0.4927006761633988, "grad_norm": 1.5851195583924744, "learning_rate": 5.993538795676879e-06, "loss": 0.4808, "step": 16723 }, { "epoch": 0.49273013862086235, "grad_norm": 1.8648382078773533, "learning_rate": 5.993034805370041e-06, "loss": 0.4097, "step": 16724 }, { "epoch": 0.49275960107832595, "grad_norm": 1.6283481832111277, "learning_rate": 5.9925308045591825e-06, "loss": 0.4729, "step": 16725 }, { "epoch": 0.4927890635357895, "grad_norm": 1.413138089116474, "learning_rate": 5.992026793249636e-06, "loss": 0.3252, "step": 16726 }, { "epoch": 0.4928185259932531, "grad_norm": 1.4859564215979448, "learning_rate": 5.991522771446735e-06, "loss": 0.4456, "step": 16727 }, { "epoch": 0.4928479884507167, "grad_norm": 1.5029757718737329, "learning_rate": 5.991018739155806e-06, "loss": 0.5017, "step": 16728 }, { "epoch": 0.49287745090818025, "grad_norm": 1.5094366479515189, "learning_rate": 5.9905146963821826e-06, "loss": 0.4342, "step": 16729 }, { "epoch": 0.49290691336564385, "grad_norm": 1.5104683409843633, "learning_rate": 5.990010643131198e-06, "loss": 0.5463, "step": 16730 }, { "epoch": 0.4929363758231074, "grad_norm": 1.5831154876863414, "learning_rate": 5.989506579408181e-06, "loss": 0.4701, "step": 16731 }, { "epoch": 0.492965838280571, "grad_norm": 1.6452600509192308, "learning_rate": 5.989002505218465e-06, "loss": 0.6619, "step": 16732 }, { "epoch": 0.49299530073803455, "grad_norm": 1.523418061176438, "learning_rate": 5.9884984205673834e-06, "loss": 0.549, "step": 16733 }, { "epoch": 0.49302476319549815, "grad_norm": 1.4289710201040045, "learning_rate": 5.987994325460266e-06, "loss": 0.4195, "step": 16734 }, { "epoch": 0.4930542256529617, "grad_norm": 1.6032137875234451, "learning_rate": 5.987490219902444e-06, "loss": 0.486, "step": 16735 }, { "epoch": 0.4930836881104253, "grad_norm": 1.5612746564658164, "learning_rate": 5.986986103899254e-06, "loss": 0.5016, "step": 16736 }, { "epoch": 0.49311315056788885, "grad_norm": 1.4943459444825695, "learning_rate": 5.986481977456026e-06, "loss": 0.4325, "step": 16737 }, { "epoch": 0.49314261302535245, "grad_norm": 1.6570368875310735, "learning_rate": 5.98597784057809e-06, "loss": 0.4936, "step": 16738 }, { "epoch": 0.493172075482816, "grad_norm": 1.455593525670069, "learning_rate": 5.985473693270783e-06, "loss": 0.3979, "step": 16739 }, { "epoch": 0.4932015379402796, "grad_norm": 1.381458516139345, "learning_rate": 5.984969535539433e-06, "loss": 0.2857, "step": 16740 }, { "epoch": 0.4932310003977432, "grad_norm": 1.3882607940967677, "learning_rate": 5.984465367389377e-06, "loss": 0.4431, "step": 16741 }, { "epoch": 0.49326046285520675, "grad_norm": 1.4246411585952061, "learning_rate": 5.983961188825949e-06, "loss": 0.4271, "step": 16742 }, { "epoch": 0.49328992531267035, "grad_norm": 1.4545714822134268, "learning_rate": 5.983456999854476e-06, "loss": 0.3524, "step": 16743 }, { "epoch": 0.4933193877701339, "grad_norm": 1.7864067597548168, "learning_rate": 5.982952800480294e-06, "loss": 0.4726, "step": 16744 }, { "epoch": 0.4933488502275975, "grad_norm": 1.3306874085000484, "learning_rate": 5.9824485907087395e-06, "loss": 0.385, "step": 16745 }, { "epoch": 0.49337831268506105, "grad_norm": 1.6070458876648535, "learning_rate": 5.981944370545141e-06, "loss": 0.4247, "step": 16746 }, { "epoch": 0.49340777514252465, "grad_norm": 1.447186003334742, "learning_rate": 5.981440139994835e-06, "loss": 0.3931, "step": 16747 }, { "epoch": 0.4934372375999882, "grad_norm": 1.438785425467521, "learning_rate": 5.980935899063156e-06, "loss": 0.5015, "step": 16748 }, { "epoch": 0.4934667000574518, "grad_norm": 1.4818221883410974, "learning_rate": 5.980431647755434e-06, "loss": 0.3802, "step": 16749 }, { "epoch": 0.49349616251491535, "grad_norm": 1.4764045881369778, "learning_rate": 5.979927386077005e-06, "loss": 0.3907, "step": 16750 }, { "epoch": 0.49352562497237895, "grad_norm": 1.4242715250661697, "learning_rate": 5.9794231140332025e-06, "loss": 0.3864, "step": 16751 }, { "epoch": 0.4935550874298425, "grad_norm": 1.5876325286537383, "learning_rate": 5.978918831629362e-06, "loss": 0.4812, "step": 16752 }, { "epoch": 0.4935845498873061, "grad_norm": 1.5443533338101414, "learning_rate": 5.978414538870816e-06, "loss": 0.4229, "step": 16753 }, { "epoch": 0.4936140123447697, "grad_norm": 1.6942046125540933, "learning_rate": 5.977910235762898e-06, "loss": 0.6068, "step": 16754 }, { "epoch": 0.49364347480223325, "grad_norm": 1.620881363226871, "learning_rate": 5.977405922310945e-06, "loss": 0.521, "step": 16755 }, { "epoch": 0.49367293725969685, "grad_norm": 1.4492726240237952, "learning_rate": 5.976901598520289e-06, "loss": 0.3737, "step": 16756 }, { "epoch": 0.4937023997171604, "grad_norm": 1.6385559229089963, "learning_rate": 5.976397264396268e-06, "loss": 0.4204, "step": 16757 }, { "epoch": 0.493731862174624, "grad_norm": 1.4997972261207313, "learning_rate": 5.975892919944211e-06, "loss": 0.4707, "step": 16758 }, { "epoch": 0.49376132463208755, "grad_norm": 1.4532891556335341, "learning_rate": 5.975388565169458e-06, "loss": 0.3856, "step": 16759 }, { "epoch": 0.49379078708955115, "grad_norm": 1.7644912696703547, "learning_rate": 5.974884200077341e-06, "loss": 0.586, "step": 16760 }, { "epoch": 0.4938202495470147, "grad_norm": 1.4929856056415542, "learning_rate": 5.974379824673197e-06, "loss": 0.5068, "step": 16761 }, { "epoch": 0.4938497120044783, "grad_norm": 1.4583540350893194, "learning_rate": 5.97387543896236e-06, "loss": 0.4648, "step": 16762 }, { "epoch": 0.49387917446194185, "grad_norm": 1.6464818827757428, "learning_rate": 5.973371042950166e-06, "loss": 0.5364, "step": 16763 }, { "epoch": 0.49390863691940545, "grad_norm": 1.7212194451600369, "learning_rate": 5.972866636641947e-06, "loss": 0.4513, "step": 16764 }, { "epoch": 0.493938099376869, "grad_norm": 1.5860450611470487, "learning_rate": 5.972362220043043e-06, "loss": 0.4695, "step": 16765 }, { "epoch": 0.4939675618343326, "grad_norm": 1.5257603697231432, "learning_rate": 5.971857793158788e-06, "loss": 0.601, "step": 16766 }, { "epoch": 0.4939970242917962, "grad_norm": 1.5941439512836781, "learning_rate": 5.971353355994519e-06, "loss": 0.4811, "step": 16767 }, { "epoch": 0.49402648674925975, "grad_norm": 1.4433149563018908, "learning_rate": 5.970848908555568e-06, "loss": 0.4131, "step": 16768 }, { "epoch": 0.49405594920672335, "grad_norm": 1.4212581931501116, "learning_rate": 5.970344450847274e-06, "loss": 0.4925, "step": 16769 }, { "epoch": 0.4940854116641869, "grad_norm": 1.3466827939635093, "learning_rate": 5.969839982874971e-06, "loss": 0.4423, "step": 16770 }, { "epoch": 0.4941148741216505, "grad_norm": 1.430323429805238, "learning_rate": 5.969335504643997e-06, "loss": 0.4591, "step": 16771 }, { "epoch": 0.49414433657911405, "grad_norm": 1.5562492570004225, "learning_rate": 5.968831016159689e-06, "loss": 0.5331, "step": 16772 }, { "epoch": 0.49417379903657765, "grad_norm": 1.5322450183746925, "learning_rate": 5.968326517427379e-06, "loss": 0.4889, "step": 16773 }, { "epoch": 0.4942032614940412, "grad_norm": 1.45739955039125, "learning_rate": 5.967822008452407e-06, "loss": 0.3185, "step": 16774 }, { "epoch": 0.4942327239515048, "grad_norm": 1.3859148278110447, "learning_rate": 5.96731748924011e-06, "loss": 0.3922, "step": 16775 }, { "epoch": 0.49426218640896835, "grad_norm": 1.513049423235396, "learning_rate": 5.9668129597958225e-06, "loss": 0.4256, "step": 16776 }, { "epoch": 0.49429164886643195, "grad_norm": 1.4389050580264777, "learning_rate": 5.966308420124882e-06, "loss": 0.3797, "step": 16777 }, { "epoch": 0.4943211113238955, "grad_norm": 1.5703897406796046, "learning_rate": 5.965803870232627e-06, "loss": 0.4963, "step": 16778 }, { "epoch": 0.4943505737813591, "grad_norm": 1.6094347623416292, "learning_rate": 5.9652993101243916e-06, "loss": 0.4962, "step": 16779 }, { "epoch": 0.4943800362388227, "grad_norm": 1.6514228697155013, "learning_rate": 5.964794739805514e-06, "loss": 0.3877, "step": 16780 }, { "epoch": 0.49440949869628625, "grad_norm": 1.5469079068905602, "learning_rate": 5.964290159281332e-06, "loss": 0.3716, "step": 16781 }, { "epoch": 0.49443896115374986, "grad_norm": 1.6848211247238918, "learning_rate": 5.963785568557183e-06, "loss": 0.597, "step": 16782 }, { "epoch": 0.4944684236112134, "grad_norm": 1.4410842861071924, "learning_rate": 5.963280967638404e-06, "loss": 0.3816, "step": 16783 }, { "epoch": 0.494497886068677, "grad_norm": 1.5178307207790132, "learning_rate": 5.962776356530333e-06, "loss": 0.3574, "step": 16784 }, { "epoch": 0.49452734852614055, "grad_norm": 1.512315871519946, "learning_rate": 5.962271735238305e-06, "loss": 0.4978, "step": 16785 }, { "epoch": 0.49455681098360416, "grad_norm": 1.4852337606213162, "learning_rate": 5.961767103767661e-06, "loss": 0.4218, "step": 16786 }, { "epoch": 0.4945862734410677, "grad_norm": 1.5266484764034844, "learning_rate": 5.961262462123738e-06, "loss": 0.437, "step": 16787 }, { "epoch": 0.4946157358985313, "grad_norm": 1.4260722817861566, "learning_rate": 5.960757810311875e-06, "loss": 0.5388, "step": 16788 }, { "epoch": 0.49464519835599485, "grad_norm": 1.655993964563504, "learning_rate": 5.960253148337406e-06, "loss": 0.5256, "step": 16789 }, { "epoch": 0.49467466081345846, "grad_norm": 1.5899063858997886, "learning_rate": 5.959748476205673e-06, "loss": 0.5076, "step": 16790 }, { "epoch": 0.494704123270922, "grad_norm": 1.5203870509607107, "learning_rate": 5.959243793922013e-06, "loss": 0.4696, "step": 16791 }, { "epoch": 0.4947335857283856, "grad_norm": 1.5008492187843685, "learning_rate": 5.958739101491765e-06, "loss": 0.4803, "step": 16792 }, { "epoch": 0.4947630481858492, "grad_norm": 1.393831244603906, "learning_rate": 5.958234398920266e-06, "loss": 0.4359, "step": 16793 }, { "epoch": 0.49479251064331276, "grad_norm": 1.505379843610468, "learning_rate": 5.957729686212856e-06, "loss": 0.498, "step": 16794 }, { "epoch": 0.49482197310077636, "grad_norm": 1.5474893131292156, "learning_rate": 5.957224963374873e-06, "loss": 0.3495, "step": 16795 }, { "epoch": 0.4948514355582399, "grad_norm": 1.544887079953219, "learning_rate": 5.956720230411658e-06, "loss": 0.4033, "step": 16796 }, { "epoch": 0.4948808980157035, "grad_norm": 1.4977098069672183, "learning_rate": 5.956215487328547e-06, "loss": 0.5062, "step": 16797 }, { "epoch": 0.49491036047316705, "grad_norm": 1.5074236663468854, "learning_rate": 5.955710734130878e-06, "loss": 0.3373, "step": 16798 }, { "epoch": 0.49493982293063066, "grad_norm": 1.5105139188408447, "learning_rate": 5.955205970823995e-06, "loss": 0.5471, "step": 16799 }, { "epoch": 0.4949692853880942, "grad_norm": 1.545324824809606, "learning_rate": 5.954701197413232e-06, "loss": 0.478, "step": 16800 }, { "epoch": 0.4949987478455578, "grad_norm": 1.6986413220069323, "learning_rate": 5.954196413903931e-06, "loss": 0.6867, "step": 16801 }, { "epoch": 0.49502821030302135, "grad_norm": 1.4992463089717682, "learning_rate": 5.953691620301432e-06, "loss": 0.4457, "step": 16802 }, { "epoch": 0.49505767276048496, "grad_norm": 1.6146487880264069, "learning_rate": 5.953186816611074e-06, "loss": 0.4721, "step": 16803 }, { "epoch": 0.4950871352179485, "grad_norm": 1.4283902221913316, "learning_rate": 5.9526820028381945e-06, "loss": 0.3789, "step": 16804 }, { "epoch": 0.4951165976754121, "grad_norm": 1.399413800795537, "learning_rate": 5.952177178988138e-06, "loss": 0.4041, "step": 16805 }, { "epoch": 0.4951460601328757, "grad_norm": 1.6113201877411123, "learning_rate": 5.95167234506624e-06, "loss": 0.4564, "step": 16806 }, { "epoch": 0.49517552259033926, "grad_norm": 1.5915331507317845, "learning_rate": 5.951167501077841e-06, "loss": 0.3242, "step": 16807 }, { "epoch": 0.49520498504780286, "grad_norm": 1.4500681392936443, "learning_rate": 5.950662647028283e-06, "loss": 0.4841, "step": 16808 }, { "epoch": 0.4952344475052664, "grad_norm": 1.8002072167977097, "learning_rate": 5.950157782922905e-06, "loss": 0.3921, "step": 16809 }, { "epoch": 0.49526390996273, "grad_norm": 1.6964822396362482, "learning_rate": 5.9496529087670475e-06, "loss": 0.381, "step": 16810 }, { "epoch": 0.49529337242019356, "grad_norm": 1.640855526612684, "learning_rate": 5.949148024566052e-06, "loss": 0.4003, "step": 16811 }, { "epoch": 0.49532283487765716, "grad_norm": 1.3332390176067512, "learning_rate": 5.948643130325257e-06, "loss": 0.4009, "step": 16812 }, { "epoch": 0.4953522973351207, "grad_norm": 1.6160280816068477, "learning_rate": 5.948138226050004e-06, "loss": 0.4864, "step": 16813 }, { "epoch": 0.4953817597925843, "grad_norm": 1.6673587572855422, "learning_rate": 5.947633311745633e-06, "loss": 0.4645, "step": 16814 }, { "epoch": 0.49541122225004786, "grad_norm": 1.4408960031308435, "learning_rate": 5.947128387417486e-06, "loss": 0.5052, "step": 16815 }, { "epoch": 0.49544068470751146, "grad_norm": 1.505056531495866, "learning_rate": 5.9466234530709045e-06, "loss": 0.4207, "step": 16816 }, { "epoch": 0.495470147164975, "grad_norm": 1.5008285776593289, "learning_rate": 5.946118508711229e-06, "loss": 0.5118, "step": 16817 }, { "epoch": 0.4954996096224386, "grad_norm": 1.5275817115997157, "learning_rate": 5.9456135543438e-06, "loss": 0.5046, "step": 16818 }, { "epoch": 0.4955290720799022, "grad_norm": 1.409951498867524, "learning_rate": 5.945108589973958e-06, "loss": 0.4404, "step": 16819 }, { "epoch": 0.49555853453736576, "grad_norm": 1.4446649389394057, "learning_rate": 5.944603615607046e-06, "loss": 0.3924, "step": 16820 }, { "epoch": 0.49558799699482936, "grad_norm": 1.5937886124462604, "learning_rate": 5.944098631248404e-06, "loss": 0.5494, "step": 16821 }, { "epoch": 0.4956174594522929, "grad_norm": 1.465199923687064, "learning_rate": 5.943593636903376e-06, "loss": 0.4913, "step": 16822 }, { "epoch": 0.4956469219097565, "grad_norm": 1.5448683561125847, "learning_rate": 5.9430886325773006e-06, "loss": 0.509, "step": 16823 }, { "epoch": 0.49567638436722006, "grad_norm": 1.4977884316423438, "learning_rate": 5.942583618275522e-06, "loss": 0.3789, "step": 16824 }, { "epoch": 0.49570584682468366, "grad_norm": 1.7604257818191331, "learning_rate": 5.94207859400338e-06, "loss": 0.7118, "step": 16825 }, { "epoch": 0.4957353092821472, "grad_norm": 1.8604326357145287, "learning_rate": 5.9415735597662196e-06, "loss": 0.4442, "step": 16826 }, { "epoch": 0.4957647717396108, "grad_norm": 1.508667309316076, "learning_rate": 5.941068515569381e-06, "loss": 0.4151, "step": 16827 }, { "epoch": 0.49579423419707436, "grad_norm": 1.438797964733283, "learning_rate": 5.940563461418204e-06, "loss": 0.4472, "step": 16828 }, { "epoch": 0.49582369665453796, "grad_norm": 1.6021746751422998, "learning_rate": 5.9400583973180345e-06, "loss": 0.5199, "step": 16829 }, { "epoch": 0.4958531591120015, "grad_norm": 1.7227476082103854, "learning_rate": 5.939553323274214e-06, "loss": 0.4553, "step": 16830 }, { "epoch": 0.4958826215694651, "grad_norm": 1.3531924875566637, "learning_rate": 5.9390482392920845e-06, "loss": 0.451, "step": 16831 }, { "epoch": 0.4959120840269287, "grad_norm": 1.439912327004712, "learning_rate": 5.938543145376989e-06, "loss": 0.4652, "step": 16832 }, { "epoch": 0.49594154648439226, "grad_norm": 1.519227324879538, "learning_rate": 5.938038041534272e-06, "loss": 0.4881, "step": 16833 }, { "epoch": 0.49597100894185586, "grad_norm": 1.6164109378721845, "learning_rate": 5.937532927769272e-06, "loss": 0.5133, "step": 16834 }, { "epoch": 0.4960004713993194, "grad_norm": 1.7366409338467859, "learning_rate": 5.937027804087334e-06, "loss": 0.5205, "step": 16835 }, { "epoch": 0.496029933856783, "grad_norm": 1.511664466101427, "learning_rate": 5.936522670493804e-06, "loss": 0.469, "step": 16836 }, { "epoch": 0.49605939631424656, "grad_norm": 1.580566526058212, "learning_rate": 5.936017526994021e-06, "loss": 0.4841, "step": 16837 }, { "epoch": 0.49608885877171016, "grad_norm": 1.625255606388467, "learning_rate": 5.9355123735933305e-06, "loss": 0.4633, "step": 16838 }, { "epoch": 0.4961183212291737, "grad_norm": 1.4154883315879012, "learning_rate": 5.935007210297074e-06, "loss": 0.3785, "step": 16839 }, { "epoch": 0.4961477836866373, "grad_norm": 1.500957350075016, "learning_rate": 5.934502037110597e-06, "loss": 0.3618, "step": 16840 }, { "epoch": 0.49617724614410086, "grad_norm": 1.6202583169199796, "learning_rate": 5.933996854039242e-06, "loss": 0.5122, "step": 16841 }, { "epoch": 0.49620670860156446, "grad_norm": 1.639636560189256, "learning_rate": 5.933491661088355e-06, "loss": 0.4148, "step": 16842 }, { "epoch": 0.496236171059028, "grad_norm": 1.4058699751963388, "learning_rate": 5.932986458263276e-06, "loss": 0.2846, "step": 16843 }, { "epoch": 0.4962656335164916, "grad_norm": 1.4531722688945992, "learning_rate": 5.932481245569349e-06, "loss": 0.3322, "step": 16844 }, { "epoch": 0.4962950959739552, "grad_norm": 1.3460536937406526, "learning_rate": 5.931976023011923e-06, "loss": 0.3478, "step": 16845 }, { "epoch": 0.49632455843141876, "grad_norm": 1.5975869695887224, "learning_rate": 5.931470790596336e-06, "loss": 0.5759, "step": 16846 }, { "epoch": 0.49635402088888236, "grad_norm": 1.647395593279015, "learning_rate": 5.930965548327936e-06, "loss": 0.4357, "step": 16847 }, { "epoch": 0.4963834833463459, "grad_norm": 1.7141655252259431, "learning_rate": 5.930460296212066e-06, "loss": 0.6109, "step": 16848 }, { "epoch": 0.4964129458038095, "grad_norm": 1.490829687007388, "learning_rate": 5.929955034254069e-06, "loss": 0.5258, "step": 16849 }, { "epoch": 0.49644240826127306, "grad_norm": 1.4739818556131412, "learning_rate": 5.929449762459292e-06, "loss": 0.3832, "step": 16850 }, { "epoch": 0.49647187071873666, "grad_norm": 1.6345516811065588, "learning_rate": 5.9289444808330794e-06, "loss": 0.5368, "step": 16851 }, { "epoch": 0.4965013331762002, "grad_norm": 1.6675400699640142, "learning_rate": 5.928439189380776e-06, "loss": 0.5901, "step": 16852 }, { "epoch": 0.4965307956336638, "grad_norm": 1.464850912403504, "learning_rate": 5.9279338881077245e-06, "loss": 0.4334, "step": 16853 }, { "epoch": 0.49656025809112736, "grad_norm": 1.3655346741517282, "learning_rate": 5.927428577019271e-06, "loss": 0.4194, "step": 16854 }, { "epoch": 0.49658972054859096, "grad_norm": 1.6096815409496859, "learning_rate": 5.92692325612076e-06, "loss": 0.4908, "step": 16855 }, { "epoch": 0.4966191830060545, "grad_norm": 1.7606770560010625, "learning_rate": 5.926417925417538e-06, "loss": 0.4079, "step": 16856 }, { "epoch": 0.4966486454635181, "grad_norm": 1.3965365195340886, "learning_rate": 5.925912584914952e-06, "loss": 0.3987, "step": 16857 }, { "epoch": 0.4966781079209817, "grad_norm": 1.4748684498443845, "learning_rate": 5.9254072346183415e-06, "loss": 0.4406, "step": 16858 }, { "epoch": 0.49670757037844526, "grad_norm": 1.437162802363185, "learning_rate": 5.924901874533055e-06, "loss": 0.3672, "step": 16859 }, { "epoch": 0.49673703283590886, "grad_norm": 1.5401299237818091, "learning_rate": 5.924396504664441e-06, "loss": 0.5072, "step": 16860 }, { "epoch": 0.4967664952933724, "grad_norm": 1.5900601619508896, "learning_rate": 5.923891125017841e-06, "loss": 0.5957, "step": 16861 }, { "epoch": 0.496795957750836, "grad_norm": 1.4416046527929745, "learning_rate": 5.923385735598603e-06, "loss": 0.3805, "step": 16862 }, { "epoch": 0.49682542020829956, "grad_norm": 1.7614734195611723, "learning_rate": 5.922880336412072e-06, "loss": 0.537, "step": 16863 }, { "epoch": 0.49685488266576316, "grad_norm": 1.4813105567102411, "learning_rate": 5.922374927463593e-06, "loss": 0.4113, "step": 16864 }, { "epoch": 0.4968843451232267, "grad_norm": 1.5402101134060002, "learning_rate": 5.921869508758513e-06, "loss": 0.4336, "step": 16865 }, { "epoch": 0.4969138075806903, "grad_norm": 1.555261401556832, "learning_rate": 5.921364080302179e-06, "loss": 0.424, "step": 16866 }, { "epoch": 0.49694327003815386, "grad_norm": 1.4570082727699183, "learning_rate": 5.920858642099937e-06, "loss": 0.4951, "step": 16867 }, { "epoch": 0.49697273249561746, "grad_norm": 1.4210253845684953, "learning_rate": 5.920353194157133e-06, "loss": 0.4147, "step": 16868 }, { "epoch": 0.497002194953081, "grad_norm": 1.4372671158620185, "learning_rate": 5.919847736479113e-06, "loss": 0.3397, "step": 16869 }, { "epoch": 0.4970316574105446, "grad_norm": 1.491793761531489, "learning_rate": 5.919342269071223e-06, "loss": 0.3992, "step": 16870 }, { "epoch": 0.4970611198680082, "grad_norm": 1.4998365128614415, "learning_rate": 5.918836791938811e-06, "loss": 0.33, "step": 16871 }, { "epoch": 0.49709058232547176, "grad_norm": 1.6823880920219472, "learning_rate": 5.918331305087226e-06, "loss": 0.4709, "step": 16872 }, { "epoch": 0.49712004478293537, "grad_norm": 1.45390688510081, "learning_rate": 5.917825808521809e-06, "loss": 0.4208, "step": 16873 }, { "epoch": 0.4971495072403989, "grad_norm": 1.5044560647029344, "learning_rate": 5.917320302247911e-06, "loss": 0.4761, "step": 16874 }, { "epoch": 0.4971789696978625, "grad_norm": 1.8578293358323092, "learning_rate": 5.91681478627088e-06, "loss": 0.4592, "step": 16875 }, { "epoch": 0.49720843215532606, "grad_norm": 1.4278386896737765, "learning_rate": 5.91630926059606e-06, "loss": 0.452, "step": 16876 }, { "epoch": 0.49723789461278967, "grad_norm": 1.6560551550056946, "learning_rate": 5.915803725228801e-06, "loss": 0.4477, "step": 16877 }, { "epoch": 0.4972673570702532, "grad_norm": 1.5816125594357744, "learning_rate": 5.9152981801744495e-06, "loss": 0.3931, "step": 16878 }, { "epoch": 0.4972968195277168, "grad_norm": 1.5444456726868556, "learning_rate": 5.914792625438352e-06, "loss": 0.6188, "step": 16879 }, { "epoch": 0.49732628198518036, "grad_norm": 1.7735545173690401, "learning_rate": 5.914287061025856e-06, "loss": 0.6881, "step": 16880 }, { "epoch": 0.49735574444264397, "grad_norm": 1.5140182674177847, "learning_rate": 5.9137814869423114e-06, "loss": 0.4086, "step": 16881 }, { "epoch": 0.4973852069001075, "grad_norm": 1.5645211820040896, "learning_rate": 5.913275903193065e-06, "loss": 0.4485, "step": 16882 }, { "epoch": 0.4974146693575711, "grad_norm": 1.4929965249264248, "learning_rate": 5.912770309783463e-06, "loss": 0.4572, "step": 16883 }, { "epoch": 0.4974441318150347, "grad_norm": 1.41135711831312, "learning_rate": 5.912264706718856e-06, "loss": 0.4231, "step": 16884 }, { "epoch": 0.49747359427249827, "grad_norm": 1.4365073437990135, "learning_rate": 5.91175909400459e-06, "loss": 0.4535, "step": 16885 }, { "epoch": 0.49750305672996187, "grad_norm": 1.4872388204748808, "learning_rate": 5.911253471646016e-06, "loss": 0.4477, "step": 16886 }, { "epoch": 0.4975325191874254, "grad_norm": 1.6088289572733323, "learning_rate": 5.9107478396484795e-06, "loss": 0.4167, "step": 16887 }, { "epoch": 0.497561981644889, "grad_norm": 1.5844869019589782, "learning_rate": 5.910242198017328e-06, "loss": 0.5508, "step": 16888 }, { "epoch": 0.49759144410235256, "grad_norm": 1.924803507928235, "learning_rate": 5.909736546757914e-06, "loss": 0.3993, "step": 16889 }, { "epoch": 0.49762090655981617, "grad_norm": 1.5390684483730794, "learning_rate": 5.909230885875584e-06, "loss": 0.3799, "step": 16890 }, { "epoch": 0.4976503690172797, "grad_norm": 1.5977832929156204, "learning_rate": 5.908725215375688e-06, "loss": 0.6584, "step": 16891 }, { "epoch": 0.4976798314747433, "grad_norm": 1.5222134647456536, "learning_rate": 5.908219535263571e-06, "loss": 0.4104, "step": 16892 }, { "epoch": 0.49770929393220686, "grad_norm": 1.4972450594317614, "learning_rate": 5.907713845544586e-06, "loss": 0.3603, "step": 16893 }, { "epoch": 0.49773875638967047, "grad_norm": 1.4870972990175326, "learning_rate": 5.90720814622408e-06, "loss": 0.3759, "step": 16894 }, { "epoch": 0.497768218847134, "grad_norm": 1.8636602749442712, "learning_rate": 5.906702437307403e-06, "loss": 0.4148, "step": 16895 }, { "epoch": 0.4977976813045976, "grad_norm": 1.4654100724072443, "learning_rate": 5.906196718799904e-06, "loss": 0.409, "step": 16896 }, { "epoch": 0.4978271437620612, "grad_norm": 1.3960928681788054, "learning_rate": 5.905690990706933e-06, "loss": 0.3454, "step": 16897 }, { "epoch": 0.49785660621952477, "grad_norm": 1.4687613870722216, "learning_rate": 5.905185253033836e-06, "loss": 0.3224, "step": 16898 }, { "epoch": 0.49788606867698837, "grad_norm": 1.739557197469044, "learning_rate": 5.904679505785969e-06, "loss": 0.5159, "step": 16899 }, { "epoch": 0.4979155311344519, "grad_norm": 1.7705028287631968, "learning_rate": 5.904173748968675e-06, "loss": 0.6256, "step": 16900 }, { "epoch": 0.4979449935919155, "grad_norm": 1.4216547741081351, "learning_rate": 5.903667982587308e-06, "loss": 0.4702, "step": 16901 }, { "epoch": 0.49797445604937907, "grad_norm": 1.4127879724003947, "learning_rate": 5.903162206647215e-06, "loss": 0.4655, "step": 16902 }, { "epoch": 0.49800391850684267, "grad_norm": 1.2655454296380997, "learning_rate": 5.90265642115375e-06, "loss": 0.2487, "step": 16903 }, { "epoch": 0.4980333809643062, "grad_norm": 1.520182917588618, "learning_rate": 5.902150626112258e-06, "loss": 0.3727, "step": 16904 }, { "epoch": 0.4980628434217698, "grad_norm": 1.6288575423458391, "learning_rate": 5.901644821528093e-06, "loss": 0.4778, "step": 16905 }, { "epoch": 0.49809230587923337, "grad_norm": 1.6707025995653646, "learning_rate": 5.9011390074066045e-06, "loss": 0.5448, "step": 16906 }, { "epoch": 0.49812176833669697, "grad_norm": 1.65609851159367, "learning_rate": 5.900633183753141e-06, "loss": 0.5268, "step": 16907 }, { "epoch": 0.4981512307941605, "grad_norm": 1.4769802292651006, "learning_rate": 5.900127350573055e-06, "loss": 0.4663, "step": 16908 }, { "epoch": 0.4981806932516241, "grad_norm": 1.4986226723844582, "learning_rate": 5.899621507871695e-06, "loss": 0.4941, "step": 16909 }, { "epoch": 0.4982101557090877, "grad_norm": 1.5100306107048667, "learning_rate": 5.899115655654412e-06, "loss": 0.3107, "step": 16910 }, { "epoch": 0.49823961816655127, "grad_norm": 1.4306381625434375, "learning_rate": 5.89860979392656e-06, "loss": 0.4359, "step": 16911 }, { "epoch": 0.49826908062401487, "grad_norm": 1.5431408957547432, "learning_rate": 5.898103922693487e-06, "loss": 0.5065, "step": 16912 }, { "epoch": 0.4982985430814784, "grad_norm": 1.5362293184095253, "learning_rate": 5.897598041960544e-06, "loss": 0.4119, "step": 16913 }, { "epoch": 0.498328005538942, "grad_norm": 1.76531567695354, "learning_rate": 5.897092151733082e-06, "loss": 0.5246, "step": 16914 }, { "epoch": 0.49835746799640557, "grad_norm": 1.5892854961529346, "learning_rate": 5.896586252016453e-06, "loss": 0.4817, "step": 16915 }, { "epoch": 0.49838693045386917, "grad_norm": 1.6615631847050258, "learning_rate": 5.896080342816009e-06, "loss": 0.4523, "step": 16916 }, { "epoch": 0.4984163929113327, "grad_norm": 1.3888899317300045, "learning_rate": 5.8955744241370985e-06, "loss": 0.2895, "step": 16917 }, { "epoch": 0.4984458553687963, "grad_norm": 1.5564726869002854, "learning_rate": 5.895068495985074e-06, "loss": 0.3543, "step": 16918 }, { "epoch": 0.49847531782625987, "grad_norm": 1.6409960666516379, "learning_rate": 5.89456255836529e-06, "loss": 0.4839, "step": 16919 }, { "epoch": 0.49850478028372347, "grad_norm": 1.5042887791993282, "learning_rate": 5.894056611283094e-06, "loss": 0.4254, "step": 16920 }, { "epoch": 0.498534242741187, "grad_norm": 1.7722279084337633, "learning_rate": 5.893550654743843e-06, "loss": 0.4541, "step": 16921 }, { "epoch": 0.4985637051986506, "grad_norm": 1.6321804335559889, "learning_rate": 5.8930446887528815e-06, "loss": 0.4371, "step": 16922 }, { "epoch": 0.4985931676561142, "grad_norm": 1.7549728749932965, "learning_rate": 5.892538713315566e-06, "loss": 0.3596, "step": 16923 }, { "epoch": 0.49862263011357777, "grad_norm": 1.3332848010455378, "learning_rate": 5.89203272843725e-06, "loss": 0.3465, "step": 16924 }, { "epoch": 0.49865209257104137, "grad_norm": 1.4628350915507025, "learning_rate": 5.8915267341232814e-06, "loss": 0.3562, "step": 16925 }, { "epoch": 0.4986815550285049, "grad_norm": 1.525400027888128, "learning_rate": 5.891020730379016e-06, "loss": 0.4812, "step": 16926 }, { "epoch": 0.4987110174859685, "grad_norm": 1.8183310446953662, "learning_rate": 5.890514717209806e-06, "loss": 0.5671, "step": 16927 }, { "epoch": 0.49874047994343207, "grad_norm": 1.6387252291926258, "learning_rate": 5.890008694621001e-06, "loss": 0.4778, "step": 16928 }, { "epoch": 0.49876994240089567, "grad_norm": 1.306375690624326, "learning_rate": 5.889502662617955e-06, "loss": 0.3838, "step": 16929 }, { "epoch": 0.4987994048583592, "grad_norm": 1.4358815825423825, "learning_rate": 5.888996621206022e-06, "loss": 0.3878, "step": 16930 }, { "epoch": 0.4988288673158228, "grad_norm": 1.7153355405584463, "learning_rate": 5.888490570390555e-06, "loss": 0.5687, "step": 16931 }, { "epoch": 0.49885832977328637, "grad_norm": 1.6751795749521368, "learning_rate": 5.887984510176903e-06, "loss": 0.4007, "step": 16932 }, { "epoch": 0.49888779223074997, "grad_norm": 1.289910283220594, "learning_rate": 5.887478440570423e-06, "loss": 0.3423, "step": 16933 }, { "epoch": 0.4989172546882135, "grad_norm": 1.4308957608851909, "learning_rate": 5.886972361576465e-06, "loss": 0.4344, "step": 16934 }, { "epoch": 0.4989467171456771, "grad_norm": 1.57934474635207, "learning_rate": 5.886466273200385e-06, "loss": 0.5934, "step": 16935 }, { "epoch": 0.4989761796031407, "grad_norm": 1.4412679130813482, "learning_rate": 5.885960175447536e-06, "loss": 0.353, "step": 16936 }, { "epoch": 0.49900564206060427, "grad_norm": 1.4176324363815593, "learning_rate": 5.885454068323267e-06, "loss": 0.4708, "step": 16937 }, { "epoch": 0.4990351045180679, "grad_norm": 1.4931598936300157, "learning_rate": 5.884947951832937e-06, "loss": 0.3727, "step": 16938 }, { "epoch": 0.4990645669755314, "grad_norm": 1.3505027058142902, "learning_rate": 5.884441825981897e-06, "loss": 0.4538, "step": 16939 }, { "epoch": 0.499094029432995, "grad_norm": 1.6922699196104427, "learning_rate": 5.8839356907754995e-06, "loss": 0.4767, "step": 16940 }, { "epoch": 0.49912349189045857, "grad_norm": 1.526700798878064, "learning_rate": 5.883429546219102e-06, "loss": 0.5252, "step": 16941 }, { "epoch": 0.4991529543479222, "grad_norm": 1.4807913866877873, "learning_rate": 5.882923392318055e-06, "loss": 0.3814, "step": 16942 }, { "epoch": 0.4991824168053857, "grad_norm": 1.36417050065249, "learning_rate": 5.882417229077712e-06, "loss": 0.3262, "step": 16943 }, { "epoch": 0.4992118792628493, "grad_norm": 1.4674188148408596, "learning_rate": 5.88191105650343e-06, "loss": 0.2654, "step": 16944 }, { "epoch": 0.49924134172031287, "grad_norm": 1.6452601011979724, "learning_rate": 5.881404874600561e-06, "loss": 0.6207, "step": 16945 }, { "epoch": 0.4992708041777765, "grad_norm": 1.5722941528596008, "learning_rate": 5.880898683374461e-06, "loss": 0.3099, "step": 16946 }, { "epoch": 0.49930026663524, "grad_norm": 1.3721638686018311, "learning_rate": 5.880392482830483e-06, "loss": 0.4543, "step": 16947 }, { "epoch": 0.4993297290927036, "grad_norm": 1.4326870365048412, "learning_rate": 5.87988627297398e-06, "loss": 0.3362, "step": 16948 }, { "epoch": 0.4993591915501672, "grad_norm": 1.3759273644100396, "learning_rate": 5.879380053810309e-06, "loss": 0.4476, "step": 16949 }, { "epoch": 0.4993886540076308, "grad_norm": 1.752954098473689, "learning_rate": 5.878873825344824e-06, "loss": 0.591, "step": 16950 }, { "epoch": 0.4994181164650944, "grad_norm": 1.4887619380896044, "learning_rate": 5.87836758758288e-06, "loss": 0.5542, "step": 16951 }, { "epoch": 0.4994475789225579, "grad_norm": 1.4695536174327055, "learning_rate": 5.877861340529831e-06, "loss": 0.4266, "step": 16952 }, { "epoch": 0.4994770413800215, "grad_norm": 1.3389762098444995, "learning_rate": 5.877355084191032e-06, "loss": 0.2965, "step": 16953 }, { "epoch": 0.49950650383748507, "grad_norm": 1.5620667167725677, "learning_rate": 5.876848818571839e-06, "loss": 0.532, "step": 16954 }, { "epoch": 0.4995359662949487, "grad_norm": 1.3462638890449843, "learning_rate": 5.876342543677604e-06, "loss": 0.4938, "step": 16955 }, { "epoch": 0.4995654287524122, "grad_norm": 1.473816050394278, "learning_rate": 5.8758362595136874e-06, "loss": 0.4972, "step": 16956 }, { "epoch": 0.4995948912098758, "grad_norm": 1.4804476034031886, "learning_rate": 5.875329966085441e-06, "loss": 0.5393, "step": 16957 }, { "epoch": 0.49962435366733937, "grad_norm": 1.4242891235040807, "learning_rate": 5.8748236633982216e-06, "loss": 0.4115, "step": 16958 }, { "epoch": 0.499653816124803, "grad_norm": 1.6692619156281183, "learning_rate": 5.8743173514573815e-06, "loss": 0.5397, "step": 16959 }, { "epoch": 0.4996832785822665, "grad_norm": 1.6496476260332715, "learning_rate": 5.8738110302682816e-06, "loss": 0.4324, "step": 16960 }, { "epoch": 0.4997127410397301, "grad_norm": 1.5110730456729595, "learning_rate": 5.873304699836275e-06, "loss": 0.4586, "step": 16961 }, { "epoch": 0.4997422034971937, "grad_norm": 1.6293046333997254, "learning_rate": 5.872798360166716e-06, "loss": 0.4973, "step": 16962 }, { "epoch": 0.4997716659546573, "grad_norm": 1.5307607474729685, "learning_rate": 5.8722920112649625e-06, "loss": 0.4813, "step": 16963 }, { "epoch": 0.4998011284121209, "grad_norm": 1.534340374202411, "learning_rate": 5.8717856531363695e-06, "loss": 0.4288, "step": 16964 }, { "epoch": 0.4998305908695844, "grad_norm": 1.5415782892277277, "learning_rate": 5.871279285786293e-06, "loss": 0.4788, "step": 16965 }, { "epoch": 0.499860053327048, "grad_norm": 1.5373615899860378, "learning_rate": 5.870772909220093e-06, "loss": 0.4316, "step": 16966 }, { "epoch": 0.4998895157845116, "grad_norm": 1.6312986592526002, "learning_rate": 5.870266523443119e-06, "loss": 0.5249, "step": 16967 }, { "epoch": 0.4999189782419752, "grad_norm": 1.6600546442255484, "learning_rate": 5.869760128460732e-06, "loss": 0.5718, "step": 16968 }, { "epoch": 0.4999484406994387, "grad_norm": 1.3237460556860132, "learning_rate": 5.869253724278288e-06, "loss": 0.32, "step": 16969 }, { "epoch": 0.4999779031569023, "grad_norm": 1.6704332543899076, "learning_rate": 5.8687473109011415e-06, "loss": 0.5016, "step": 16970 }, { "epoch": 0.5000073656143659, "grad_norm": 1.49708932368358, "learning_rate": 5.8682408883346535e-06, "loss": 0.4835, "step": 16971 }, { "epoch": 0.5000368280718295, "grad_norm": 1.6058182362990745, "learning_rate": 5.867734456584175e-06, "loss": 0.4887, "step": 16972 }, { "epoch": 0.5000662905292931, "grad_norm": 1.5061390876629577, "learning_rate": 5.867228015655067e-06, "loss": 0.3881, "step": 16973 }, { "epoch": 0.5000957529867567, "grad_norm": 1.3724914035765188, "learning_rate": 5.866721565552685e-06, "loss": 0.4463, "step": 16974 }, { "epoch": 0.5001252154442202, "grad_norm": 1.6234850508518033, "learning_rate": 5.866215106282388e-06, "loss": 0.5215, "step": 16975 }, { "epoch": 0.5001546779016838, "grad_norm": 1.7449737618555639, "learning_rate": 5.865708637849531e-06, "loss": 0.6513, "step": 16976 }, { "epoch": 0.5001841403591474, "grad_norm": 1.5174538117350111, "learning_rate": 5.865202160259471e-06, "loss": 0.4276, "step": 16977 }, { "epoch": 0.500213602816611, "grad_norm": 1.4887821985383873, "learning_rate": 5.864695673517567e-06, "loss": 0.5524, "step": 16978 }, { "epoch": 0.5002430652740745, "grad_norm": 1.5342413468808174, "learning_rate": 5.864189177629175e-06, "loss": 0.4818, "step": 16979 }, { "epoch": 0.5002725277315381, "grad_norm": 1.303202951903171, "learning_rate": 5.863682672599652e-06, "loss": 0.3616, "step": 16980 }, { "epoch": 0.5003019901890017, "grad_norm": 1.6584803276113569, "learning_rate": 5.86317615843436e-06, "loss": 0.3574, "step": 16981 }, { "epoch": 0.5003314526464653, "grad_norm": 1.4708162163250356, "learning_rate": 5.862669635138652e-06, "loss": 0.401, "step": 16982 }, { "epoch": 0.5003609151039288, "grad_norm": 1.42238436841013, "learning_rate": 5.862163102717887e-06, "loss": 0.5112, "step": 16983 }, { "epoch": 0.5003903775613924, "grad_norm": 1.6322855600889994, "learning_rate": 5.861656561177424e-06, "loss": 0.3343, "step": 16984 }, { "epoch": 0.500419840018856, "grad_norm": 1.3892077148028548, "learning_rate": 5.861150010522621e-06, "loss": 0.4, "step": 16985 }, { "epoch": 0.5004493024763196, "grad_norm": 1.5564750763162596, "learning_rate": 5.860643450758835e-06, "loss": 0.527, "step": 16986 }, { "epoch": 0.5004787649337832, "grad_norm": 1.5108571346034185, "learning_rate": 5.860136881891426e-06, "loss": 0.4288, "step": 16987 }, { "epoch": 0.5005082273912467, "grad_norm": 1.435416720369291, "learning_rate": 5.8596303039257495e-06, "loss": 0.4252, "step": 16988 }, { "epoch": 0.5005376898487103, "grad_norm": 1.51317051825372, "learning_rate": 5.859123716867166e-06, "loss": 0.4997, "step": 16989 }, { "epoch": 0.5005671523061739, "grad_norm": 1.5824879760706572, "learning_rate": 5.8586171207210355e-06, "loss": 0.496, "step": 16990 }, { "epoch": 0.5005966147636375, "grad_norm": 1.3506195635003366, "learning_rate": 5.858110515492713e-06, "loss": 0.3468, "step": 16991 }, { "epoch": 0.500626077221101, "grad_norm": 1.4292963054056436, "learning_rate": 5.857603901187559e-06, "loss": 0.4901, "step": 16992 }, { "epoch": 0.5006555396785646, "grad_norm": 1.4494342142556766, "learning_rate": 5.857097277810933e-06, "loss": 0.4968, "step": 16993 }, { "epoch": 0.5006850021360282, "grad_norm": 1.6516540504480786, "learning_rate": 5.856590645368192e-06, "loss": 0.5513, "step": 16994 }, { "epoch": 0.5007144645934918, "grad_norm": 1.530422742417526, "learning_rate": 5.856084003864696e-06, "loss": 0.3556, "step": 16995 }, { "epoch": 0.5007439270509553, "grad_norm": 1.6313455347068009, "learning_rate": 5.855577353305807e-06, "loss": 0.4889, "step": 16996 }, { "epoch": 0.5007733895084189, "grad_norm": 1.4976862166838487, "learning_rate": 5.855070693696879e-06, "loss": 0.4474, "step": 16997 }, { "epoch": 0.5008028519658825, "grad_norm": 1.5827803068787865, "learning_rate": 5.854564025043274e-06, "loss": 0.4509, "step": 16998 }, { "epoch": 0.5008323144233461, "grad_norm": 1.537320377021127, "learning_rate": 5.854057347350352e-06, "loss": 0.5083, "step": 16999 }, { "epoch": 0.5008617768808097, "grad_norm": 1.5121065596382968, "learning_rate": 5.853550660623472e-06, "loss": 0.4412, "step": 17000 }, { "epoch": 0.5008912393382732, "grad_norm": 1.4918209172542491, "learning_rate": 5.853043964867992e-06, "loss": 0.5495, "step": 17001 }, { "epoch": 0.5009207017957368, "grad_norm": 1.6622058381031404, "learning_rate": 5.852537260089275e-06, "loss": 0.4281, "step": 17002 }, { "epoch": 0.5009501642532004, "grad_norm": 1.4874141007073478, "learning_rate": 5.852030546292677e-06, "loss": 0.5159, "step": 17003 }, { "epoch": 0.500979626710664, "grad_norm": 1.6312381536578855, "learning_rate": 5.85152382348356e-06, "loss": 0.5275, "step": 17004 }, { "epoch": 0.5010090891681275, "grad_norm": 1.4357619286898782, "learning_rate": 5.851017091667282e-06, "loss": 0.4626, "step": 17005 }, { "epoch": 0.5010385516255911, "grad_norm": 1.5347808785143124, "learning_rate": 5.850510350849208e-06, "loss": 0.3718, "step": 17006 }, { "epoch": 0.5010680140830547, "grad_norm": 1.41502850782581, "learning_rate": 5.850003601034692e-06, "loss": 0.251, "step": 17007 }, { "epoch": 0.5010974765405183, "grad_norm": 1.4122381088267162, "learning_rate": 5.849496842229098e-06, "loss": 0.4423, "step": 17008 }, { "epoch": 0.5011269389979818, "grad_norm": 1.6211930997544837, "learning_rate": 5.8489900744377835e-06, "loss": 0.5069, "step": 17009 }, { "epoch": 0.5011564014554454, "grad_norm": 1.4394656643630386, "learning_rate": 5.8484832976661125e-06, "loss": 0.4161, "step": 17010 }, { "epoch": 0.501185863912909, "grad_norm": 1.2889274907623933, "learning_rate": 5.847976511919443e-06, "loss": 0.3447, "step": 17011 }, { "epoch": 0.5012153263703726, "grad_norm": 1.3151433174590395, "learning_rate": 5.847469717203137e-06, "loss": 0.3108, "step": 17012 }, { "epoch": 0.5012447888278362, "grad_norm": 1.5042665297303148, "learning_rate": 5.846962913522554e-06, "loss": 0.4471, "step": 17013 }, { "epoch": 0.5012742512852997, "grad_norm": 1.606830627435511, "learning_rate": 5.846456100883057e-06, "loss": 0.4942, "step": 17014 }, { "epoch": 0.5013037137427633, "grad_norm": 1.719677225551499, "learning_rate": 5.845949279290003e-06, "loss": 0.4563, "step": 17015 }, { "epoch": 0.5013331762002269, "grad_norm": 1.512375385806433, "learning_rate": 5.8454424487487574e-06, "loss": 0.4947, "step": 17016 }, { "epoch": 0.5013626386576905, "grad_norm": 1.5178177404278546, "learning_rate": 5.844935609264677e-06, "loss": 0.4955, "step": 17017 }, { "epoch": 0.501392101115154, "grad_norm": 1.5005866522496294, "learning_rate": 5.844428760843127e-06, "loss": 0.5797, "step": 17018 }, { "epoch": 0.5014215635726176, "grad_norm": 1.4168791574218718, "learning_rate": 5.843921903489466e-06, "loss": 0.5061, "step": 17019 }, { "epoch": 0.5014510260300812, "grad_norm": 1.5415926220858953, "learning_rate": 5.843415037209057e-06, "loss": 0.3961, "step": 17020 }, { "epoch": 0.5014804884875448, "grad_norm": 1.4434792373813572, "learning_rate": 5.842908162007262e-06, "loss": 0.42, "step": 17021 }, { "epoch": 0.5015099509450083, "grad_norm": 1.7014109807396622, "learning_rate": 5.8424012778894376e-06, "loss": 0.4901, "step": 17022 }, { "epoch": 0.5015394134024719, "grad_norm": 1.5449038420796082, "learning_rate": 5.841894384860952e-06, "loss": 0.4967, "step": 17023 }, { "epoch": 0.5015688758599355, "grad_norm": 1.5435574891627437, "learning_rate": 5.841387482927163e-06, "loss": 0.5287, "step": 17024 }, { "epoch": 0.5015983383173991, "grad_norm": 1.4673568882029242, "learning_rate": 5.840880572093435e-06, "loss": 0.4919, "step": 17025 }, { "epoch": 0.5016278007748627, "grad_norm": 1.4666969178986222, "learning_rate": 5.840373652365128e-06, "loss": 0.4555, "step": 17026 }, { "epoch": 0.5016572632323262, "grad_norm": 1.629087461585749, "learning_rate": 5.839866723747605e-06, "loss": 0.5363, "step": 17027 }, { "epoch": 0.5016867256897898, "grad_norm": 1.5629153382239922, "learning_rate": 5.839359786246226e-06, "loss": 0.5411, "step": 17028 }, { "epoch": 0.5017161881472534, "grad_norm": 1.4959848643363811, "learning_rate": 5.838852839866357e-06, "loss": 0.3846, "step": 17029 }, { "epoch": 0.501745650604717, "grad_norm": 1.5566476119549528, "learning_rate": 5.8383458846133565e-06, "loss": 0.5413, "step": 17030 }, { "epoch": 0.5017751130621805, "grad_norm": 1.4832010148747432, "learning_rate": 5.83783892049259e-06, "loss": 0.4752, "step": 17031 }, { "epoch": 0.5018045755196441, "grad_norm": 1.4455495611382718, "learning_rate": 5.837331947509418e-06, "loss": 0.3703, "step": 17032 }, { "epoch": 0.5018340379771077, "grad_norm": 1.8451537283461499, "learning_rate": 5.8368249656692056e-06, "loss": 0.5616, "step": 17033 }, { "epoch": 0.5018635004345713, "grad_norm": 1.3527137020699915, "learning_rate": 5.836317974977311e-06, "loss": 0.3959, "step": 17034 }, { "epoch": 0.5018929628920348, "grad_norm": 1.5327975790738848, "learning_rate": 5.8358109754391025e-06, "loss": 0.521, "step": 17035 }, { "epoch": 0.5019224253494984, "grad_norm": 1.554926333329955, "learning_rate": 5.835303967059939e-06, "loss": 0.5034, "step": 17036 }, { "epoch": 0.501951887806962, "grad_norm": 1.7360666739801651, "learning_rate": 5.834796949845184e-06, "loss": 0.6294, "step": 17037 }, { "epoch": 0.5019813502644256, "grad_norm": 1.6907060365105857, "learning_rate": 5.8342899238002015e-06, "loss": 0.4425, "step": 17038 }, { "epoch": 0.5020108127218892, "grad_norm": 1.4927334734323745, "learning_rate": 5.833782888930355e-06, "loss": 0.4012, "step": 17039 }, { "epoch": 0.5020402751793527, "grad_norm": 1.493597984847042, "learning_rate": 5.833275845241006e-06, "loss": 0.4007, "step": 17040 }, { "epoch": 0.5020697376368163, "grad_norm": 1.672336275434998, "learning_rate": 5.832768792737521e-06, "loss": 0.5081, "step": 17041 }, { "epoch": 0.5020992000942799, "grad_norm": 1.5092184882748128, "learning_rate": 5.832261731425262e-06, "loss": 0.4455, "step": 17042 }, { "epoch": 0.5021286625517435, "grad_norm": 1.3649157835353964, "learning_rate": 5.83175466130959e-06, "loss": 0.3028, "step": 17043 }, { "epoch": 0.502158125009207, "grad_norm": 1.6527014272214822, "learning_rate": 5.83124758239587e-06, "loss": 0.452, "step": 17044 }, { "epoch": 0.5021875874666706, "grad_norm": 1.6684680048279454, "learning_rate": 5.830740494689468e-06, "loss": 0.4526, "step": 17045 }, { "epoch": 0.5022170499241342, "grad_norm": 1.6811580286814707, "learning_rate": 5.830233398195747e-06, "loss": 0.417, "step": 17046 }, { "epoch": 0.5022465123815978, "grad_norm": 1.459738891914513, "learning_rate": 5.829726292920068e-06, "loss": 0.5193, "step": 17047 }, { "epoch": 0.5022759748390613, "grad_norm": 1.5579786259770056, "learning_rate": 5.829219178867799e-06, "loss": 0.4382, "step": 17048 }, { "epoch": 0.5023054372965249, "grad_norm": 1.5185982209460345, "learning_rate": 5.828712056044301e-06, "loss": 0.446, "step": 17049 }, { "epoch": 0.5023348997539885, "grad_norm": 1.380907238709285, "learning_rate": 5.82820492445494e-06, "loss": 0.4057, "step": 17050 }, { "epoch": 0.5023643622114521, "grad_norm": 1.6814286975932118, "learning_rate": 5.82769778410508e-06, "loss": 0.503, "step": 17051 }, { "epoch": 0.5023938246689157, "grad_norm": 1.4327941355317342, "learning_rate": 5.827190635000084e-06, "loss": 0.3171, "step": 17052 }, { "epoch": 0.5024232871263792, "grad_norm": 1.2648932613483153, "learning_rate": 5.826683477145318e-06, "loss": 0.2917, "step": 17053 }, { "epoch": 0.5024527495838428, "grad_norm": 1.5591984201875173, "learning_rate": 5.8261763105461466e-06, "loss": 0.4444, "step": 17054 }, { "epoch": 0.5024822120413064, "grad_norm": 1.4298469410242438, "learning_rate": 5.825669135207933e-06, "loss": 0.4568, "step": 17055 }, { "epoch": 0.50251167449877, "grad_norm": 1.617977408313844, "learning_rate": 5.825161951136045e-06, "loss": 0.3994, "step": 17056 }, { "epoch": 0.5025411369562335, "grad_norm": 1.3873247880162727, "learning_rate": 5.824654758335843e-06, "loss": 0.3099, "step": 17057 }, { "epoch": 0.5025705994136971, "grad_norm": 1.2711670855230317, "learning_rate": 5.824147556812696e-06, "loss": 0.3558, "step": 17058 }, { "epoch": 0.5026000618711607, "grad_norm": 1.3735887942872218, "learning_rate": 5.823640346571966e-06, "loss": 0.3556, "step": 17059 }, { "epoch": 0.5026295243286243, "grad_norm": 1.4300032173661812, "learning_rate": 5.823133127619021e-06, "loss": 0.3896, "step": 17060 }, { "epoch": 0.5026589867860878, "grad_norm": 1.6537448920897457, "learning_rate": 5.822625899959223e-06, "loss": 0.5099, "step": 17061 }, { "epoch": 0.5026884492435514, "grad_norm": 1.4690265671318246, "learning_rate": 5.82211866359794e-06, "loss": 0.4749, "step": 17062 }, { "epoch": 0.502717911701015, "grad_norm": 1.4643966536506698, "learning_rate": 5.821611418540536e-06, "loss": 0.3407, "step": 17063 }, { "epoch": 0.5027473741584786, "grad_norm": 1.4755457195556787, "learning_rate": 5.821104164792376e-06, "loss": 0.3719, "step": 17064 }, { "epoch": 0.5027768366159422, "grad_norm": 1.7239578321740523, "learning_rate": 5.8205969023588275e-06, "loss": 0.6038, "step": 17065 }, { "epoch": 0.5028062990734057, "grad_norm": 1.4419427679078292, "learning_rate": 5.820089631245256e-06, "loss": 0.4525, "step": 17066 }, { "epoch": 0.5028357615308693, "grad_norm": 1.5817043934908397, "learning_rate": 5.819582351457024e-06, "loss": 0.4128, "step": 17067 }, { "epoch": 0.5028652239883329, "grad_norm": 1.7315278216803, "learning_rate": 5.8190750629995e-06, "loss": 0.2705, "step": 17068 }, { "epoch": 0.5028946864457965, "grad_norm": 1.644945500891072, "learning_rate": 5.818567765878051e-06, "loss": 0.5099, "step": 17069 }, { "epoch": 0.50292414890326, "grad_norm": 1.4576498090427403, "learning_rate": 5.81806046009804e-06, "loss": 0.4032, "step": 17070 }, { "epoch": 0.5029536113607236, "grad_norm": 1.4367461750396033, "learning_rate": 5.817553145664836e-06, "loss": 0.4168, "step": 17071 }, { "epoch": 0.5029830738181872, "grad_norm": 1.6632806475896866, "learning_rate": 5.817045822583805e-06, "loss": 0.5832, "step": 17072 }, { "epoch": 0.5030125362756508, "grad_norm": 1.5882272609391208, "learning_rate": 5.816538490860309e-06, "loss": 0.5534, "step": 17073 }, { "epoch": 0.5030419987331143, "grad_norm": 1.7876297854545353, "learning_rate": 5.816031150499719e-06, "loss": 0.6554, "step": 17074 }, { "epoch": 0.5030714611905779, "grad_norm": 1.3572299042394183, "learning_rate": 5.815523801507402e-06, "loss": 0.4449, "step": 17075 }, { "epoch": 0.5031009236480415, "grad_norm": 1.424270368079461, "learning_rate": 5.815016443888722e-06, "loss": 0.5209, "step": 17076 }, { "epoch": 0.5031303861055051, "grad_norm": 1.3013556563289672, "learning_rate": 5.8145090776490455e-06, "loss": 0.401, "step": 17077 }, { "epoch": 0.5031598485629687, "grad_norm": 1.596134366134619, "learning_rate": 5.814001702793741e-06, "loss": 0.5847, "step": 17078 }, { "epoch": 0.5031893110204322, "grad_norm": 1.4302036815581725, "learning_rate": 5.813494319328173e-06, "loss": 0.3381, "step": 17079 }, { "epoch": 0.5032187734778958, "grad_norm": 1.5524800884898495, "learning_rate": 5.812986927257711e-06, "loss": 0.3618, "step": 17080 }, { "epoch": 0.5032482359353594, "grad_norm": 1.3766844870293733, "learning_rate": 5.8124795265877234e-06, "loss": 0.423, "step": 17081 }, { "epoch": 0.503277698392823, "grad_norm": 1.385054016207587, "learning_rate": 5.8119721173235725e-06, "loss": 0.3623, "step": 17082 }, { "epoch": 0.5033071608502865, "grad_norm": 1.4378996219974112, "learning_rate": 5.811464699470628e-06, "loss": 0.4733, "step": 17083 }, { "epoch": 0.5033366233077501, "grad_norm": 1.7685443480058438, "learning_rate": 5.810957273034258e-06, "loss": 0.4958, "step": 17084 }, { "epoch": 0.5033660857652137, "grad_norm": 1.549269051783752, "learning_rate": 5.810449838019828e-06, "loss": 0.5025, "step": 17085 }, { "epoch": 0.5033955482226773, "grad_norm": 1.6600560918686518, "learning_rate": 5.809942394432708e-06, "loss": 0.5442, "step": 17086 }, { "epoch": 0.5034250106801408, "grad_norm": 1.532995417908636, "learning_rate": 5.809434942278264e-06, "loss": 0.4905, "step": 17087 }, { "epoch": 0.5034544731376044, "grad_norm": 1.5001296230544003, "learning_rate": 5.808927481561862e-06, "loss": 0.3779, "step": 17088 }, { "epoch": 0.503483935595068, "grad_norm": 1.5194965977278219, "learning_rate": 5.808420012288873e-06, "loss": 0.4289, "step": 17089 }, { "epoch": 0.5035133980525316, "grad_norm": 1.3394402176212339, "learning_rate": 5.807912534464664e-06, "loss": 0.3457, "step": 17090 }, { "epoch": 0.5035428605099952, "grad_norm": 1.6271295308761538, "learning_rate": 5.807405048094603e-06, "loss": 0.4645, "step": 17091 }, { "epoch": 0.5035723229674587, "grad_norm": 1.681855500963237, "learning_rate": 5.806897553184055e-06, "loss": 0.5798, "step": 17092 }, { "epoch": 0.5036017854249223, "grad_norm": 1.3792830501217987, "learning_rate": 5.806390049738393e-06, "loss": 0.3917, "step": 17093 }, { "epoch": 0.5036312478823859, "grad_norm": 1.7751485095908641, "learning_rate": 5.805882537762981e-06, "loss": 0.3372, "step": 17094 }, { "epoch": 0.5036607103398495, "grad_norm": 1.319698129640791, "learning_rate": 5.80537501726319e-06, "loss": 0.3329, "step": 17095 }, { "epoch": 0.503690172797313, "grad_norm": 1.6091119719815055, "learning_rate": 5.804867488244389e-06, "loss": 0.5087, "step": 17096 }, { "epoch": 0.5037196352547766, "grad_norm": 1.4988816720403437, "learning_rate": 5.804359950711943e-06, "loss": 0.422, "step": 17097 }, { "epoch": 0.5037490977122402, "grad_norm": 1.6627809790576944, "learning_rate": 5.803852404671222e-06, "loss": 0.5997, "step": 17098 }, { "epoch": 0.5037785601697038, "grad_norm": 1.3821886856804855, "learning_rate": 5.803344850127597e-06, "loss": 0.3807, "step": 17099 }, { "epoch": 0.5038080226271673, "grad_norm": 1.5415560163674653, "learning_rate": 5.802837287086434e-06, "loss": 0.5173, "step": 17100 }, { "epoch": 0.5038374850846309, "grad_norm": 1.4146137046942018, "learning_rate": 5.802329715553103e-06, "loss": 0.3755, "step": 17101 }, { "epoch": 0.5038669475420945, "grad_norm": 1.4878681933386366, "learning_rate": 5.801822135532973e-06, "loss": 0.4639, "step": 17102 }, { "epoch": 0.5038964099995581, "grad_norm": 1.4377670679020398, "learning_rate": 5.801314547031412e-06, "loss": 0.4085, "step": 17103 }, { "epoch": 0.5039258724570217, "grad_norm": 1.388215965384622, "learning_rate": 5.800806950053791e-06, "loss": 0.5131, "step": 17104 }, { "epoch": 0.5039553349144852, "grad_norm": 1.4664640353857794, "learning_rate": 5.800299344605478e-06, "loss": 0.4167, "step": 17105 }, { "epoch": 0.5039847973719488, "grad_norm": 1.5206737496359157, "learning_rate": 5.799791730691843e-06, "loss": 0.4622, "step": 17106 }, { "epoch": 0.5040142598294124, "grad_norm": 1.557188744752671, "learning_rate": 5.799284108318252e-06, "loss": 0.5739, "step": 17107 }, { "epoch": 0.504043722286876, "grad_norm": 1.557122938578624, "learning_rate": 5.798776477490081e-06, "loss": 0.5164, "step": 17108 }, { "epoch": 0.5040731847443395, "grad_norm": 1.6205391869383756, "learning_rate": 5.798268838212694e-06, "loss": 0.5394, "step": 17109 }, { "epoch": 0.5041026472018031, "grad_norm": 1.7363458893770094, "learning_rate": 5.797761190491462e-06, "loss": 0.4582, "step": 17110 }, { "epoch": 0.5041321096592667, "grad_norm": 2.1152228631707, "learning_rate": 5.797253534331757e-06, "loss": 0.5644, "step": 17111 }, { "epoch": 0.5041615721167303, "grad_norm": 1.3593258934555956, "learning_rate": 5.796745869738946e-06, "loss": 0.394, "step": 17112 }, { "epoch": 0.5041910345741938, "grad_norm": 1.3039096823478216, "learning_rate": 5.796238196718399e-06, "loss": 0.3209, "step": 17113 }, { "epoch": 0.5042204970316574, "grad_norm": 1.5630515211391038, "learning_rate": 5.795730515275488e-06, "loss": 0.5324, "step": 17114 }, { "epoch": 0.504249959489121, "grad_norm": 1.4639607874211544, "learning_rate": 5.795222825415582e-06, "loss": 0.4939, "step": 17115 }, { "epoch": 0.5042794219465846, "grad_norm": 1.5861144796110145, "learning_rate": 5.7947151271440525e-06, "loss": 0.5144, "step": 17116 }, { "epoch": 0.5043088844040482, "grad_norm": 1.4663269145522178, "learning_rate": 5.794207420466268e-06, "loss": 0.4418, "step": 17117 }, { "epoch": 0.5043383468615117, "grad_norm": 1.318748503262406, "learning_rate": 5.793699705387599e-06, "loss": 0.3721, "step": 17118 }, { "epoch": 0.5043678093189753, "grad_norm": 1.7585992002410653, "learning_rate": 5.793191981913415e-06, "loss": 0.5868, "step": 17119 }, { "epoch": 0.5043972717764389, "grad_norm": 1.4677770948760578, "learning_rate": 5.79268425004909e-06, "loss": 0.4561, "step": 17120 }, { "epoch": 0.5044267342339025, "grad_norm": 1.4706380845190947, "learning_rate": 5.7921765097999926e-06, "loss": 0.3539, "step": 17121 }, { "epoch": 0.504456196691366, "grad_norm": 1.838111373215445, "learning_rate": 5.791668761171492e-06, "loss": 0.6132, "step": 17122 }, { "epoch": 0.5044856591488296, "grad_norm": 1.4898742767028097, "learning_rate": 5.791161004168961e-06, "loss": 0.4299, "step": 17123 }, { "epoch": 0.5045151216062932, "grad_norm": 1.7300729136187776, "learning_rate": 5.790653238797771e-06, "loss": 0.5634, "step": 17124 }, { "epoch": 0.5045445840637568, "grad_norm": 1.4978210010934845, "learning_rate": 5.79014546506329e-06, "loss": 0.5045, "step": 17125 }, { "epoch": 0.5045740465212203, "grad_norm": 1.4174643970726581, "learning_rate": 5.7896376829708934e-06, "loss": 0.2984, "step": 17126 }, { "epoch": 0.5046035089786839, "grad_norm": 1.3786789882820227, "learning_rate": 5.78912989252595e-06, "loss": 0.3212, "step": 17127 }, { "epoch": 0.5046329714361475, "grad_norm": 1.5252088434266864, "learning_rate": 5.788622093733829e-06, "loss": 0.4451, "step": 17128 }, { "epoch": 0.5046624338936111, "grad_norm": 1.516578103674923, "learning_rate": 5.788114286599906e-06, "loss": 0.3109, "step": 17129 }, { "epoch": 0.5046918963510747, "grad_norm": 1.4274983851185434, "learning_rate": 5.787606471129549e-06, "loss": 0.3345, "step": 17130 }, { "epoch": 0.5047213588085382, "grad_norm": 1.4725759252044435, "learning_rate": 5.787098647328131e-06, "loss": 0.5496, "step": 17131 }, { "epoch": 0.5047508212660018, "grad_norm": 1.4482179284833916, "learning_rate": 5.786590815201023e-06, "loss": 0.4751, "step": 17132 }, { "epoch": 0.5047802837234654, "grad_norm": 1.5222254476040749, "learning_rate": 5.786082974753597e-06, "loss": 0.4472, "step": 17133 }, { "epoch": 0.504809746180929, "grad_norm": 1.7421111197919725, "learning_rate": 5.7855751259912255e-06, "loss": 0.5045, "step": 17134 }, { "epoch": 0.5048392086383925, "grad_norm": 1.6067086872744256, "learning_rate": 5.78506726891928e-06, "loss": 0.5018, "step": 17135 }, { "epoch": 0.5048686710958561, "grad_norm": 1.5385097460526205, "learning_rate": 5.784559403543133e-06, "loss": 0.4807, "step": 17136 }, { "epoch": 0.5048981335533197, "grad_norm": 1.5111505771620204, "learning_rate": 5.784051529868153e-06, "loss": 0.4783, "step": 17137 }, { "epoch": 0.5049275960107833, "grad_norm": 1.8479575508220387, "learning_rate": 5.783543647899718e-06, "loss": 0.6786, "step": 17138 }, { "epoch": 0.5049570584682468, "grad_norm": 1.5647408391885431, "learning_rate": 5.783035757643194e-06, "loss": 0.4839, "step": 17139 }, { "epoch": 0.5049865209257104, "grad_norm": 1.4748692952580704, "learning_rate": 5.782527859103957e-06, "loss": 0.6083, "step": 17140 }, { "epoch": 0.505015983383174, "grad_norm": 1.4640213330347416, "learning_rate": 5.782019952287381e-06, "loss": 0.4378, "step": 17141 }, { "epoch": 0.5050454458406376, "grad_norm": 1.557982840920822, "learning_rate": 5.781512037198835e-06, "loss": 0.5786, "step": 17142 }, { "epoch": 0.5050749082981012, "grad_norm": 1.4996515191764488, "learning_rate": 5.7810041138436925e-06, "loss": 0.5063, "step": 17143 }, { "epoch": 0.5051043707555647, "grad_norm": 1.428129342132415, "learning_rate": 5.780496182227327e-06, "loss": 0.394, "step": 17144 }, { "epoch": 0.5051338332130283, "grad_norm": 1.5300982829976384, "learning_rate": 5.77998824235511e-06, "loss": 0.5086, "step": 17145 }, { "epoch": 0.5051632956704919, "grad_norm": 1.5424816349532335, "learning_rate": 5.779480294232416e-06, "loss": 0.4895, "step": 17146 }, { "epoch": 0.5051927581279555, "grad_norm": 1.5026794229999918, "learning_rate": 5.778972337864616e-06, "loss": 0.3708, "step": 17147 }, { "epoch": 0.505222220585419, "grad_norm": 1.5489169092965334, "learning_rate": 5.778464373257086e-06, "loss": 0.4289, "step": 17148 }, { "epoch": 0.5052516830428826, "grad_norm": 1.423976507260179, "learning_rate": 5.777956400415194e-06, "loss": 0.2951, "step": 17149 }, { "epoch": 0.5052811455003462, "grad_norm": 1.3793450710129793, "learning_rate": 5.777448419344318e-06, "loss": 0.3646, "step": 17150 }, { "epoch": 0.5053106079578098, "grad_norm": 1.3438342663754945, "learning_rate": 5.776940430049831e-06, "loss": 0.3476, "step": 17151 }, { "epoch": 0.5053400704152733, "grad_norm": 1.4901624016906512, "learning_rate": 5.776432432537102e-06, "loss": 0.4908, "step": 17152 }, { "epoch": 0.5053695328727369, "grad_norm": 1.4125612158838807, "learning_rate": 5.775924426811509e-06, "loss": 0.4624, "step": 17153 }, { "epoch": 0.5053989953302005, "grad_norm": 1.4812660886003977, "learning_rate": 5.7754164128784244e-06, "loss": 0.424, "step": 17154 }, { "epoch": 0.5054284577876641, "grad_norm": 1.590247448853372, "learning_rate": 5.77490839074322e-06, "loss": 0.4586, "step": 17155 }, { "epoch": 0.5054579202451277, "grad_norm": 1.366091068401842, "learning_rate": 5.7744003604112715e-06, "loss": 0.4201, "step": 17156 }, { "epoch": 0.5054873827025912, "grad_norm": 1.5118928442816824, "learning_rate": 5.7738923218879526e-06, "loss": 0.3837, "step": 17157 }, { "epoch": 0.5055168451600548, "grad_norm": 1.3646310608132335, "learning_rate": 5.773384275178635e-06, "loss": 0.2849, "step": 17158 }, { "epoch": 0.5055463076175184, "grad_norm": 1.5336425230274804, "learning_rate": 5.772876220288696e-06, "loss": 0.3882, "step": 17159 }, { "epoch": 0.505575770074982, "grad_norm": 1.766027195695237, "learning_rate": 5.772368157223507e-06, "loss": 0.5237, "step": 17160 }, { "epoch": 0.5056052325324455, "grad_norm": 1.42809410178097, "learning_rate": 5.771860085988443e-06, "loss": 0.4584, "step": 17161 }, { "epoch": 0.5056346949899091, "grad_norm": 1.589583446719974, "learning_rate": 5.771352006588877e-06, "loss": 0.4537, "step": 17162 }, { "epoch": 0.5056641574473727, "grad_norm": 1.3820298591504705, "learning_rate": 5.770843919030187e-06, "loss": 0.3705, "step": 17163 }, { "epoch": 0.5056936199048363, "grad_norm": 1.5294855787464436, "learning_rate": 5.770335823317743e-06, "loss": 0.5779, "step": 17164 }, { "epoch": 0.5057230823622998, "grad_norm": 1.6603871296857038, "learning_rate": 5.769827719456923e-06, "loss": 0.4163, "step": 17165 }, { "epoch": 0.5057525448197634, "grad_norm": 1.3392917172515442, "learning_rate": 5.769319607453099e-06, "loss": 0.3438, "step": 17166 }, { "epoch": 0.505782007277227, "grad_norm": 1.3916299652875512, "learning_rate": 5.768811487311647e-06, "loss": 0.428, "step": 17167 }, { "epoch": 0.5058114697346906, "grad_norm": 1.3991455536082702, "learning_rate": 5.76830335903794e-06, "loss": 0.3627, "step": 17168 }, { "epoch": 0.5058409321921542, "grad_norm": 1.5011070196758802, "learning_rate": 5.767795222637356e-06, "loss": 0.5415, "step": 17169 }, { "epoch": 0.5058703946496177, "grad_norm": 1.6618830259549568, "learning_rate": 5.7672870781152665e-06, "loss": 0.5316, "step": 17170 }, { "epoch": 0.5058998571070813, "grad_norm": 1.6255182281816585, "learning_rate": 5.76677892547705e-06, "loss": 0.5631, "step": 17171 }, { "epoch": 0.5059293195645449, "grad_norm": 1.2912739641694744, "learning_rate": 5.7662707647280794e-06, "loss": 0.3166, "step": 17172 }, { "epoch": 0.5059587820220085, "grad_norm": 1.4473856709186013, "learning_rate": 5.76576259587373e-06, "loss": 0.4762, "step": 17173 }, { "epoch": 0.505988244479472, "grad_norm": 1.5315850041554018, "learning_rate": 5.765254418919376e-06, "loss": 0.3691, "step": 17174 }, { "epoch": 0.5060177069369356, "grad_norm": 1.4241537132096573, "learning_rate": 5.764746233870395e-06, "loss": 0.4268, "step": 17175 }, { "epoch": 0.5060471693943992, "grad_norm": 1.3935282221568832, "learning_rate": 5.7642380407321615e-06, "loss": 0.4191, "step": 17176 }, { "epoch": 0.5060766318518628, "grad_norm": 1.5467202910672897, "learning_rate": 5.76372983951005e-06, "loss": 0.4297, "step": 17177 }, { "epoch": 0.5061060943093263, "grad_norm": 1.5017105172065182, "learning_rate": 5.763221630209439e-06, "loss": 0.441, "step": 17178 }, { "epoch": 0.5061355567667899, "grad_norm": 1.543256937534914, "learning_rate": 5.7627134128356995e-06, "loss": 0.4058, "step": 17179 }, { "epoch": 0.5061650192242535, "grad_norm": 1.511877982459818, "learning_rate": 5.762205187394211e-06, "loss": 0.5611, "step": 17180 }, { "epoch": 0.5061944816817171, "grad_norm": 1.712137381599449, "learning_rate": 5.76169695389035e-06, "loss": 0.4443, "step": 17181 }, { "epoch": 0.5062239441391807, "grad_norm": 1.7167514304605656, "learning_rate": 5.76118871232949e-06, "loss": 0.3923, "step": 17182 }, { "epoch": 0.5062534065966442, "grad_norm": 1.4147575935221384, "learning_rate": 5.760680462717005e-06, "loss": 0.3624, "step": 17183 }, { "epoch": 0.5062828690541078, "grad_norm": 1.4013587677625323, "learning_rate": 5.760172205058278e-06, "loss": 0.2496, "step": 17184 }, { "epoch": 0.5063123315115714, "grad_norm": 1.5599305227851161, "learning_rate": 5.7596639393586785e-06, "loss": 0.4892, "step": 17185 }, { "epoch": 0.506341793969035, "grad_norm": 1.3680306217658527, "learning_rate": 5.759155665623586e-06, "loss": 0.3953, "step": 17186 }, { "epoch": 0.5063712564264985, "grad_norm": 1.6919043385809838, "learning_rate": 5.758647383858377e-06, "loss": 0.5476, "step": 17187 }, { "epoch": 0.5064007188839621, "grad_norm": 1.5576609678421485, "learning_rate": 5.7581390940684256e-06, "loss": 0.4752, "step": 17188 }, { "epoch": 0.5064301813414257, "grad_norm": 1.4829783576292597, "learning_rate": 5.757630796259111e-06, "loss": 0.3711, "step": 17189 }, { "epoch": 0.5064596437988893, "grad_norm": 1.57524208683798, "learning_rate": 5.757122490435809e-06, "loss": 0.3044, "step": 17190 }, { "epoch": 0.5064891062563528, "grad_norm": 1.4534033636891797, "learning_rate": 5.756614176603896e-06, "loss": 0.4678, "step": 17191 }, { "epoch": 0.5065185687138164, "grad_norm": 1.5629242773625416, "learning_rate": 5.756105854768748e-06, "loss": 0.4125, "step": 17192 }, { "epoch": 0.50654803117128, "grad_norm": 1.359697264203696, "learning_rate": 5.755597524935743e-06, "loss": 0.4383, "step": 17193 }, { "epoch": 0.5065774936287436, "grad_norm": 1.4261254328805126, "learning_rate": 5.7550891871102575e-06, "loss": 0.4408, "step": 17194 }, { "epoch": 0.5066069560862072, "grad_norm": 1.3303779759319765, "learning_rate": 5.754580841297668e-06, "loss": 0.4715, "step": 17195 }, { "epoch": 0.5066364185436707, "grad_norm": 1.4790872043547445, "learning_rate": 5.754072487503355e-06, "loss": 0.3812, "step": 17196 }, { "epoch": 0.5066658810011343, "grad_norm": 1.2630600986625915, "learning_rate": 5.75356412573269e-06, "loss": 0.4369, "step": 17197 }, { "epoch": 0.5066953434585979, "grad_norm": 1.5062688092578735, "learning_rate": 5.7530557559910545e-06, "loss": 0.4791, "step": 17198 }, { "epoch": 0.5067248059160615, "grad_norm": 1.585657012167691, "learning_rate": 5.752547378283826e-06, "loss": 0.4343, "step": 17199 }, { "epoch": 0.506754268373525, "grad_norm": 1.295847198300701, "learning_rate": 5.752038992616379e-06, "loss": 0.2944, "step": 17200 }, { "epoch": 0.5067837308309886, "grad_norm": 1.5595785462253084, "learning_rate": 5.751530598994092e-06, "loss": 0.5089, "step": 17201 }, { "epoch": 0.5068131932884522, "grad_norm": 1.578854486339828, "learning_rate": 5.751022197422347e-06, "loss": 0.4978, "step": 17202 }, { "epoch": 0.5068426557459158, "grad_norm": 1.7173594542261725, "learning_rate": 5.7505137879065145e-06, "loss": 0.4298, "step": 17203 }, { "epoch": 0.5068721182033793, "grad_norm": 1.4631234724323376, "learning_rate": 5.7500053704519766e-06, "loss": 0.5167, "step": 17204 }, { "epoch": 0.5069015806608429, "grad_norm": 1.4577840072593606, "learning_rate": 5.749496945064111e-06, "loss": 0.4303, "step": 17205 }, { "epoch": 0.5069310431183065, "grad_norm": 1.4520430650249758, "learning_rate": 5.748988511748296e-06, "loss": 0.4979, "step": 17206 }, { "epoch": 0.5069605055757701, "grad_norm": 1.5111550584927016, "learning_rate": 5.748480070509908e-06, "loss": 0.5239, "step": 17207 }, { "epoch": 0.5069899680332337, "grad_norm": 1.7589759954096609, "learning_rate": 5.747971621354326e-06, "loss": 0.6562, "step": 17208 }, { "epoch": 0.5070194304906972, "grad_norm": 1.4335274951461707, "learning_rate": 5.7474631642869276e-06, "loss": 0.5312, "step": 17209 }, { "epoch": 0.5070488929481608, "grad_norm": 1.4767599224402748, "learning_rate": 5.746954699313092e-06, "loss": 0.5837, "step": 17210 }, { "epoch": 0.5070783554056244, "grad_norm": 1.4204941516899365, "learning_rate": 5.746446226438197e-06, "loss": 0.3501, "step": 17211 }, { "epoch": 0.507107817863088, "grad_norm": 1.6862029393447802, "learning_rate": 5.7459377456676216e-06, "loss": 0.5238, "step": 17212 }, { "epoch": 0.5071372803205515, "grad_norm": 1.287339159432938, "learning_rate": 5.745429257006743e-06, "loss": 0.3806, "step": 17213 }, { "epoch": 0.5071667427780151, "grad_norm": 1.5565469067523097, "learning_rate": 5.744920760460942e-06, "loss": 0.4458, "step": 17214 }, { "epoch": 0.5071962052354787, "grad_norm": 1.6731184727774524, "learning_rate": 5.7444122560355966e-06, "loss": 0.5144, "step": 17215 }, { "epoch": 0.5072256676929423, "grad_norm": 1.8846874990937075, "learning_rate": 5.743903743736084e-06, "loss": 0.7219, "step": 17216 }, { "epoch": 0.5072551301504058, "grad_norm": 1.42722869975675, "learning_rate": 5.743395223567786e-06, "loss": 0.4454, "step": 17217 }, { "epoch": 0.5072845926078694, "grad_norm": 1.4535582755246863, "learning_rate": 5.742886695536078e-06, "loss": 0.4746, "step": 17218 }, { "epoch": 0.507314055065333, "grad_norm": 1.4867795703033504, "learning_rate": 5.7423781596463416e-06, "loss": 0.5655, "step": 17219 }, { "epoch": 0.5073435175227966, "grad_norm": 1.6197212313807683, "learning_rate": 5.741869615903956e-06, "loss": 0.4187, "step": 17220 }, { "epoch": 0.5073729799802602, "grad_norm": 1.875408799944073, "learning_rate": 5.741361064314299e-06, "loss": 0.4943, "step": 17221 }, { "epoch": 0.5074024424377237, "grad_norm": 1.389686657371568, "learning_rate": 5.74085250488275e-06, "loss": 0.3694, "step": 17222 }, { "epoch": 0.5074319048951873, "grad_norm": 1.4249982888264934, "learning_rate": 5.7403439376146905e-06, "loss": 0.3809, "step": 17223 }, { "epoch": 0.5074613673526509, "grad_norm": 1.641961700458773, "learning_rate": 5.739835362515498e-06, "loss": 0.5816, "step": 17224 }, { "epoch": 0.5074908298101145, "grad_norm": 1.3100527243521807, "learning_rate": 5.739326779590552e-06, "loss": 0.3408, "step": 17225 }, { "epoch": 0.507520292267578, "grad_norm": 1.5764254064324497, "learning_rate": 5.738818188845234e-06, "loss": 0.4946, "step": 17226 }, { "epoch": 0.5075497547250416, "grad_norm": 1.3731407534526707, "learning_rate": 5.738309590284921e-06, "loss": 0.4061, "step": 17227 }, { "epoch": 0.5075792171825052, "grad_norm": 1.8205589622378142, "learning_rate": 5.737800983914993e-06, "loss": 0.3802, "step": 17228 }, { "epoch": 0.5076086796399688, "grad_norm": 1.595921492405505, "learning_rate": 5.7372923697408335e-06, "loss": 0.4185, "step": 17229 }, { "epoch": 0.5076381420974323, "grad_norm": 1.6538202778066087, "learning_rate": 5.736783747767819e-06, "loss": 0.5985, "step": 17230 }, { "epoch": 0.5076676045548959, "grad_norm": 1.6140693655259633, "learning_rate": 5.73627511800133e-06, "loss": 0.414, "step": 17231 }, { "epoch": 0.5076970670123595, "grad_norm": 1.5868310539679245, "learning_rate": 5.735766480446749e-06, "loss": 0.4714, "step": 17232 }, { "epoch": 0.5077265294698231, "grad_norm": 1.4182194191961544, "learning_rate": 5.7352578351094515e-06, "loss": 0.4832, "step": 17233 }, { "epoch": 0.5077559919272867, "grad_norm": 1.4393630846782248, "learning_rate": 5.734749181994822e-06, "loss": 0.458, "step": 17234 }, { "epoch": 0.5077854543847502, "grad_norm": 1.4999605800767433, "learning_rate": 5.734240521108241e-06, "loss": 0.4954, "step": 17235 }, { "epoch": 0.5078149168422138, "grad_norm": 1.6306009555834944, "learning_rate": 5.733731852455087e-06, "loss": 0.4291, "step": 17236 }, { "epoch": 0.5078443792996774, "grad_norm": 1.6794035879992228, "learning_rate": 5.733223176040739e-06, "loss": 0.4219, "step": 17237 }, { "epoch": 0.507873841757141, "grad_norm": 1.6163183549066553, "learning_rate": 5.732714491870582e-06, "loss": 0.57, "step": 17238 }, { "epoch": 0.5079033042146045, "grad_norm": 1.4641159953274308, "learning_rate": 5.732205799949993e-06, "loss": 0.5211, "step": 17239 }, { "epoch": 0.5079327666720681, "grad_norm": 1.4779296195672396, "learning_rate": 5.731697100284355e-06, "loss": 0.4285, "step": 17240 }, { "epoch": 0.5079622291295317, "grad_norm": 1.4485025834670675, "learning_rate": 5.731188392879048e-06, "loss": 0.4524, "step": 17241 }, { "epoch": 0.5079916915869953, "grad_norm": 1.4550474608389414, "learning_rate": 5.730679677739452e-06, "loss": 0.2903, "step": 17242 }, { "epoch": 0.5080211540444588, "grad_norm": 1.6017400959298105, "learning_rate": 5.730170954870951e-06, "loss": 0.4177, "step": 17243 }, { "epoch": 0.5080506165019224, "grad_norm": 1.681015932589094, "learning_rate": 5.729662224278923e-06, "loss": 0.4915, "step": 17244 }, { "epoch": 0.508080078959386, "grad_norm": 1.4870630923157244, "learning_rate": 5.729153485968751e-06, "loss": 0.4655, "step": 17245 }, { "epoch": 0.5081095414168496, "grad_norm": 1.5142009321292658, "learning_rate": 5.728644739945814e-06, "loss": 0.5662, "step": 17246 }, { "epoch": 0.5081390038743132, "grad_norm": 1.3607699048832764, "learning_rate": 5.728135986215497e-06, "loss": 0.3108, "step": 17247 }, { "epoch": 0.5081684663317767, "grad_norm": 1.5253020367141854, "learning_rate": 5.727627224783177e-06, "loss": 0.5854, "step": 17248 }, { "epoch": 0.5081979287892403, "grad_norm": 1.3963846969186862, "learning_rate": 5.727118455654239e-06, "loss": 0.3585, "step": 17249 }, { "epoch": 0.5082273912467039, "grad_norm": 1.64569561586914, "learning_rate": 5.726609678834065e-06, "loss": 0.4701, "step": 17250 }, { "epoch": 0.5082568537041675, "grad_norm": 1.542058236698435, "learning_rate": 5.726100894328035e-06, "loss": 0.4778, "step": 17251 }, { "epoch": 0.508286316161631, "grad_norm": 1.7534502783372847, "learning_rate": 5.725592102141529e-06, "loss": 0.5234, "step": 17252 }, { "epoch": 0.5083157786190946, "grad_norm": 1.4437844246329394, "learning_rate": 5.725083302279934e-06, "loss": 0.4028, "step": 17253 }, { "epoch": 0.5083452410765582, "grad_norm": 1.3445231750446405, "learning_rate": 5.724574494748626e-06, "loss": 0.4596, "step": 17254 }, { "epoch": 0.5083747035340218, "grad_norm": 1.7818081791578937, "learning_rate": 5.724065679552991e-06, "loss": 0.5249, "step": 17255 }, { "epoch": 0.5084041659914853, "grad_norm": 1.5551742455030815, "learning_rate": 5.723556856698409e-06, "loss": 0.5115, "step": 17256 }, { "epoch": 0.5084336284489489, "grad_norm": 1.490247323265906, "learning_rate": 5.723048026190265e-06, "loss": 0.4097, "step": 17257 }, { "epoch": 0.5084630909064125, "grad_norm": 1.8547557890410298, "learning_rate": 5.722539188033937e-06, "loss": 0.6112, "step": 17258 }, { "epoch": 0.5084925533638761, "grad_norm": 1.4587544845512908, "learning_rate": 5.7220303422348125e-06, "loss": 0.4048, "step": 17259 }, { "epoch": 0.5085220158213397, "grad_norm": 1.3992000609431792, "learning_rate": 5.7215214887982695e-06, "loss": 0.4577, "step": 17260 }, { "epoch": 0.5085514782788032, "grad_norm": 1.545111573431268, "learning_rate": 5.721012627729692e-06, "loss": 0.4752, "step": 17261 }, { "epoch": 0.5085809407362668, "grad_norm": 1.5433536106675447, "learning_rate": 5.720503759034462e-06, "loss": 0.3878, "step": 17262 }, { "epoch": 0.5086104031937304, "grad_norm": 1.5541429391273502, "learning_rate": 5.719994882717963e-06, "loss": 0.437, "step": 17263 }, { "epoch": 0.508639865651194, "grad_norm": 1.3472515391173994, "learning_rate": 5.719485998785578e-06, "loss": 0.3598, "step": 17264 }, { "epoch": 0.5086693281086575, "grad_norm": 1.6491385562365377, "learning_rate": 5.71897710724269e-06, "loss": 0.3943, "step": 17265 }, { "epoch": 0.5086987905661211, "grad_norm": 1.6637236513949936, "learning_rate": 5.718468208094681e-06, "loss": 0.4699, "step": 17266 }, { "epoch": 0.5087282530235847, "grad_norm": 1.5144788440793926, "learning_rate": 5.7179593013469335e-06, "loss": 0.4342, "step": 17267 }, { "epoch": 0.5087577154810483, "grad_norm": 1.5228811162673268, "learning_rate": 5.717450387004831e-06, "loss": 0.4597, "step": 17268 }, { "epoch": 0.5087871779385118, "grad_norm": 1.507173924050297, "learning_rate": 5.716941465073759e-06, "loss": 0.4843, "step": 17269 }, { "epoch": 0.5088166403959754, "grad_norm": 1.5514929977414909, "learning_rate": 5.716432535559098e-06, "loss": 0.4394, "step": 17270 }, { "epoch": 0.508846102853439, "grad_norm": 1.4986051715909146, "learning_rate": 5.71592359846623e-06, "loss": 0.4409, "step": 17271 }, { "epoch": 0.5088755653109026, "grad_norm": 1.622576973041857, "learning_rate": 5.715414653800542e-06, "loss": 0.5703, "step": 17272 }, { "epoch": 0.5089050277683662, "grad_norm": 1.4352708227909277, "learning_rate": 5.714905701567415e-06, "loss": 0.4392, "step": 17273 }, { "epoch": 0.5089344902258297, "grad_norm": 1.6868999582472437, "learning_rate": 5.714396741772235e-06, "loss": 0.529, "step": 17274 }, { "epoch": 0.5089639526832933, "grad_norm": 1.6232571520370942, "learning_rate": 5.713887774420384e-06, "loss": 0.5829, "step": 17275 }, { "epoch": 0.5089934151407569, "grad_norm": 1.480788943663023, "learning_rate": 5.713378799517243e-06, "loss": 0.4484, "step": 17276 }, { "epoch": 0.5090228775982205, "grad_norm": 1.3831134417544924, "learning_rate": 5.7128698170682e-06, "loss": 0.3692, "step": 17277 }, { "epoch": 0.509052340055684, "grad_norm": 1.5377732555842585, "learning_rate": 5.712360827078638e-06, "loss": 0.4173, "step": 17278 }, { "epoch": 0.5090818025131476, "grad_norm": 1.3610384909889983, "learning_rate": 5.711851829553938e-06, "loss": 0.2811, "step": 17279 }, { "epoch": 0.5091112649706112, "grad_norm": 1.6416500821462963, "learning_rate": 5.71134282449949e-06, "loss": 0.5506, "step": 17280 }, { "epoch": 0.5091407274280748, "grad_norm": 1.4125346292306598, "learning_rate": 5.710833811920672e-06, "loss": 0.3799, "step": 17281 }, { "epoch": 0.5091701898855383, "grad_norm": 1.3462574579316775, "learning_rate": 5.71032479182287e-06, "loss": 0.3824, "step": 17282 }, { "epoch": 0.5091996523430019, "grad_norm": 1.4098706689944773, "learning_rate": 5.709815764211469e-06, "loss": 0.3929, "step": 17283 }, { "epoch": 0.5092291148004655, "grad_norm": 1.7346969987733372, "learning_rate": 5.709306729091855e-06, "loss": 0.5489, "step": 17284 }, { "epoch": 0.5092585772579291, "grad_norm": 1.4154313193228656, "learning_rate": 5.708797686469409e-06, "loss": 0.4573, "step": 17285 }, { "epoch": 0.5092880397153927, "grad_norm": 1.6088449560038318, "learning_rate": 5.708288636349518e-06, "loss": 0.5409, "step": 17286 }, { "epoch": 0.5093175021728562, "grad_norm": 1.673786516194461, "learning_rate": 5.707779578737566e-06, "loss": 0.3857, "step": 17287 }, { "epoch": 0.5093469646303198, "grad_norm": 1.319090389159288, "learning_rate": 5.707270513638936e-06, "loss": 0.3499, "step": 17288 }, { "epoch": 0.5093764270877834, "grad_norm": 1.7159671121736104, "learning_rate": 5.706761441059013e-06, "loss": 0.643, "step": 17289 }, { "epoch": 0.509405889545247, "grad_norm": 1.5296325004913403, "learning_rate": 5.706252361003188e-06, "loss": 0.4719, "step": 17290 }, { "epoch": 0.5094353520027105, "grad_norm": 1.5475820176001136, "learning_rate": 5.7057432734768355e-06, "loss": 0.4753, "step": 17291 }, { "epoch": 0.5094648144601741, "grad_norm": 1.551727468771229, "learning_rate": 5.7052341784853475e-06, "loss": 0.4478, "step": 17292 }, { "epoch": 0.5094942769176377, "grad_norm": 1.5152576061920664, "learning_rate": 5.704725076034108e-06, "loss": 0.3878, "step": 17293 }, { "epoch": 0.5095237393751013, "grad_norm": 1.4928922233819064, "learning_rate": 5.704215966128499e-06, "loss": 0.4287, "step": 17294 }, { "epoch": 0.5095532018325648, "grad_norm": 1.9725542289346765, "learning_rate": 5.7037068487739115e-06, "loss": 0.6789, "step": 17295 }, { "epoch": 0.5095826642900284, "grad_norm": 1.5248801242844603, "learning_rate": 5.703197723975725e-06, "loss": 0.4698, "step": 17296 }, { "epoch": 0.509612126747492, "grad_norm": 1.6018478068074222, "learning_rate": 5.702688591739328e-06, "loss": 0.5502, "step": 17297 }, { "epoch": 0.5096415892049556, "grad_norm": 1.5095415895136153, "learning_rate": 5.7021794520701045e-06, "loss": 0.5717, "step": 17298 }, { "epoch": 0.5096710516624192, "grad_norm": 1.4277498354885794, "learning_rate": 5.701670304973442e-06, "loss": 0.4831, "step": 17299 }, { "epoch": 0.5097005141198827, "grad_norm": 1.773922266671227, "learning_rate": 5.701161150454725e-06, "loss": 0.5764, "step": 17300 }, { "epoch": 0.5097299765773463, "grad_norm": 1.527747863834276, "learning_rate": 5.700651988519338e-06, "loss": 0.4838, "step": 17301 }, { "epoch": 0.5097594390348099, "grad_norm": 1.5603276429350168, "learning_rate": 5.700142819172668e-06, "loss": 0.4633, "step": 17302 }, { "epoch": 0.5097889014922735, "grad_norm": 1.5422840247546326, "learning_rate": 5.699633642420101e-06, "loss": 0.4463, "step": 17303 }, { "epoch": 0.509818363949737, "grad_norm": 1.5216955734891002, "learning_rate": 5.699124458267021e-06, "loss": 0.6089, "step": 17304 }, { "epoch": 0.5098478264072006, "grad_norm": 1.4011602280904263, "learning_rate": 5.6986152667188185e-06, "loss": 0.306, "step": 17305 }, { "epoch": 0.5098772888646642, "grad_norm": 1.5207176406921217, "learning_rate": 5.698106067780875e-06, "loss": 0.5321, "step": 17306 }, { "epoch": 0.5099067513221278, "grad_norm": 1.3238811753075346, "learning_rate": 5.6975968614585775e-06, "loss": 0.4265, "step": 17307 }, { "epoch": 0.5099362137795913, "grad_norm": 1.733027919050831, "learning_rate": 5.697087647757314e-06, "loss": 0.576, "step": 17308 }, { "epoch": 0.5099656762370549, "grad_norm": 1.5236683174015087, "learning_rate": 5.69657842668247e-06, "loss": 0.4351, "step": 17309 }, { "epoch": 0.5099951386945185, "grad_norm": 1.6507030466272603, "learning_rate": 5.6960691982394315e-06, "loss": 0.4273, "step": 17310 }, { "epoch": 0.5100246011519821, "grad_norm": 1.4600317023187543, "learning_rate": 5.6955599624335855e-06, "loss": 0.3383, "step": 17311 }, { "epoch": 0.5100540636094457, "grad_norm": 1.6877585806443474, "learning_rate": 5.6950507192703166e-06, "loss": 0.508, "step": 17312 }, { "epoch": 0.5100835260669092, "grad_norm": 1.4063116262471054, "learning_rate": 5.694541468755014e-06, "loss": 0.4149, "step": 17313 }, { "epoch": 0.5101129885243728, "grad_norm": 1.478251460668803, "learning_rate": 5.694032210893064e-06, "loss": 0.4317, "step": 17314 }, { "epoch": 0.5101424509818364, "grad_norm": 1.3499516425755864, "learning_rate": 5.693522945689852e-06, "loss": 0.4105, "step": 17315 }, { "epoch": 0.5101719134393, "grad_norm": 1.4805565461988484, "learning_rate": 5.693013673150765e-06, "loss": 0.471, "step": 17316 }, { "epoch": 0.5102013758967635, "grad_norm": 1.6199391796073432, "learning_rate": 5.692504393281193e-06, "loss": 0.4876, "step": 17317 }, { "epoch": 0.5102308383542271, "grad_norm": 1.4394353467048016, "learning_rate": 5.691995106086519e-06, "loss": 0.4912, "step": 17318 }, { "epoch": 0.5102603008116907, "grad_norm": 1.4803390074634217, "learning_rate": 5.691485811572131e-06, "loss": 0.4442, "step": 17319 }, { "epoch": 0.5102897632691543, "grad_norm": 1.5651019113333149, "learning_rate": 5.690976509743419e-06, "loss": 0.5247, "step": 17320 }, { "epoch": 0.5103192257266178, "grad_norm": 1.5607087311641938, "learning_rate": 5.690467200605766e-06, "loss": 0.5038, "step": 17321 }, { "epoch": 0.5103486881840814, "grad_norm": 1.3404016034499469, "learning_rate": 5.6899578841645606e-06, "loss": 0.4156, "step": 17322 }, { "epoch": 0.510378150641545, "grad_norm": 1.5806699206500612, "learning_rate": 5.689448560425194e-06, "loss": 0.4697, "step": 17323 }, { "epoch": 0.5104076130990086, "grad_norm": 1.5075201157958757, "learning_rate": 5.688939229393048e-06, "loss": 0.5455, "step": 17324 }, { "epoch": 0.5104370755564722, "grad_norm": 1.5092219665387074, "learning_rate": 5.688429891073514e-06, "loss": 0.4904, "step": 17325 }, { "epoch": 0.5104665380139357, "grad_norm": 1.6953658217462368, "learning_rate": 5.687920545471979e-06, "loss": 0.4575, "step": 17326 }, { "epoch": 0.5104960004713993, "grad_norm": 1.6625185429458857, "learning_rate": 5.687411192593828e-06, "loss": 0.557, "step": 17327 }, { "epoch": 0.5105254629288629, "grad_norm": 1.4921622627869218, "learning_rate": 5.686901832444452e-06, "loss": 0.4211, "step": 17328 }, { "epoch": 0.5105549253863265, "grad_norm": 1.399570567799201, "learning_rate": 5.686392465029239e-06, "loss": 0.4184, "step": 17329 }, { "epoch": 0.51058438784379, "grad_norm": 1.6039110096143157, "learning_rate": 5.685883090353575e-06, "loss": 0.5937, "step": 17330 }, { "epoch": 0.5106138503012536, "grad_norm": 1.4872460420337408, "learning_rate": 5.685373708422848e-06, "loss": 0.4229, "step": 17331 }, { "epoch": 0.5106433127587172, "grad_norm": 1.5283170909739985, "learning_rate": 5.6848643192424465e-06, "loss": 0.4666, "step": 17332 }, { "epoch": 0.5106727752161808, "grad_norm": 1.4588092419729248, "learning_rate": 5.684354922817759e-06, "loss": 0.4802, "step": 17333 }, { "epoch": 0.5107022376736443, "grad_norm": 1.5843312286075606, "learning_rate": 5.6838455191541746e-06, "loss": 0.5446, "step": 17334 }, { "epoch": 0.5107317001311079, "grad_norm": 1.7053822340450218, "learning_rate": 5.6833361082570825e-06, "loss": 0.3266, "step": 17335 }, { "epoch": 0.5107611625885715, "grad_norm": 1.6807446702728133, "learning_rate": 5.682826690131866e-06, "loss": 0.4202, "step": 17336 }, { "epoch": 0.5107906250460351, "grad_norm": 1.4681529092220795, "learning_rate": 5.682317264783918e-06, "loss": 0.3598, "step": 17337 }, { "epoch": 0.5108200875034987, "grad_norm": 1.5707303002420345, "learning_rate": 5.681807832218627e-06, "loss": 0.4839, "step": 17338 }, { "epoch": 0.5108495499609622, "grad_norm": 1.3675638717280894, "learning_rate": 5.681298392441378e-06, "loss": 0.3307, "step": 17339 }, { "epoch": 0.5108790124184258, "grad_norm": 1.3348783067506582, "learning_rate": 5.6807889454575646e-06, "loss": 0.4775, "step": 17340 }, { "epoch": 0.5109084748758894, "grad_norm": 1.4869283065755563, "learning_rate": 5.6802794912725725e-06, "loss": 0.4736, "step": 17341 }, { "epoch": 0.510937937333353, "grad_norm": 1.3419501003505343, "learning_rate": 5.679770029891792e-06, "loss": 0.3584, "step": 17342 }, { "epoch": 0.5109673997908165, "grad_norm": 1.7364436859524726, "learning_rate": 5.679260561320608e-06, "loss": 0.5513, "step": 17343 }, { "epoch": 0.5109968622482801, "grad_norm": 1.4572293874775306, "learning_rate": 5.678751085564415e-06, "loss": 0.4378, "step": 17344 }, { "epoch": 0.5110263247057437, "grad_norm": 1.4107477971342761, "learning_rate": 5.678241602628603e-06, "loss": 0.4204, "step": 17345 }, { "epoch": 0.5110557871632073, "grad_norm": 1.5591745615553028, "learning_rate": 5.677732112518554e-06, "loss": 0.5551, "step": 17346 }, { "epoch": 0.5110852496206708, "grad_norm": 1.582492899815268, "learning_rate": 5.677222615239663e-06, "loss": 0.4367, "step": 17347 }, { "epoch": 0.5111147120781344, "grad_norm": 1.530197714739362, "learning_rate": 5.676713110797316e-06, "loss": 0.4672, "step": 17348 }, { "epoch": 0.511144174535598, "grad_norm": 1.590842441988049, "learning_rate": 5.676203599196905e-06, "loss": 0.284, "step": 17349 }, { "epoch": 0.5111736369930616, "grad_norm": 1.5949673561183468, "learning_rate": 5.675694080443818e-06, "loss": 0.4681, "step": 17350 }, { "epoch": 0.5112030994505252, "grad_norm": 1.3607198009990635, "learning_rate": 5.675184554543446e-06, "loss": 0.3025, "step": 17351 }, { "epoch": 0.5112325619079887, "grad_norm": 1.629800998939654, "learning_rate": 5.674675021501176e-06, "loss": 0.5107, "step": 17352 }, { "epoch": 0.5112620243654523, "grad_norm": 1.5727165871858462, "learning_rate": 5.6741654813224005e-06, "loss": 0.4423, "step": 17353 }, { "epoch": 0.5112914868229159, "grad_norm": 1.4702014360979923, "learning_rate": 5.6736559340125065e-06, "loss": 0.4741, "step": 17354 }, { "epoch": 0.5113209492803795, "grad_norm": 1.478522486562693, "learning_rate": 5.673146379576887e-06, "loss": 0.3671, "step": 17355 }, { "epoch": 0.511350411737843, "grad_norm": 1.4707255019637426, "learning_rate": 5.672636818020929e-06, "loss": 0.3291, "step": 17356 }, { "epoch": 0.5113798741953066, "grad_norm": 1.6902477822635678, "learning_rate": 5.672127249350026e-06, "loss": 0.4421, "step": 17357 }, { "epoch": 0.5114093366527702, "grad_norm": 1.4897781937509937, "learning_rate": 5.671617673569563e-06, "loss": 0.3389, "step": 17358 }, { "epoch": 0.5114387991102338, "grad_norm": 1.446744637568244, "learning_rate": 5.671108090684935e-06, "loss": 0.5145, "step": 17359 }, { "epoch": 0.5114682615676973, "grad_norm": 1.6157025255335726, "learning_rate": 5.670598500701528e-06, "loss": 0.6083, "step": 17360 }, { "epoch": 0.5114977240251609, "grad_norm": 1.4864239210135555, "learning_rate": 5.670088903624736e-06, "loss": 0.3615, "step": 17361 }, { "epoch": 0.5115271864826245, "grad_norm": 1.6943636878118815, "learning_rate": 5.6695792994599485e-06, "loss": 0.567, "step": 17362 }, { "epoch": 0.5115566489400881, "grad_norm": 1.437232677969794, "learning_rate": 5.6690696882125526e-06, "loss": 0.413, "step": 17363 }, { "epoch": 0.5115861113975517, "grad_norm": 1.4464034546406241, "learning_rate": 5.668560069887943e-06, "loss": 0.2446, "step": 17364 }, { "epoch": 0.5116155738550152, "grad_norm": 1.6627774522038699, "learning_rate": 5.66805044449151e-06, "loss": 0.5448, "step": 17365 }, { "epoch": 0.5116450363124788, "grad_norm": 1.437172965872678, "learning_rate": 5.667540812028641e-06, "loss": 0.2875, "step": 17366 }, { "epoch": 0.5116744987699424, "grad_norm": 1.4363931358602053, "learning_rate": 5.667031172504729e-06, "loss": 0.4147, "step": 17367 }, { "epoch": 0.511703961227406, "grad_norm": 1.4628755207260296, "learning_rate": 5.666521525925166e-06, "loss": 0.4481, "step": 17368 }, { "epoch": 0.5117334236848695, "grad_norm": 1.6372362442904516, "learning_rate": 5.6660118722953395e-06, "loss": 0.4632, "step": 17369 }, { "epoch": 0.5117628861423331, "grad_norm": 1.581235823023528, "learning_rate": 5.665502211620644e-06, "loss": 0.4266, "step": 17370 }, { "epoch": 0.5117923485997967, "grad_norm": 1.2317940731391575, "learning_rate": 5.664992543906468e-06, "loss": 0.3566, "step": 17371 }, { "epoch": 0.5118218110572603, "grad_norm": 1.474031918066329, "learning_rate": 5.664482869158205e-06, "loss": 0.426, "step": 17372 }, { "epoch": 0.5118512735147238, "grad_norm": 1.461486487211839, "learning_rate": 5.663973187381242e-06, "loss": 0.4243, "step": 17373 }, { "epoch": 0.5118807359721874, "grad_norm": 1.4161452684909053, "learning_rate": 5.663463498580974e-06, "loss": 0.3004, "step": 17374 }, { "epoch": 0.511910198429651, "grad_norm": 1.4568333548639745, "learning_rate": 5.662953802762793e-06, "loss": 0.4354, "step": 17375 }, { "epoch": 0.5119396608871146, "grad_norm": 1.4410701227781293, "learning_rate": 5.662444099932087e-06, "loss": 0.435, "step": 17376 }, { "epoch": 0.5119691233445782, "grad_norm": 1.5665014484892492, "learning_rate": 5.661934390094249e-06, "loss": 0.401, "step": 17377 }, { "epoch": 0.5119985858020417, "grad_norm": 1.5262035961269231, "learning_rate": 5.661424673254672e-06, "loss": 0.4963, "step": 17378 }, { "epoch": 0.5120280482595053, "grad_norm": 1.516066909574617, "learning_rate": 5.660914949418745e-06, "loss": 0.4325, "step": 17379 }, { "epoch": 0.5120575107169689, "grad_norm": 1.7992309214833866, "learning_rate": 5.660405218591863e-06, "loss": 0.5567, "step": 17380 }, { "epoch": 0.5120869731744325, "grad_norm": 1.5125946524865985, "learning_rate": 5.659895480779415e-06, "loss": 0.4533, "step": 17381 }, { "epoch": 0.512116435631896, "grad_norm": 1.4813447526161567, "learning_rate": 5.659385735986793e-06, "loss": 0.4341, "step": 17382 }, { "epoch": 0.5121458980893596, "grad_norm": 1.5097088985091847, "learning_rate": 5.6588759842193894e-06, "loss": 0.4312, "step": 17383 }, { "epoch": 0.5121753605468232, "grad_norm": 1.6836791988757573, "learning_rate": 5.6583662254825975e-06, "loss": 0.5464, "step": 17384 }, { "epoch": 0.5122048230042868, "grad_norm": 1.4675903299853252, "learning_rate": 5.657856459781808e-06, "loss": 0.4594, "step": 17385 }, { "epoch": 0.5122342854617503, "grad_norm": 1.5718103999599695, "learning_rate": 5.657346687122414e-06, "loss": 0.4276, "step": 17386 }, { "epoch": 0.5122637479192139, "grad_norm": 1.4560163865225806, "learning_rate": 5.656836907509806e-06, "loss": 0.4534, "step": 17387 }, { "epoch": 0.5122932103766775, "grad_norm": 1.701572526454074, "learning_rate": 5.656327120949378e-06, "loss": 0.4961, "step": 17388 }, { "epoch": 0.5123226728341411, "grad_norm": 1.6636377756750333, "learning_rate": 5.655817327446522e-06, "loss": 0.5972, "step": 17389 }, { "epoch": 0.5123521352916047, "grad_norm": 1.5654505077061909, "learning_rate": 5.65530752700663e-06, "loss": 0.5084, "step": 17390 }, { "epoch": 0.5123815977490682, "grad_norm": 1.5989670646065184, "learning_rate": 5.654797719635095e-06, "loss": 0.4195, "step": 17391 }, { "epoch": 0.5124110602065318, "grad_norm": 1.4715805715121189, "learning_rate": 5.654287905337307e-06, "loss": 0.4742, "step": 17392 }, { "epoch": 0.5124405226639954, "grad_norm": 1.4960563149727413, "learning_rate": 5.653778084118663e-06, "loss": 0.4706, "step": 17393 }, { "epoch": 0.512469985121459, "grad_norm": 1.4513248683335487, "learning_rate": 5.653268255984553e-06, "loss": 0.4692, "step": 17394 }, { "epoch": 0.5124994475789225, "grad_norm": 1.2829471479315786, "learning_rate": 5.652758420940371e-06, "loss": 0.3452, "step": 17395 }, { "epoch": 0.5125289100363861, "grad_norm": 1.6477332113308005, "learning_rate": 5.652248578991509e-06, "loss": 0.441, "step": 17396 }, { "epoch": 0.5125583724938497, "grad_norm": 1.4919096492976873, "learning_rate": 5.6517387301433605e-06, "loss": 0.3561, "step": 17397 }, { "epoch": 0.5125878349513133, "grad_norm": 1.5871451680667161, "learning_rate": 5.651228874401318e-06, "loss": 0.5278, "step": 17398 }, { "epoch": 0.5126172974087768, "grad_norm": 1.6826409174305812, "learning_rate": 5.650719011770776e-06, "loss": 0.5973, "step": 17399 }, { "epoch": 0.5126467598662404, "grad_norm": 1.455727248751003, "learning_rate": 5.650209142257126e-06, "loss": 0.5005, "step": 17400 }, { "epoch": 0.512676222323704, "grad_norm": 1.5758671534736377, "learning_rate": 5.649699265865761e-06, "loss": 0.4942, "step": 17401 }, { "epoch": 0.5127056847811676, "grad_norm": 1.6881989982868568, "learning_rate": 5.649189382602076e-06, "loss": 0.4615, "step": 17402 }, { "epoch": 0.5127351472386312, "grad_norm": 1.5193780040179956, "learning_rate": 5.648679492471463e-06, "loss": 0.4263, "step": 17403 }, { "epoch": 0.5127646096960947, "grad_norm": 1.4845690496531256, "learning_rate": 5.648169595479316e-06, "loss": 0.3209, "step": 17404 }, { "epoch": 0.5127940721535583, "grad_norm": 1.4476258150928714, "learning_rate": 5.64765969163103e-06, "loss": 0.4622, "step": 17405 }, { "epoch": 0.5128235346110219, "grad_norm": 1.4609399811240698, "learning_rate": 5.647149780931996e-06, "loss": 0.5061, "step": 17406 }, { "epoch": 0.5128529970684855, "grad_norm": 1.3355600156528622, "learning_rate": 5.646639863387609e-06, "loss": 0.4428, "step": 17407 }, { "epoch": 0.512882459525949, "grad_norm": 1.690800999202961, "learning_rate": 5.646129939003262e-06, "loss": 0.5655, "step": 17408 }, { "epoch": 0.5129119219834126, "grad_norm": 1.5181489718932646, "learning_rate": 5.645620007784349e-06, "loss": 0.4481, "step": 17409 }, { "epoch": 0.5129413844408762, "grad_norm": 1.4944457235795008, "learning_rate": 5.645110069736266e-06, "loss": 0.5687, "step": 17410 }, { "epoch": 0.5129708468983398, "grad_norm": 1.5645955764204762, "learning_rate": 5.644600124864406e-06, "loss": 0.4767, "step": 17411 }, { "epoch": 0.5130003093558033, "grad_norm": 1.4359738341344852, "learning_rate": 5.644090173174161e-06, "loss": 0.4168, "step": 17412 }, { "epoch": 0.5130297718132669, "grad_norm": 1.3687280987084995, "learning_rate": 5.643580214670925e-06, "loss": 0.4618, "step": 17413 }, { "epoch": 0.5130592342707305, "grad_norm": 1.4991199419300454, "learning_rate": 5.643070249360095e-06, "loss": 0.395, "step": 17414 }, { "epoch": 0.5130886967281941, "grad_norm": 1.6622252906230934, "learning_rate": 5.642560277247064e-06, "loss": 0.5414, "step": 17415 }, { "epoch": 0.5131181591856577, "grad_norm": 1.4941431580726736, "learning_rate": 5.642050298337226e-06, "loss": 0.534, "step": 17416 }, { "epoch": 0.5131476216431212, "grad_norm": 1.700845468607316, "learning_rate": 5.641540312635976e-06, "loss": 0.5743, "step": 17417 }, { "epoch": 0.5131770841005848, "grad_norm": 1.5218227780584435, "learning_rate": 5.641030320148707e-06, "loss": 0.6294, "step": 17418 }, { "epoch": 0.5132065465580484, "grad_norm": 1.4623683526969762, "learning_rate": 5.640520320880816e-06, "loss": 0.4915, "step": 17419 }, { "epoch": 0.513236009015512, "grad_norm": 1.5030136753792496, "learning_rate": 5.640010314837696e-06, "loss": 0.4284, "step": 17420 }, { "epoch": 0.5132654714729755, "grad_norm": 1.6420518715629624, "learning_rate": 5.639500302024742e-06, "loss": 0.6106, "step": 17421 }, { "epoch": 0.5132949339304391, "grad_norm": 1.458209094856141, "learning_rate": 5.6389902824473476e-06, "loss": 0.3941, "step": 17422 }, { "epoch": 0.5133243963879027, "grad_norm": 1.2674202829484493, "learning_rate": 5.6384802561109095e-06, "loss": 0.284, "step": 17423 }, { "epoch": 0.5133538588453663, "grad_norm": 1.3580448201899074, "learning_rate": 5.637970223020821e-06, "loss": 0.4424, "step": 17424 }, { "epoch": 0.5133833213028298, "grad_norm": 1.554637531064013, "learning_rate": 5.63746018318248e-06, "loss": 0.5233, "step": 17425 }, { "epoch": 0.5134127837602934, "grad_norm": 1.829591564045548, "learning_rate": 5.636950136601278e-06, "loss": 0.4891, "step": 17426 }, { "epoch": 0.513442246217757, "grad_norm": 1.7592529847470846, "learning_rate": 5.6364400832826115e-06, "loss": 0.543, "step": 17427 }, { "epoch": 0.5134717086752206, "grad_norm": 1.4404796262487183, "learning_rate": 5.635930023231875e-06, "loss": 0.4834, "step": 17428 }, { "epoch": 0.5135011711326842, "grad_norm": 1.6285880710499765, "learning_rate": 5.6354199564544665e-06, "loss": 0.3982, "step": 17429 }, { "epoch": 0.5135306335901477, "grad_norm": 2.0549698989421032, "learning_rate": 5.634909882955778e-06, "loss": 0.5466, "step": 17430 }, { "epoch": 0.5135600960476113, "grad_norm": 1.8810340439337676, "learning_rate": 5.634399802741206e-06, "loss": 0.4803, "step": 17431 }, { "epoch": 0.5135895585050749, "grad_norm": 1.5715014934023182, "learning_rate": 5.633889715816148e-06, "loss": 0.5232, "step": 17432 }, { "epoch": 0.5136190209625385, "grad_norm": 1.767796723210329, "learning_rate": 5.633379622185996e-06, "loss": 0.4373, "step": 17433 }, { "epoch": 0.513648483420002, "grad_norm": 1.4534368595524225, "learning_rate": 5.632869521856147e-06, "loss": 0.5317, "step": 17434 }, { "epoch": 0.5136779458774656, "grad_norm": 1.5661150780645403, "learning_rate": 5.632359414831999e-06, "loss": 0.5171, "step": 17435 }, { "epoch": 0.5137074083349292, "grad_norm": 1.764327687796892, "learning_rate": 5.6318493011189455e-06, "loss": 0.5884, "step": 17436 }, { "epoch": 0.5137368707923928, "grad_norm": 1.6555231446065442, "learning_rate": 5.631339180722382e-06, "loss": 0.5052, "step": 17437 }, { "epoch": 0.5137663332498563, "grad_norm": 1.3903314539402551, "learning_rate": 5.630829053647705e-06, "loss": 0.3906, "step": 17438 }, { "epoch": 0.5137957957073199, "grad_norm": 1.6354482405740114, "learning_rate": 5.6303189199003094e-06, "loss": 0.4224, "step": 17439 }, { "epoch": 0.5138252581647835, "grad_norm": 1.3302273417655661, "learning_rate": 5.6298087794855935e-06, "loss": 0.3714, "step": 17440 }, { "epoch": 0.5138547206222471, "grad_norm": 1.5698814908107672, "learning_rate": 5.629298632408954e-06, "loss": 0.5066, "step": 17441 }, { "epoch": 0.5138841830797107, "grad_norm": 1.5620174492194197, "learning_rate": 5.628788478675783e-06, "loss": 0.5435, "step": 17442 }, { "epoch": 0.5139136455371742, "grad_norm": 1.3916638176365592, "learning_rate": 5.628278318291479e-06, "loss": 0.4004, "step": 17443 }, { "epoch": 0.5139431079946378, "grad_norm": 1.4715427801140968, "learning_rate": 5.627768151261439e-06, "loss": 0.4901, "step": 17444 }, { "epoch": 0.5139725704521014, "grad_norm": 1.5845173296149029, "learning_rate": 5.627257977591061e-06, "loss": 0.4475, "step": 17445 }, { "epoch": 0.514002032909565, "grad_norm": 1.530891151506242, "learning_rate": 5.626747797285735e-06, "loss": 0.4472, "step": 17446 }, { "epoch": 0.5140314953670285, "grad_norm": 1.3646224741249593, "learning_rate": 5.626237610350865e-06, "loss": 0.3105, "step": 17447 }, { "epoch": 0.5140609578244921, "grad_norm": 1.5997924277054743, "learning_rate": 5.625727416791843e-06, "loss": 0.479, "step": 17448 }, { "epoch": 0.5140904202819557, "grad_norm": 1.2800410359364132, "learning_rate": 5.625217216614067e-06, "loss": 0.3822, "step": 17449 }, { "epoch": 0.5141198827394193, "grad_norm": 1.5876730357377815, "learning_rate": 5.624707009822937e-06, "loss": 0.4011, "step": 17450 }, { "epoch": 0.5141493451968828, "grad_norm": 1.548798770545043, "learning_rate": 5.624196796423843e-06, "loss": 0.4298, "step": 17451 }, { "epoch": 0.5141788076543464, "grad_norm": 1.52025496372168, "learning_rate": 5.623686576422186e-06, "loss": 0.4158, "step": 17452 }, { "epoch": 0.51420827011181, "grad_norm": 1.5616859436597406, "learning_rate": 5.623176349823364e-06, "loss": 0.483, "step": 17453 }, { "epoch": 0.5142377325692736, "grad_norm": 1.3333018706174227, "learning_rate": 5.62266611663277e-06, "loss": 0.3062, "step": 17454 }, { "epoch": 0.5142671950267372, "grad_norm": 1.4417764152984915, "learning_rate": 5.622155876855806e-06, "loss": 0.4162, "step": 17455 }, { "epoch": 0.5142966574842007, "grad_norm": 1.218588861353687, "learning_rate": 5.621645630497866e-06, "loss": 0.2578, "step": 17456 }, { "epoch": 0.5143261199416643, "grad_norm": 1.4971283758713978, "learning_rate": 5.621135377564348e-06, "loss": 0.3702, "step": 17457 }, { "epoch": 0.5143555823991279, "grad_norm": 1.6817945118608915, "learning_rate": 5.620625118060648e-06, "loss": 0.4928, "step": 17458 }, { "epoch": 0.5143850448565915, "grad_norm": 1.4320455579184104, "learning_rate": 5.6201148519921665e-06, "loss": 0.3932, "step": 17459 }, { "epoch": 0.514414507314055, "grad_norm": 1.2571647407580504, "learning_rate": 5.619604579364298e-06, "loss": 0.2454, "step": 17460 }, { "epoch": 0.5144439697715186, "grad_norm": 1.5234134264087587, "learning_rate": 5.619094300182442e-06, "loss": 0.5046, "step": 17461 }, { "epoch": 0.5144734322289822, "grad_norm": 1.5224420299032233, "learning_rate": 5.618584014451994e-06, "loss": 0.4186, "step": 17462 }, { "epoch": 0.5145028946864458, "grad_norm": 1.5455488420044696, "learning_rate": 5.618073722178354e-06, "loss": 0.3246, "step": 17463 }, { "epoch": 0.5145323571439093, "grad_norm": 1.3840158361750243, "learning_rate": 5.617563423366916e-06, "loss": 0.5177, "step": 17464 }, { "epoch": 0.5145618196013729, "grad_norm": 1.6292913221294556, "learning_rate": 5.617053118023084e-06, "loss": 0.4985, "step": 17465 }, { "epoch": 0.5145912820588365, "grad_norm": 1.4925396112363298, "learning_rate": 5.616542806152251e-06, "loss": 0.4643, "step": 17466 }, { "epoch": 0.5146207445163001, "grad_norm": 1.5829523736065114, "learning_rate": 5.616032487759815e-06, "loss": 0.4155, "step": 17467 }, { "epoch": 0.5146502069737637, "grad_norm": 1.3609383392188552, "learning_rate": 5.615522162851176e-06, "loss": 0.4034, "step": 17468 }, { "epoch": 0.5146796694312272, "grad_norm": 1.6480173535176166, "learning_rate": 5.615011831431729e-06, "loss": 0.4018, "step": 17469 }, { "epoch": 0.5147091318886908, "grad_norm": 1.7208670718540136, "learning_rate": 5.614501493506878e-06, "loss": 0.5386, "step": 17470 }, { "epoch": 0.5147385943461544, "grad_norm": 1.6638002554096334, "learning_rate": 5.613991149082014e-06, "loss": 0.4349, "step": 17471 }, { "epoch": 0.514768056803618, "grad_norm": 1.729105553275673, "learning_rate": 5.61348079816254e-06, "loss": 0.6043, "step": 17472 }, { "epoch": 0.5147975192610815, "grad_norm": 1.576394708190263, "learning_rate": 5.612970440753853e-06, "loss": 0.4188, "step": 17473 }, { "epoch": 0.5148269817185451, "grad_norm": 1.3859435092766952, "learning_rate": 5.6124600768613525e-06, "loss": 0.3919, "step": 17474 }, { "epoch": 0.5148564441760087, "grad_norm": 1.4649910992485842, "learning_rate": 5.611949706490436e-06, "loss": 0.3368, "step": 17475 }, { "epoch": 0.5148859066334723, "grad_norm": 1.5084855820045837, "learning_rate": 5.6114393296465005e-06, "loss": 0.5481, "step": 17476 }, { "epoch": 0.5149153690909358, "grad_norm": 1.5168750005439124, "learning_rate": 5.610928946334948e-06, "loss": 0.4945, "step": 17477 }, { "epoch": 0.5149448315483994, "grad_norm": 1.5187695432593813, "learning_rate": 5.610418556561173e-06, "loss": 0.5875, "step": 17478 }, { "epoch": 0.514974294005863, "grad_norm": 1.5568046281779553, "learning_rate": 5.609908160330577e-06, "loss": 0.5651, "step": 17479 }, { "epoch": 0.5150037564633266, "grad_norm": 1.5998274869801439, "learning_rate": 5.609397757648561e-06, "loss": 0.4976, "step": 17480 }, { "epoch": 0.5150332189207902, "grad_norm": 1.6301181510303098, "learning_rate": 5.608887348520519e-06, "loss": 0.4081, "step": 17481 }, { "epoch": 0.5150626813782537, "grad_norm": 1.743640823562431, "learning_rate": 5.6083769329518526e-06, "loss": 0.3335, "step": 17482 }, { "epoch": 0.5150921438357173, "grad_norm": 1.5110488048897628, "learning_rate": 5.60786651094796e-06, "loss": 0.5065, "step": 17483 }, { "epoch": 0.5151216062931809, "grad_norm": 1.5868228882928208, "learning_rate": 5.60735608251424e-06, "loss": 0.5397, "step": 17484 }, { "epoch": 0.5151510687506445, "grad_norm": 1.3785778010917151, "learning_rate": 5.606845647656094e-06, "loss": 0.3385, "step": 17485 }, { "epoch": 0.515180531208108, "grad_norm": 1.2556504028650968, "learning_rate": 5.6063352063789185e-06, "loss": 0.2922, "step": 17486 }, { "epoch": 0.5152099936655716, "grad_norm": 1.4945206824809143, "learning_rate": 5.605824758688115e-06, "loss": 0.4743, "step": 17487 }, { "epoch": 0.5152394561230352, "grad_norm": 1.3553308549838778, "learning_rate": 5.6053143045890815e-06, "loss": 0.3918, "step": 17488 }, { "epoch": 0.5152689185804988, "grad_norm": 1.4669172466929339, "learning_rate": 5.604803844087219e-06, "loss": 0.5629, "step": 17489 }, { "epoch": 0.5152983810379623, "grad_norm": 1.6340858099654507, "learning_rate": 5.604293377187925e-06, "loss": 0.6035, "step": 17490 }, { "epoch": 0.5153278434954259, "grad_norm": 1.4120832682035116, "learning_rate": 5.603782903896599e-06, "loss": 0.3671, "step": 17491 }, { "epoch": 0.5153573059528895, "grad_norm": 1.5098712142001973, "learning_rate": 5.60327242421864e-06, "loss": 0.3811, "step": 17492 }, { "epoch": 0.5153867684103531, "grad_norm": 1.3933921361981825, "learning_rate": 5.602761938159451e-06, "loss": 0.449, "step": 17493 }, { "epoch": 0.5154162308678167, "grad_norm": 1.683720865050436, "learning_rate": 5.6022514457244305e-06, "loss": 0.4741, "step": 17494 }, { "epoch": 0.5154456933252802, "grad_norm": 1.3713450310850548, "learning_rate": 5.601740946918977e-06, "loss": 0.3756, "step": 17495 }, { "epoch": 0.5154751557827438, "grad_norm": 1.4751937825733985, "learning_rate": 5.601230441748492e-06, "loss": 0.4337, "step": 17496 }, { "epoch": 0.5155046182402074, "grad_norm": 1.376116155403446, "learning_rate": 5.600719930218373e-06, "loss": 0.3754, "step": 17497 }, { "epoch": 0.515534080697671, "grad_norm": 1.5884412334292912, "learning_rate": 5.600209412334023e-06, "loss": 0.4978, "step": 17498 }, { "epoch": 0.5155635431551345, "grad_norm": 1.5289635982086665, "learning_rate": 5.599698888100839e-06, "loss": 0.53, "step": 17499 }, { "epoch": 0.5155930056125981, "grad_norm": 1.4262270881707242, "learning_rate": 5.599188357524226e-06, "loss": 0.4011, "step": 17500 }, { "epoch": 0.5156224680700617, "grad_norm": 1.8745906131344876, "learning_rate": 5.598677820609578e-06, "loss": 0.4372, "step": 17501 }, { "epoch": 0.5156519305275253, "grad_norm": 1.4250948273873236, "learning_rate": 5.598167277362301e-06, "loss": 0.4234, "step": 17502 }, { "epoch": 0.5156813929849888, "grad_norm": 1.534140386279705, "learning_rate": 5.59765672778779e-06, "loss": 0.501, "step": 17503 }, { "epoch": 0.5157108554424524, "grad_norm": 1.5119851154633042, "learning_rate": 5.597146171891451e-06, "loss": 0.5646, "step": 17504 }, { "epoch": 0.515740317899916, "grad_norm": 1.5549125230316967, "learning_rate": 5.596635609678681e-06, "loss": 0.5616, "step": 17505 }, { "epoch": 0.5157697803573796, "grad_norm": 1.4801534538802514, "learning_rate": 5.596125041154881e-06, "loss": 0.3262, "step": 17506 }, { "epoch": 0.5157992428148432, "grad_norm": 1.4816056356201983, "learning_rate": 5.59561446632545e-06, "loss": 0.3945, "step": 17507 }, { "epoch": 0.5158287052723067, "grad_norm": 2.003442935428729, "learning_rate": 5.595103885195794e-06, "loss": 0.5302, "step": 17508 }, { "epoch": 0.5158581677297703, "grad_norm": 1.5942155252020822, "learning_rate": 5.594593297771309e-06, "loss": 0.4459, "step": 17509 }, { "epoch": 0.5158876301872339, "grad_norm": 1.447573501499209, "learning_rate": 5.594082704057398e-06, "loss": 0.5151, "step": 17510 }, { "epoch": 0.5159170926446975, "grad_norm": 1.6419363305994694, "learning_rate": 5.5935721040594614e-06, "loss": 0.4502, "step": 17511 }, { "epoch": 0.515946555102161, "grad_norm": 1.2783570447539672, "learning_rate": 5.593061497782899e-06, "loss": 0.269, "step": 17512 }, { "epoch": 0.5159760175596246, "grad_norm": 1.4179688406065198, "learning_rate": 5.592550885233112e-06, "loss": 0.4874, "step": 17513 }, { "epoch": 0.5160054800170882, "grad_norm": 1.3905310388256025, "learning_rate": 5.592040266415504e-06, "loss": 0.5034, "step": 17514 }, { "epoch": 0.5160349424745518, "grad_norm": 1.5652105420689555, "learning_rate": 5.591529641335475e-06, "loss": 0.4342, "step": 17515 }, { "epoch": 0.5160644049320153, "grad_norm": 1.3320961890470224, "learning_rate": 5.591019009998423e-06, "loss": 0.3379, "step": 17516 }, { "epoch": 0.5160938673894789, "grad_norm": 1.696253869682938, "learning_rate": 5.590508372409754e-06, "loss": 0.6026, "step": 17517 }, { "epoch": 0.5161233298469425, "grad_norm": 1.6382582171361373, "learning_rate": 5.589997728574868e-06, "loss": 0.3682, "step": 17518 }, { "epoch": 0.5161527923044061, "grad_norm": 1.4168805562193572, "learning_rate": 5.589487078499163e-06, "loss": 0.4202, "step": 17519 }, { "epoch": 0.5161822547618697, "grad_norm": 1.9319108632617643, "learning_rate": 5.588976422188047e-06, "loss": 0.4183, "step": 17520 }, { "epoch": 0.5162117172193332, "grad_norm": 1.5328587571330572, "learning_rate": 5.588465759646916e-06, "loss": 0.4061, "step": 17521 }, { "epoch": 0.5162411796767968, "grad_norm": 1.5142247659047214, "learning_rate": 5.587955090881173e-06, "loss": 0.398, "step": 17522 }, { "epoch": 0.5162706421342604, "grad_norm": 1.4627155847994397, "learning_rate": 5.587444415896222e-06, "loss": 0.4765, "step": 17523 }, { "epoch": 0.516300104591724, "grad_norm": 1.3349412627938755, "learning_rate": 5.586933734697461e-06, "loss": 0.3922, "step": 17524 }, { "epoch": 0.5163295670491875, "grad_norm": 1.3832017649968014, "learning_rate": 5.586423047290295e-06, "loss": 0.3555, "step": 17525 }, { "epoch": 0.5163590295066511, "grad_norm": 1.5797004444659348, "learning_rate": 5.5859123536801255e-06, "loss": 0.4343, "step": 17526 }, { "epoch": 0.5163884919641147, "grad_norm": 1.3329417631754301, "learning_rate": 5.585401653872351e-06, "loss": 0.4135, "step": 17527 }, { "epoch": 0.5164179544215783, "grad_norm": 1.5242512675974589, "learning_rate": 5.584890947872378e-06, "loss": 0.4756, "step": 17528 }, { "epoch": 0.5164474168790418, "grad_norm": 1.6712490658481587, "learning_rate": 5.584380235685607e-06, "loss": 0.5538, "step": 17529 }, { "epoch": 0.5164768793365054, "grad_norm": 1.4903685760241672, "learning_rate": 5.58386951731744e-06, "loss": 0.4111, "step": 17530 }, { "epoch": 0.516506341793969, "grad_norm": 1.5062724302000705, "learning_rate": 5.583358792773278e-06, "loss": 0.4675, "step": 17531 }, { "epoch": 0.5165358042514326, "grad_norm": 1.6225323590538943, "learning_rate": 5.582848062058526e-06, "loss": 0.5281, "step": 17532 }, { "epoch": 0.5165652667088962, "grad_norm": 1.5214810571633854, "learning_rate": 5.582337325178584e-06, "loss": 0.4258, "step": 17533 }, { "epoch": 0.5165947291663597, "grad_norm": 1.4965381521350718, "learning_rate": 5.581826582138856e-06, "loss": 0.4115, "step": 17534 }, { "epoch": 0.5166241916238233, "grad_norm": 1.555929489707374, "learning_rate": 5.581315832944743e-06, "loss": 0.4925, "step": 17535 }, { "epoch": 0.5166536540812869, "grad_norm": 1.6760621935396816, "learning_rate": 5.580805077601647e-06, "loss": 0.5169, "step": 17536 }, { "epoch": 0.5166831165387505, "grad_norm": 1.480651540943146, "learning_rate": 5.580294316114974e-06, "loss": 0.3748, "step": 17537 }, { "epoch": 0.516712578996214, "grad_norm": 1.5351574095605576, "learning_rate": 5.579783548490124e-06, "loss": 0.4916, "step": 17538 }, { "epoch": 0.5167420414536776, "grad_norm": 1.7306380734159965, "learning_rate": 5.5792727747325e-06, "loss": 0.5275, "step": 17539 }, { "epoch": 0.5167715039111412, "grad_norm": 1.412187135858645, "learning_rate": 5.5787619948475054e-06, "loss": 0.4286, "step": 17540 }, { "epoch": 0.5168009663686048, "grad_norm": 1.3763656401366366, "learning_rate": 5.5782512088405435e-06, "loss": 0.5018, "step": 17541 }, { "epoch": 0.5168304288260683, "grad_norm": 1.7239371482777917, "learning_rate": 5.577740416717014e-06, "loss": 0.3986, "step": 17542 }, { "epoch": 0.5168598912835319, "grad_norm": 1.4554124476332626, "learning_rate": 5.5772296184823245e-06, "loss": 0.3336, "step": 17543 }, { "epoch": 0.5168893537409955, "grad_norm": 1.3866889198197658, "learning_rate": 5.576718814141877e-06, "loss": 0.3409, "step": 17544 }, { "epoch": 0.5169188161984591, "grad_norm": 1.4749062464870235, "learning_rate": 5.576208003701073e-06, "loss": 0.4167, "step": 17545 }, { "epoch": 0.5169482786559227, "grad_norm": 1.5685664487826063, "learning_rate": 5.5756971871653145e-06, "loss": 0.4998, "step": 17546 }, { "epoch": 0.5169777411133862, "grad_norm": 1.4957817807250913, "learning_rate": 5.575186364540009e-06, "loss": 0.5163, "step": 17547 }, { "epoch": 0.5170072035708498, "grad_norm": 1.5542443225333038, "learning_rate": 5.574675535830555e-06, "loss": 0.4137, "step": 17548 }, { "epoch": 0.5170366660283134, "grad_norm": 1.4007221054149777, "learning_rate": 5.57416470104236e-06, "loss": 0.4716, "step": 17549 }, { "epoch": 0.517066128485777, "grad_norm": 1.645316009898177, "learning_rate": 5.573653860180825e-06, "loss": 0.5559, "step": 17550 }, { "epoch": 0.5170955909432405, "grad_norm": 1.8549895414446806, "learning_rate": 5.573143013251355e-06, "loss": 0.5259, "step": 17551 }, { "epoch": 0.5171250534007041, "grad_norm": 1.5441193897355754, "learning_rate": 5.572632160259352e-06, "loss": 0.4437, "step": 17552 }, { "epoch": 0.5171545158581677, "grad_norm": 1.5920183889815465, "learning_rate": 5.572121301210221e-06, "loss": 0.5981, "step": 17553 }, { "epoch": 0.5171839783156313, "grad_norm": 1.3749121687401964, "learning_rate": 5.571610436109366e-06, "loss": 0.4235, "step": 17554 }, { "epoch": 0.5172134407730948, "grad_norm": 1.4879976013209337, "learning_rate": 5.571099564962188e-06, "loss": 0.427, "step": 17555 }, { "epoch": 0.5172429032305584, "grad_norm": 1.4007481094240946, "learning_rate": 5.570588687774096e-06, "loss": 0.422, "step": 17556 }, { "epoch": 0.517272365688022, "grad_norm": 1.462547515563841, "learning_rate": 5.570077804550487e-06, "loss": 0.5535, "step": 17557 }, { "epoch": 0.5173018281454856, "grad_norm": 1.556734177635494, "learning_rate": 5.56956691529677e-06, "loss": 0.5571, "step": 17558 }, { "epoch": 0.5173312906029492, "grad_norm": 1.4230612836165895, "learning_rate": 5.56905602001835e-06, "loss": 0.4543, "step": 17559 }, { "epoch": 0.5173607530604127, "grad_norm": 1.56861285199728, "learning_rate": 5.568545118720627e-06, "loss": 0.4074, "step": 17560 }, { "epoch": 0.5173902155178763, "grad_norm": 1.5146948387611854, "learning_rate": 5.568034211409007e-06, "loss": 0.5867, "step": 17561 }, { "epoch": 0.5174196779753399, "grad_norm": 1.557475164040626, "learning_rate": 5.5675232980888945e-06, "loss": 0.5212, "step": 17562 }, { "epoch": 0.5174491404328035, "grad_norm": 1.4147666455403867, "learning_rate": 5.567012378765692e-06, "loss": 0.3367, "step": 17563 }, { "epoch": 0.517478602890267, "grad_norm": 1.6291086358093945, "learning_rate": 5.566501453444808e-06, "loss": 0.5201, "step": 17564 }, { "epoch": 0.5175080653477306, "grad_norm": 1.4692644924977856, "learning_rate": 5.565990522131643e-06, "loss": 0.3406, "step": 17565 }, { "epoch": 0.5175375278051942, "grad_norm": 1.2287057528435186, "learning_rate": 5.565479584831603e-06, "loss": 0.3538, "step": 17566 }, { "epoch": 0.5175669902626578, "grad_norm": 1.4854730708355597, "learning_rate": 5.564968641550091e-06, "loss": 0.5301, "step": 17567 }, { "epoch": 0.5175964527201213, "grad_norm": 1.5915701221019896, "learning_rate": 5.564457692292515e-06, "loss": 0.5804, "step": 17568 }, { "epoch": 0.5176259151775849, "grad_norm": 1.5840858866653098, "learning_rate": 5.563946737064276e-06, "loss": 0.4763, "step": 17569 }, { "epoch": 0.5176553776350485, "grad_norm": 1.504928290639473, "learning_rate": 5.563435775870781e-06, "loss": 0.5081, "step": 17570 }, { "epoch": 0.5176848400925121, "grad_norm": 1.3033091795237266, "learning_rate": 5.562924808717435e-06, "loss": 0.3219, "step": 17571 }, { "epoch": 0.5177143025499757, "grad_norm": 1.6781821133918005, "learning_rate": 5.56241383560964e-06, "loss": 0.557, "step": 17572 }, { "epoch": 0.5177437650074392, "grad_norm": 1.4776758248079245, "learning_rate": 5.561902856552804e-06, "loss": 0.3839, "step": 17573 }, { "epoch": 0.5177732274649028, "grad_norm": 1.4557873599191393, "learning_rate": 5.5613918715523295e-06, "loss": 0.4917, "step": 17574 }, { "epoch": 0.5178026899223664, "grad_norm": 1.648100423686178, "learning_rate": 5.560880880613625e-06, "loss": 0.3905, "step": 17575 }, { "epoch": 0.51783215237983, "grad_norm": 1.5598567694968422, "learning_rate": 5.560369883742092e-06, "loss": 0.5479, "step": 17576 }, { "epoch": 0.5178616148372935, "grad_norm": 1.3649921479124743, "learning_rate": 5.559858880943137e-06, "loss": 0.4232, "step": 17577 }, { "epoch": 0.5178910772947571, "grad_norm": 1.5361523776730617, "learning_rate": 5.559347872222166e-06, "loss": 0.3571, "step": 17578 }, { "epoch": 0.5179205397522207, "grad_norm": 1.3791814951686703, "learning_rate": 5.558836857584584e-06, "loss": 0.4811, "step": 17579 }, { "epoch": 0.5179500022096843, "grad_norm": 1.4125904431049867, "learning_rate": 5.558325837035795e-06, "loss": 0.4326, "step": 17580 }, { "epoch": 0.5179794646671478, "grad_norm": 1.5748138764927198, "learning_rate": 5.557814810581207e-06, "loss": 0.4742, "step": 17581 }, { "epoch": 0.5180089271246114, "grad_norm": 1.785592160459295, "learning_rate": 5.557303778226221e-06, "loss": 0.5844, "step": 17582 }, { "epoch": 0.518038389582075, "grad_norm": 1.2979496651642266, "learning_rate": 5.556792739976249e-06, "loss": 0.3728, "step": 17583 }, { "epoch": 0.5180678520395386, "grad_norm": 1.338801482714781, "learning_rate": 5.556281695836691e-06, "loss": 0.3988, "step": 17584 }, { "epoch": 0.5180973144970022, "grad_norm": 1.3884512531080995, "learning_rate": 5.5557706458129555e-06, "loss": 0.3871, "step": 17585 }, { "epoch": 0.5181267769544657, "grad_norm": 1.5710399773440824, "learning_rate": 5.555259589910448e-06, "loss": 0.4537, "step": 17586 }, { "epoch": 0.5181562394119293, "grad_norm": 1.7213138496184728, "learning_rate": 5.554748528134573e-06, "loss": 0.5, "step": 17587 }, { "epoch": 0.518185701869393, "grad_norm": 1.3872495881129783, "learning_rate": 5.554237460490735e-06, "loss": 0.4219, "step": 17588 }, { "epoch": 0.5182151643268565, "grad_norm": 1.7082205433516033, "learning_rate": 5.553726386984346e-06, "loss": 0.486, "step": 17589 }, { "epoch": 0.51824462678432, "grad_norm": 1.5442741996326819, "learning_rate": 5.553215307620806e-06, "loss": 0.3417, "step": 17590 }, { "epoch": 0.5182740892417836, "grad_norm": 1.4390166062938226, "learning_rate": 5.552704222405523e-06, "loss": 0.3872, "step": 17591 }, { "epoch": 0.5183035516992472, "grad_norm": 1.440684963767258, "learning_rate": 5.5521931313439036e-06, "loss": 0.381, "step": 17592 }, { "epoch": 0.5183330141567108, "grad_norm": 1.7041669663284802, "learning_rate": 5.551682034441354e-06, "loss": 0.4286, "step": 17593 }, { "epoch": 0.5183624766141743, "grad_norm": 1.6452643573469958, "learning_rate": 5.551170931703279e-06, "loss": 0.5355, "step": 17594 }, { "epoch": 0.5183919390716379, "grad_norm": 1.4198528695378665, "learning_rate": 5.550659823135086e-06, "loss": 0.5374, "step": 17595 }, { "epoch": 0.5184214015291015, "grad_norm": 1.3666193070521035, "learning_rate": 5.550148708742183e-06, "loss": 0.3535, "step": 17596 }, { "epoch": 0.5184508639865651, "grad_norm": 1.415181149349909, "learning_rate": 5.549637588529971e-06, "loss": 0.469, "step": 17597 }, { "epoch": 0.5184803264440287, "grad_norm": 1.553055931686497, "learning_rate": 5.549126462503863e-06, "loss": 0.4821, "step": 17598 }, { "epoch": 0.5185097889014922, "grad_norm": 1.4163233409352824, "learning_rate": 5.548615330669263e-06, "loss": 0.3765, "step": 17599 }, { "epoch": 0.5185392513589558, "grad_norm": 1.7332578914692247, "learning_rate": 5.5481041930315746e-06, "loss": 0.4041, "step": 17600 }, { "epoch": 0.5185687138164194, "grad_norm": 1.5355134190613866, "learning_rate": 5.547593049596208e-06, "loss": 0.4486, "step": 17601 }, { "epoch": 0.518598176273883, "grad_norm": 1.3152453547059058, "learning_rate": 5.547081900368571e-06, "loss": 0.3901, "step": 17602 }, { "epoch": 0.5186276387313465, "grad_norm": 1.6127599558553343, "learning_rate": 5.546570745354066e-06, "loss": 0.5339, "step": 17603 }, { "epoch": 0.5186571011888101, "grad_norm": 1.5027480470414287, "learning_rate": 5.546059584558103e-06, "loss": 0.4276, "step": 17604 }, { "epoch": 0.5186865636462737, "grad_norm": 1.64247027275735, "learning_rate": 5.54554841798609e-06, "loss": 0.3785, "step": 17605 }, { "epoch": 0.5187160261037373, "grad_norm": 1.5422205551853458, "learning_rate": 5.54503724564343e-06, "loss": 0.3927, "step": 17606 }, { "epoch": 0.5187454885612008, "grad_norm": 1.5276301415082487, "learning_rate": 5.544526067535531e-06, "loss": 0.542, "step": 17607 }, { "epoch": 0.5187749510186644, "grad_norm": 1.4429757637100629, "learning_rate": 5.544014883667803e-06, "loss": 0.3469, "step": 17608 }, { "epoch": 0.518804413476128, "grad_norm": 1.4569073452511654, "learning_rate": 5.543503694045652e-06, "loss": 0.3966, "step": 17609 }, { "epoch": 0.5188338759335916, "grad_norm": 1.566670517972557, "learning_rate": 5.542992498674483e-06, "loss": 0.4218, "step": 17610 }, { "epoch": 0.5188633383910553, "grad_norm": 1.644954791468065, "learning_rate": 5.542481297559707e-06, "loss": 0.4526, "step": 17611 }, { "epoch": 0.5188928008485187, "grad_norm": 1.5450987577191424, "learning_rate": 5.541970090706726e-06, "loss": 0.5885, "step": 17612 }, { "epoch": 0.5189222633059823, "grad_norm": 1.6479392877470913, "learning_rate": 5.541458878120953e-06, "loss": 0.6432, "step": 17613 }, { "epoch": 0.518951725763446, "grad_norm": 1.6635300664022186, "learning_rate": 5.540947659807792e-06, "loss": 0.4163, "step": 17614 }, { "epoch": 0.5189811882209096, "grad_norm": 1.7682557402530523, "learning_rate": 5.540436435772651e-06, "loss": 0.5238, "step": 17615 }, { "epoch": 0.519010650678373, "grad_norm": 1.3860410518801967, "learning_rate": 5.53992520602094e-06, "loss": 0.3269, "step": 17616 }, { "epoch": 0.5190401131358366, "grad_norm": 1.7837329806650362, "learning_rate": 5.539413970558063e-06, "loss": 0.5734, "step": 17617 }, { "epoch": 0.5190695755933002, "grad_norm": 1.5584354570119945, "learning_rate": 5.538902729389429e-06, "loss": 0.545, "step": 17618 }, { "epoch": 0.5190990380507639, "grad_norm": 1.3790241317190097, "learning_rate": 5.538391482520448e-06, "loss": 0.3668, "step": 17619 }, { "epoch": 0.5191285005082273, "grad_norm": 1.4173094237561288, "learning_rate": 5.537880229956526e-06, "loss": 0.3732, "step": 17620 }, { "epoch": 0.5191579629656909, "grad_norm": 1.5690851244538848, "learning_rate": 5.537368971703069e-06, "loss": 0.5467, "step": 17621 }, { "epoch": 0.5191874254231545, "grad_norm": 1.495790723084601, "learning_rate": 5.536857707765487e-06, "loss": 0.4045, "step": 17622 }, { "epoch": 0.5192168878806181, "grad_norm": 1.519737671586195, "learning_rate": 5.5363464381491895e-06, "loss": 0.521, "step": 17623 }, { "epoch": 0.5192463503380818, "grad_norm": 1.3553625429820073, "learning_rate": 5.535835162859582e-06, "loss": 0.3947, "step": 17624 }, { "epoch": 0.5192758127955452, "grad_norm": 1.477155080362825, "learning_rate": 5.535323881902073e-06, "loss": 0.443, "step": 17625 }, { "epoch": 0.5193052752530088, "grad_norm": 1.656230864646635, "learning_rate": 5.534812595282072e-06, "loss": 0.5524, "step": 17626 }, { "epoch": 0.5193347377104724, "grad_norm": 1.4740457903484319, "learning_rate": 5.5343013030049855e-06, "loss": 0.4791, "step": 17627 }, { "epoch": 0.519364200167936, "grad_norm": 1.521597740459901, "learning_rate": 5.5337900050762214e-06, "loss": 0.4589, "step": 17628 }, { "epoch": 0.5193936626253995, "grad_norm": 1.6421695657849065, "learning_rate": 5.5332787015011925e-06, "loss": 0.5074, "step": 17629 }, { "epoch": 0.5194231250828631, "grad_norm": 1.5650096795526158, "learning_rate": 5.532767392285302e-06, "loss": 0.4877, "step": 17630 }, { "epoch": 0.5194525875403267, "grad_norm": 1.5433827138710272, "learning_rate": 5.53225607743396e-06, "loss": 0.4972, "step": 17631 }, { "epoch": 0.5194820499977904, "grad_norm": 1.4006628325975736, "learning_rate": 5.531744756952576e-06, "loss": 0.3353, "step": 17632 }, { "epoch": 0.5195115124552538, "grad_norm": 1.4600346333752146, "learning_rate": 5.531233430846558e-06, "loss": 0.4625, "step": 17633 }, { "epoch": 0.5195409749127174, "grad_norm": 1.5871253841542523, "learning_rate": 5.530722099121315e-06, "loss": 0.3117, "step": 17634 }, { "epoch": 0.519570437370181, "grad_norm": 1.6897783791810999, "learning_rate": 5.5302107617822565e-06, "loss": 0.5403, "step": 17635 }, { "epoch": 0.5195998998276447, "grad_norm": 1.7313283559168144, "learning_rate": 5.529699418834787e-06, "loss": 0.4832, "step": 17636 }, { "epoch": 0.5196293622851083, "grad_norm": 1.5017719452895664, "learning_rate": 5.529188070284321e-06, "loss": 0.4908, "step": 17637 }, { "epoch": 0.5196588247425717, "grad_norm": 1.513698780588464, "learning_rate": 5.528676716136264e-06, "loss": 0.4177, "step": 17638 }, { "epoch": 0.5196882872000353, "grad_norm": 1.3564416472285319, "learning_rate": 5.528165356396027e-06, "loss": 0.3372, "step": 17639 }, { "epoch": 0.519717749657499, "grad_norm": 1.391008315014259, "learning_rate": 5.527653991069016e-06, "loss": 0.3837, "step": 17640 }, { "epoch": 0.5197472121149626, "grad_norm": 1.7633839341016715, "learning_rate": 5.527142620160644e-06, "loss": 0.4371, "step": 17641 }, { "epoch": 0.519776674572426, "grad_norm": 1.6618542744160476, "learning_rate": 5.526631243676316e-06, "loss": 0.521, "step": 17642 }, { "epoch": 0.5198061370298896, "grad_norm": 1.5568090617801027, "learning_rate": 5.526119861621443e-06, "loss": 0.3845, "step": 17643 }, { "epoch": 0.5198355994873533, "grad_norm": 1.3451200663909348, "learning_rate": 5.525608474001437e-06, "loss": 0.3795, "step": 17644 }, { "epoch": 0.5198650619448169, "grad_norm": 1.359916924944717, "learning_rate": 5.525097080821702e-06, "loss": 0.3181, "step": 17645 }, { "epoch": 0.5198945244022803, "grad_norm": 1.5201083579416736, "learning_rate": 5.524585682087651e-06, "loss": 0.4105, "step": 17646 }, { "epoch": 0.519923986859744, "grad_norm": 1.5126584849806064, "learning_rate": 5.524074277804693e-06, "loss": 0.4278, "step": 17647 }, { "epoch": 0.5199534493172075, "grad_norm": 1.518616675767656, "learning_rate": 5.523562867978236e-06, "loss": 0.473, "step": 17648 }, { "epoch": 0.5199829117746712, "grad_norm": 1.5481868036155018, "learning_rate": 5.5230514526136925e-06, "loss": 0.4714, "step": 17649 }, { "epoch": 0.5200123742321348, "grad_norm": 1.3680974001863206, "learning_rate": 5.522540031716469e-06, "loss": 0.4167, "step": 17650 }, { "epoch": 0.5200418366895982, "grad_norm": 1.5722418126543993, "learning_rate": 5.522028605291976e-06, "loss": 0.3876, "step": 17651 }, { "epoch": 0.5200712991470618, "grad_norm": 1.6358579271551215, "learning_rate": 5.521517173345623e-06, "loss": 0.433, "step": 17652 }, { "epoch": 0.5201007616045255, "grad_norm": 1.6859741522408276, "learning_rate": 5.521005735882822e-06, "loss": 0.3995, "step": 17653 }, { "epoch": 0.520130224061989, "grad_norm": 1.513899099887563, "learning_rate": 5.52049429290898e-06, "loss": 0.4281, "step": 17654 }, { "epoch": 0.5201596865194525, "grad_norm": 1.497327613263346, "learning_rate": 5.519982844429508e-06, "loss": 0.4975, "step": 17655 }, { "epoch": 0.5201891489769161, "grad_norm": 1.2906887532458973, "learning_rate": 5.519471390449817e-06, "loss": 0.4041, "step": 17656 }, { "epoch": 0.5202186114343798, "grad_norm": 1.7166350416997018, "learning_rate": 5.518959930975314e-06, "loss": 0.4699, "step": 17657 }, { "epoch": 0.5202480738918434, "grad_norm": 1.6802273574801017, "learning_rate": 5.518448466011411e-06, "loss": 0.6326, "step": 17658 }, { "epoch": 0.5202775363493068, "grad_norm": 1.454659647638281, "learning_rate": 5.517936995563521e-06, "loss": 0.49, "step": 17659 }, { "epoch": 0.5203069988067704, "grad_norm": 1.5163492058256822, "learning_rate": 5.51742551963705e-06, "loss": 0.3136, "step": 17660 }, { "epoch": 0.520336461264234, "grad_norm": 1.380072541410501, "learning_rate": 5.516914038237407e-06, "loss": 0.396, "step": 17661 }, { "epoch": 0.5203659237216977, "grad_norm": 1.51698977060202, "learning_rate": 5.516402551370008e-06, "loss": 0.4529, "step": 17662 }, { "epoch": 0.5203953861791613, "grad_norm": 1.6619873907147495, "learning_rate": 5.5158910590402585e-06, "loss": 0.402, "step": 17663 }, { "epoch": 0.5204248486366247, "grad_norm": 1.604596558237576, "learning_rate": 5.515379561253571e-06, "loss": 0.5022, "step": 17664 }, { "epoch": 0.5204543110940884, "grad_norm": 1.5096012133627348, "learning_rate": 5.514868058015357e-06, "loss": 0.5561, "step": 17665 }, { "epoch": 0.520483773551552, "grad_norm": 1.393218748195363, "learning_rate": 5.514356549331024e-06, "loss": 0.4536, "step": 17666 }, { "epoch": 0.5205132360090156, "grad_norm": 1.7468985412713816, "learning_rate": 5.513845035205984e-06, "loss": 0.518, "step": 17667 }, { "epoch": 0.520542698466479, "grad_norm": 1.7094674947304394, "learning_rate": 5.513333515645648e-06, "loss": 0.5744, "step": 17668 }, { "epoch": 0.5205721609239427, "grad_norm": 1.6387122438827235, "learning_rate": 5.512821990655427e-06, "loss": 0.5402, "step": 17669 }, { "epoch": 0.5206016233814063, "grad_norm": 1.2147997505181836, "learning_rate": 5.512310460240731e-06, "loss": 0.2485, "step": 17670 }, { "epoch": 0.5206310858388699, "grad_norm": 1.6853039866095498, "learning_rate": 5.511798924406972e-06, "loss": 0.4949, "step": 17671 }, { "epoch": 0.5206605482963333, "grad_norm": 1.474482333141628, "learning_rate": 5.511287383159558e-06, "loss": 0.4651, "step": 17672 }, { "epoch": 0.520690010753797, "grad_norm": 1.5808455096638958, "learning_rate": 5.510775836503902e-06, "loss": 0.5087, "step": 17673 }, { "epoch": 0.5207194732112606, "grad_norm": 1.3222917767315563, "learning_rate": 5.510264284445418e-06, "loss": 0.3215, "step": 17674 }, { "epoch": 0.5207489356687242, "grad_norm": 1.4737255067392767, "learning_rate": 5.5097527269895115e-06, "loss": 0.5499, "step": 17675 }, { "epoch": 0.5207783981261878, "grad_norm": 1.5492694266928544, "learning_rate": 5.5092411641415955e-06, "loss": 0.5797, "step": 17676 }, { "epoch": 0.5208078605836512, "grad_norm": 1.5111819500532278, "learning_rate": 5.508729595907083e-06, "loss": 0.3933, "step": 17677 }, { "epoch": 0.5208373230411149, "grad_norm": 1.6565039999666429, "learning_rate": 5.508218022291383e-06, "loss": 0.4986, "step": 17678 }, { "epoch": 0.5208667854985785, "grad_norm": 1.4482053333012828, "learning_rate": 5.5077064432999085e-06, "loss": 0.4508, "step": 17679 }, { "epoch": 0.5208962479560421, "grad_norm": 1.6143070631686378, "learning_rate": 5.50719485893807e-06, "loss": 0.515, "step": 17680 }, { "epoch": 0.5209257104135055, "grad_norm": 1.5365817205627372, "learning_rate": 5.5066832692112775e-06, "loss": 0.4538, "step": 17681 }, { "epoch": 0.5209551728709692, "grad_norm": 1.5674735296941658, "learning_rate": 5.5061716741249436e-06, "loss": 0.5263, "step": 17682 }, { "epoch": 0.5209846353284328, "grad_norm": 1.6541480016699264, "learning_rate": 5.505660073684481e-06, "loss": 0.4881, "step": 17683 }, { "epoch": 0.5210140977858964, "grad_norm": 1.508072212652572, "learning_rate": 5.505148467895302e-06, "loss": 0.5254, "step": 17684 }, { "epoch": 0.5210435602433598, "grad_norm": 1.4931682187105169, "learning_rate": 5.504636856762813e-06, "loss": 0.4374, "step": 17685 }, { "epoch": 0.5210730227008235, "grad_norm": 1.333960843635451, "learning_rate": 5.504125240292432e-06, "loss": 0.3798, "step": 17686 }, { "epoch": 0.521102485158287, "grad_norm": 1.3750209842408894, "learning_rate": 5.503613618489566e-06, "loss": 0.3862, "step": 17687 }, { "epoch": 0.5211319476157507, "grad_norm": 1.5489681005630527, "learning_rate": 5.503101991359628e-06, "loss": 0.3782, "step": 17688 }, { "epoch": 0.5211614100732143, "grad_norm": 1.6724309852038894, "learning_rate": 5.502590358908033e-06, "loss": 0.6134, "step": 17689 }, { "epoch": 0.5211908725306778, "grad_norm": 1.4271582611538312, "learning_rate": 5.5020787211401895e-06, "loss": 0.4306, "step": 17690 }, { "epoch": 0.5212203349881414, "grad_norm": 1.3472569335129816, "learning_rate": 5.501567078061509e-06, "loss": 0.3553, "step": 17691 }, { "epoch": 0.521249797445605, "grad_norm": 1.5716086408897283, "learning_rate": 5.501055429677407e-06, "loss": 0.5266, "step": 17692 }, { "epoch": 0.5212792599030686, "grad_norm": 1.8157193976982784, "learning_rate": 5.500543775993291e-06, "loss": 0.4195, "step": 17693 }, { "epoch": 0.521308722360532, "grad_norm": 1.320248420748768, "learning_rate": 5.500032117014577e-06, "loss": 0.3347, "step": 17694 }, { "epoch": 0.5213381848179957, "grad_norm": 1.4937000690512634, "learning_rate": 5.499520452746674e-06, "loss": 0.4644, "step": 17695 }, { "epoch": 0.5213676472754593, "grad_norm": 1.6035439368762363, "learning_rate": 5.4990087831949976e-06, "loss": 0.469, "step": 17696 }, { "epoch": 0.5213971097329229, "grad_norm": 1.579910144525073, "learning_rate": 5.498497108364958e-06, "loss": 0.6354, "step": 17697 }, { "epoch": 0.5214265721903864, "grad_norm": 1.499248723320521, "learning_rate": 5.4979854282619684e-06, "loss": 0.4171, "step": 17698 }, { "epoch": 0.52145603464785, "grad_norm": 1.539697490905147, "learning_rate": 5.4974737428914405e-06, "loss": 0.5274, "step": 17699 }, { "epoch": 0.5214854971053136, "grad_norm": 1.5324201598000187, "learning_rate": 5.4969620522587855e-06, "loss": 0.4484, "step": 17700 }, { "epoch": 0.5215149595627772, "grad_norm": 1.526228868745435, "learning_rate": 5.49645035636942e-06, "loss": 0.5367, "step": 17701 }, { "epoch": 0.5215444220202408, "grad_norm": 1.5003428237254643, "learning_rate": 5.495938655228751e-06, "loss": 0.4796, "step": 17702 }, { "epoch": 0.5215738844777043, "grad_norm": 1.4931808038818795, "learning_rate": 5.495426948842194e-06, "loss": 0.5118, "step": 17703 }, { "epoch": 0.5216033469351679, "grad_norm": 1.5669416281871287, "learning_rate": 5.494915237215164e-06, "loss": 0.4902, "step": 17704 }, { "epoch": 0.5216328093926315, "grad_norm": 1.5423007388576795, "learning_rate": 5.494403520353071e-06, "loss": 0.4483, "step": 17705 }, { "epoch": 0.5216622718500951, "grad_norm": 1.51936106880408, "learning_rate": 5.493891798261327e-06, "loss": 0.4623, "step": 17706 }, { "epoch": 0.5216917343075586, "grad_norm": 1.4200600137098895, "learning_rate": 5.493380070945348e-06, "loss": 0.3872, "step": 17707 }, { "epoch": 0.5217211967650222, "grad_norm": 1.4107060205770277, "learning_rate": 5.492868338410543e-06, "loss": 0.4768, "step": 17708 }, { "epoch": 0.5217506592224858, "grad_norm": 1.4880909583082749, "learning_rate": 5.4923566006623275e-06, "loss": 0.4403, "step": 17709 }, { "epoch": 0.5217801216799494, "grad_norm": 1.609192068254345, "learning_rate": 5.491844857706115e-06, "loss": 0.4927, "step": 17710 }, { "epoch": 0.5218095841374129, "grad_norm": 1.6192180753914225, "learning_rate": 5.491333109547316e-06, "loss": 0.4869, "step": 17711 }, { "epoch": 0.5218390465948765, "grad_norm": 1.5761069536247088, "learning_rate": 5.4908213561913456e-06, "loss": 0.3956, "step": 17712 }, { "epoch": 0.5218685090523401, "grad_norm": 1.5435443847632637, "learning_rate": 5.490309597643617e-06, "loss": 0.4891, "step": 17713 }, { "epoch": 0.5218979715098037, "grad_norm": 1.2636045722707083, "learning_rate": 5.489797833909543e-06, "loss": 0.3407, "step": 17714 }, { "epoch": 0.5219274339672673, "grad_norm": 1.4039051701225755, "learning_rate": 5.489286064994536e-06, "loss": 0.46, "step": 17715 }, { "epoch": 0.5219568964247308, "grad_norm": 1.3803321330490208, "learning_rate": 5.48877429090401e-06, "loss": 0.4583, "step": 17716 }, { "epoch": 0.5219863588821944, "grad_norm": 1.4157404259018762, "learning_rate": 5.488262511643379e-06, "loss": 0.4468, "step": 17717 }, { "epoch": 0.522015821339658, "grad_norm": 1.3536410376013563, "learning_rate": 5.487750727218055e-06, "loss": 0.3661, "step": 17718 }, { "epoch": 0.5220452837971216, "grad_norm": 1.543242475767672, "learning_rate": 5.487238937633454e-06, "loss": 0.3745, "step": 17719 }, { "epoch": 0.522074746254585, "grad_norm": 1.7745635611043007, "learning_rate": 5.4867271428949885e-06, "loss": 0.5329, "step": 17720 }, { "epoch": 0.5221042087120487, "grad_norm": 1.470878019390369, "learning_rate": 5.48621534300807e-06, "loss": 0.3601, "step": 17721 }, { "epoch": 0.5221336711695123, "grad_norm": 1.6645886930299152, "learning_rate": 5.485703537978114e-06, "loss": 0.4968, "step": 17722 }, { "epoch": 0.5221631336269759, "grad_norm": 1.3944856622689001, "learning_rate": 5.485191727810534e-06, "loss": 0.4463, "step": 17723 }, { "epoch": 0.5221925960844394, "grad_norm": 1.3898321369913809, "learning_rate": 5.484679912510744e-06, "loss": 0.3496, "step": 17724 }, { "epoch": 0.522222058541903, "grad_norm": 1.4771307193878263, "learning_rate": 5.484168092084158e-06, "loss": 0.5031, "step": 17725 }, { "epoch": 0.5222515209993666, "grad_norm": 1.5767437446413963, "learning_rate": 5.483656266536189e-06, "loss": 0.4441, "step": 17726 }, { "epoch": 0.5222809834568302, "grad_norm": 1.3585065117199921, "learning_rate": 5.48314443587225e-06, "loss": 0.4389, "step": 17727 }, { "epoch": 0.5223104459142938, "grad_norm": 1.7302663422379607, "learning_rate": 5.482632600097759e-06, "loss": 0.5738, "step": 17728 }, { "epoch": 0.5223399083717573, "grad_norm": 1.7472947349738235, "learning_rate": 5.482120759218128e-06, "loss": 0.5828, "step": 17729 }, { "epoch": 0.5223693708292209, "grad_norm": 1.4205794819115094, "learning_rate": 5.481608913238767e-06, "loss": 0.4953, "step": 17730 }, { "epoch": 0.5223988332866845, "grad_norm": 1.3552181274025952, "learning_rate": 5.481097062165096e-06, "loss": 0.3585, "step": 17731 }, { "epoch": 0.5224282957441481, "grad_norm": 1.2655151327989518, "learning_rate": 5.480585206002526e-06, "loss": 0.3045, "step": 17732 }, { "epoch": 0.5224577582016116, "grad_norm": 1.5360124096677341, "learning_rate": 5.4800733447564726e-06, "loss": 0.4644, "step": 17733 }, { "epoch": 0.5224872206590752, "grad_norm": 1.5271831524973276, "learning_rate": 5.4795614784323504e-06, "loss": 0.3801, "step": 17734 }, { "epoch": 0.5225166831165388, "grad_norm": 1.5937967352319786, "learning_rate": 5.4790496070355725e-06, "loss": 0.4674, "step": 17735 }, { "epoch": 0.5225461455740024, "grad_norm": 1.5278085091123506, "learning_rate": 5.478537730571554e-06, "loss": 0.4333, "step": 17736 }, { "epoch": 0.5225756080314659, "grad_norm": 1.5706325690274812, "learning_rate": 5.478025849045708e-06, "loss": 0.4774, "step": 17737 }, { "epoch": 0.5226050704889295, "grad_norm": 1.5324623438069258, "learning_rate": 5.4775139624634525e-06, "loss": 0.5186, "step": 17738 }, { "epoch": 0.5226345329463931, "grad_norm": 1.5729672874646505, "learning_rate": 5.4770020708301985e-06, "loss": 0.4005, "step": 17739 }, { "epoch": 0.5226639954038567, "grad_norm": 1.7054587878923426, "learning_rate": 5.476490174151361e-06, "loss": 0.4785, "step": 17740 }, { "epoch": 0.5226934578613203, "grad_norm": 1.5211484231495085, "learning_rate": 5.4759782724323575e-06, "loss": 0.4771, "step": 17741 }, { "epoch": 0.5227229203187838, "grad_norm": 1.4323655372330553, "learning_rate": 5.475466365678599e-06, "loss": 0.3586, "step": 17742 }, { "epoch": 0.5227523827762474, "grad_norm": 1.5793950853288596, "learning_rate": 5.474954453895503e-06, "loss": 0.4961, "step": 17743 }, { "epoch": 0.522781845233711, "grad_norm": 1.5415856810514896, "learning_rate": 5.4744425370884845e-06, "loss": 0.4365, "step": 17744 }, { "epoch": 0.5228113076911746, "grad_norm": 1.73650090215396, "learning_rate": 5.473930615262957e-06, "loss": 0.5658, "step": 17745 }, { "epoch": 0.5228407701486381, "grad_norm": 1.6035127296902212, "learning_rate": 5.473418688424335e-06, "loss": 0.4834, "step": 17746 }, { "epoch": 0.5228702326061017, "grad_norm": 1.5043969608512828, "learning_rate": 5.472906756578035e-06, "loss": 0.4794, "step": 17747 }, { "epoch": 0.5228996950635653, "grad_norm": 1.5308353921270228, "learning_rate": 5.472394819729471e-06, "loss": 0.4721, "step": 17748 }, { "epoch": 0.5229291575210289, "grad_norm": 1.4161302422893947, "learning_rate": 5.471882877884059e-06, "loss": 0.509, "step": 17749 }, { "epoch": 0.5229586199784924, "grad_norm": 1.5234557899820345, "learning_rate": 5.471370931047215e-06, "loss": 0.4367, "step": 17750 }, { "epoch": 0.522988082435956, "grad_norm": 1.6859451925422275, "learning_rate": 5.47085897922435e-06, "loss": 0.4748, "step": 17751 }, { "epoch": 0.5230175448934196, "grad_norm": 1.4771932045513443, "learning_rate": 5.470347022420884e-06, "loss": 0.4818, "step": 17752 }, { "epoch": 0.5230470073508832, "grad_norm": 1.4464022596855355, "learning_rate": 5.469835060642232e-06, "loss": 0.4321, "step": 17753 }, { "epoch": 0.5230764698083468, "grad_norm": 1.3579275910832684, "learning_rate": 5.469323093893805e-06, "loss": 0.3779, "step": 17754 }, { "epoch": 0.5231059322658103, "grad_norm": 1.4913491669197072, "learning_rate": 5.468811122181022e-06, "loss": 0.3714, "step": 17755 }, { "epoch": 0.5231353947232739, "grad_norm": 1.5745229873366395, "learning_rate": 5.468299145509299e-06, "loss": 0.4978, "step": 17756 }, { "epoch": 0.5231648571807375, "grad_norm": 1.5954285903320609, "learning_rate": 5.46778716388405e-06, "loss": 0.5776, "step": 17757 }, { "epoch": 0.5231943196382011, "grad_norm": 1.5156912169418941, "learning_rate": 5.46727517731069e-06, "loss": 0.381, "step": 17758 }, { "epoch": 0.5232237820956646, "grad_norm": 1.4809720791104397, "learning_rate": 5.466763185794638e-06, "loss": 0.4079, "step": 17759 }, { "epoch": 0.5232532445531282, "grad_norm": 1.675485252093878, "learning_rate": 5.466251189341305e-06, "loss": 0.5158, "step": 17760 }, { "epoch": 0.5232827070105918, "grad_norm": 1.397882509391239, "learning_rate": 5.46573918795611e-06, "loss": 0.3905, "step": 17761 }, { "epoch": 0.5233121694680554, "grad_norm": 1.5878221205485155, "learning_rate": 5.465227181644467e-06, "loss": 0.4365, "step": 17762 }, { "epoch": 0.5233416319255189, "grad_norm": 1.5244998849183213, "learning_rate": 5.464715170411793e-06, "loss": 0.554, "step": 17763 }, { "epoch": 0.5233710943829825, "grad_norm": 1.458172079038, "learning_rate": 5.464203154263505e-06, "loss": 0.447, "step": 17764 }, { "epoch": 0.5234005568404461, "grad_norm": 1.4357035624866634, "learning_rate": 5.463691133205016e-06, "loss": 0.5365, "step": 17765 }, { "epoch": 0.5234300192979097, "grad_norm": 1.3602154494270047, "learning_rate": 5.4631791072417425e-06, "loss": 0.4479, "step": 17766 }, { "epoch": 0.5234594817553733, "grad_norm": 1.5956554371733307, "learning_rate": 5.462667076379102e-06, "loss": 0.386, "step": 17767 }, { "epoch": 0.5234889442128368, "grad_norm": 1.3008770480941565, "learning_rate": 5.462155040622512e-06, "loss": 0.4058, "step": 17768 }, { "epoch": 0.5235184066703004, "grad_norm": 1.7248439676624323, "learning_rate": 5.4616429999773856e-06, "loss": 0.5044, "step": 17769 }, { "epoch": 0.523547869127764, "grad_norm": 1.4471244901170175, "learning_rate": 5.461130954449141e-06, "loss": 0.4226, "step": 17770 }, { "epoch": 0.5235773315852276, "grad_norm": 1.6943784068789371, "learning_rate": 5.4606189040431934e-06, "loss": 0.4135, "step": 17771 }, { "epoch": 0.5236067940426911, "grad_norm": 1.3723575346280468, "learning_rate": 5.460106848764958e-06, "loss": 0.468, "step": 17772 }, { "epoch": 0.5236362565001547, "grad_norm": 1.6100166067779806, "learning_rate": 5.459594788619853e-06, "loss": 0.3721, "step": 17773 }, { "epoch": 0.5236657189576183, "grad_norm": 1.53233595178162, "learning_rate": 5.459082723613297e-06, "loss": 0.426, "step": 17774 }, { "epoch": 0.5236951814150819, "grad_norm": 1.697024318040585, "learning_rate": 5.4585706537507015e-06, "loss": 0.4255, "step": 17775 }, { "epoch": 0.5237246438725454, "grad_norm": 1.6639939491397922, "learning_rate": 5.4580585790374855e-06, "loss": 0.4539, "step": 17776 }, { "epoch": 0.523754106330009, "grad_norm": 1.5752236099722248, "learning_rate": 5.457546499479065e-06, "loss": 0.5238, "step": 17777 }, { "epoch": 0.5237835687874726, "grad_norm": 1.4671311552755713, "learning_rate": 5.457034415080857e-06, "loss": 0.4114, "step": 17778 }, { "epoch": 0.5238130312449362, "grad_norm": 1.3990730503337137, "learning_rate": 5.45652232584828e-06, "loss": 0.2954, "step": 17779 }, { "epoch": 0.5238424937023998, "grad_norm": 1.2687296726534347, "learning_rate": 5.456010231786749e-06, "loss": 0.3072, "step": 17780 }, { "epoch": 0.5238719561598633, "grad_norm": 1.60164134798938, "learning_rate": 5.4554981329016785e-06, "loss": 0.4822, "step": 17781 }, { "epoch": 0.5239014186173269, "grad_norm": 1.514750281970213, "learning_rate": 5.454986029198489e-06, "loss": 0.5424, "step": 17782 }, { "epoch": 0.5239308810747905, "grad_norm": 1.4064658404996955, "learning_rate": 5.454473920682596e-06, "loss": 0.527, "step": 17783 }, { "epoch": 0.5239603435322541, "grad_norm": 1.4448032992407582, "learning_rate": 5.453961807359417e-06, "loss": 0.463, "step": 17784 }, { "epoch": 0.5239898059897176, "grad_norm": 1.4833557054181925, "learning_rate": 5.453449689234367e-06, "loss": 0.47, "step": 17785 }, { "epoch": 0.5240192684471812, "grad_norm": 1.5126006650656438, "learning_rate": 5.452937566312864e-06, "loss": 0.4569, "step": 17786 }, { "epoch": 0.5240487309046448, "grad_norm": 1.3690724869759276, "learning_rate": 5.452425438600328e-06, "loss": 0.4028, "step": 17787 }, { "epoch": 0.5240781933621084, "grad_norm": 1.4002841585222103, "learning_rate": 5.45191330610217e-06, "loss": 0.4663, "step": 17788 }, { "epoch": 0.5241076558195719, "grad_norm": 1.5274185107244433, "learning_rate": 5.451401168823814e-06, "loss": 0.4915, "step": 17789 }, { "epoch": 0.5241371182770355, "grad_norm": 1.4732445309258402, "learning_rate": 5.4508890267706725e-06, "loss": 0.4692, "step": 17790 }, { "epoch": 0.5241665807344991, "grad_norm": 1.5820935944603998, "learning_rate": 5.450376879948164e-06, "loss": 0.5984, "step": 17791 }, { "epoch": 0.5241960431919627, "grad_norm": 1.4587333299926006, "learning_rate": 5.449864728361707e-06, "loss": 0.4405, "step": 17792 }, { "epoch": 0.5242255056494263, "grad_norm": 1.6397359842591057, "learning_rate": 5.449352572016717e-06, "loss": 0.5359, "step": 17793 }, { "epoch": 0.5242549681068898, "grad_norm": 1.4922521059822351, "learning_rate": 5.448840410918613e-06, "loss": 0.5523, "step": 17794 }, { "epoch": 0.5242844305643534, "grad_norm": 1.761883993764897, "learning_rate": 5.448328245072813e-06, "loss": 0.4408, "step": 17795 }, { "epoch": 0.524313893021817, "grad_norm": 1.648836058505666, "learning_rate": 5.447816074484732e-06, "loss": 0.4741, "step": 17796 }, { "epoch": 0.5243433554792806, "grad_norm": 1.642057406551977, "learning_rate": 5.4473038991597896e-06, "loss": 0.4162, "step": 17797 }, { "epoch": 0.5243728179367441, "grad_norm": 1.5178990183530103, "learning_rate": 5.446791719103403e-06, "loss": 0.5252, "step": 17798 }, { "epoch": 0.5244022803942077, "grad_norm": 1.4991962965550878, "learning_rate": 5.446279534320989e-06, "loss": 0.4521, "step": 17799 }, { "epoch": 0.5244317428516713, "grad_norm": 1.3112979593779535, "learning_rate": 5.445767344817967e-06, "loss": 0.4282, "step": 17800 }, { "epoch": 0.5244612053091349, "grad_norm": 1.509974832118399, "learning_rate": 5.445255150599753e-06, "loss": 0.4239, "step": 17801 }, { "epoch": 0.5244906677665984, "grad_norm": 1.3643123503468737, "learning_rate": 5.444742951671767e-06, "loss": 0.3517, "step": 17802 }, { "epoch": 0.524520130224062, "grad_norm": 1.5130633809407688, "learning_rate": 5.444230748039423e-06, "loss": 0.4979, "step": 17803 }, { "epoch": 0.5245495926815256, "grad_norm": 1.6503564926989907, "learning_rate": 5.443718539708144e-06, "loss": 0.5201, "step": 17804 }, { "epoch": 0.5245790551389892, "grad_norm": 1.4424841485057276, "learning_rate": 5.443206326683346e-06, "loss": 0.3797, "step": 17805 }, { "epoch": 0.5246085175964528, "grad_norm": 1.6731330124653256, "learning_rate": 5.442694108970445e-06, "loss": 0.3095, "step": 17806 }, { "epoch": 0.5246379800539163, "grad_norm": 1.494148553645718, "learning_rate": 5.442181886574861e-06, "loss": 0.5866, "step": 17807 }, { "epoch": 0.5246674425113799, "grad_norm": 1.5758880885764825, "learning_rate": 5.441669659502011e-06, "loss": 0.5297, "step": 17808 }, { "epoch": 0.5246969049688435, "grad_norm": 1.556233106441655, "learning_rate": 5.4411574277573156e-06, "loss": 0.424, "step": 17809 }, { "epoch": 0.5247263674263071, "grad_norm": 1.4802610767664546, "learning_rate": 5.440645191346191e-06, "loss": 0.531, "step": 17810 }, { "epoch": 0.5247558298837706, "grad_norm": 1.6996904224119092, "learning_rate": 5.440132950274054e-06, "loss": 0.3917, "step": 17811 }, { "epoch": 0.5247852923412342, "grad_norm": 1.519828268410606, "learning_rate": 5.439620704546325e-06, "loss": 0.3853, "step": 17812 }, { "epoch": 0.5248147547986978, "grad_norm": 1.6526517851417375, "learning_rate": 5.4391084541684246e-06, "loss": 0.5388, "step": 17813 }, { "epoch": 0.5248442172561614, "grad_norm": 1.649715646241813, "learning_rate": 5.438596199145768e-06, "loss": 0.5373, "step": 17814 }, { "epoch": 0.5248736797136249, "grad_norm": 1.649362962960398, "learning_rate": 5.4380839394837735e-06, "loss": 0.4389, "step": 17815 }, { "epoch": 0.5249031421710885, "grad_norm": 1.4953229925958247, "learning_rate": 5.437571675187862e-06, "loss": 0.5822, "step": 17816 }, { "epoch": 0.5249326046285521, "grad_norm": 1.5028338095349916, "learning_rate": 5.437059406263449e-06, "loss": 0.4367, "step": 17817 }, { "epoch": 0.5249620670860157, "grad_norm": 1.3988909166642205, "learning_rate": 5.4365471327159544e-06, "loss": 0.5026, "step": 17818 }, { "epoch": 0.5249915295434793, "grad_norm": 1.643695363542432, "learning_rate": 5.436034854550799e-06, "loss": 0.2727, "step": 17819 }, { "epoch": 0.5250209920009428, "grad_norm": 1.4861553777726493, "learning_rate": 5.435522571773401e-06, "loss": 0.5105, "step": 17820 }, { "epoch": 0.5250504544584064, "grad_norm": 1.3937452749240575, "learning_rate": 5.435010284389175e-06, "loss": 0.4432, "step": 17821 }, { "epoch": 0.52507991691587, "grad_norm": 1.4692991237243793, "learning_rate": 5.434497992403545e-06, "loss": 0.39, "step": 17822 }, { "epoch": 0.5251093793733336, "grad_norm": 1.4953887040314109, "learning_rate": 5.433985695821925e-06, "loss": 0.4682, "step": 17823 }, { "epoch": 0.5251388418307971, "grad_norm": 1.3653369010250473, "learning_rate": 5.433473394649739e-06, "loss": 0.3791, "step": 17824 }, { "epoch": 0.5251683042882607, "grad_norm": 1.42303337859859, "learning_rate": 5.432961088892402e-06, "loss": 0.3891, "step": 17825 }, { "epoch": 0.5251977667457243, "grad_norm": 1.5380710821302888, "learning_rate": 5.4324487785553355e-06, "loss": 0.5041, "step": 17826 }, { "epoch": 0.5252272292031879, "grad_norm": 1.5464168485221894, "learning_rate": 5.431936463643958e-06, "loss": 0.6073, "step": 17827 }, { "epoch": 0.5252566916606514, "grad_norm": 1.3566153012213276, "learning_rate": 5.431424144163687e-06, "loss": 0.3921, "step": 17828 }, { "epoch": 0.525286154118115, "grad_norm": 1.7165664258510702, "learning_rate": 5.430911820119944e-06, "loss": 0.4404, "step": 17829 }, { "epoch": 0.5253156165755786, "grad_norm": 1.7288368490824089, "learning_rate": 5.430399491518147e-06, "loss": 0.5263, "step": 17830 }, { "epoch": 0.5253450790330422, "grad_norm": 1.5220339493677484, "learning_rate": 5.429887158363714e-06, "loss": 0.506, "step": 17831 }, { "epoch": 0.5253745414905058, "grad_norm": 1.5700568314069991, "learning_rate": 5.429374820662067e-06, "loss": 0.511, "step": 17832 }, { "epoch": 0.5254040039479693, "grad_norm": 1.497907919402989, "learning_rate": 5.428862478418623e-06, "loss": 0.291, "step": 17833 }, { "epoch": 0.5254334664054329, "grad_norm": 1.3563496295192778, "learning_rate": 5.4283501316388035e-06, "loss": 0.4274, "step": 17834 }, { "epoch": 0.5254629288628965, "grad_norm": 1.5996957178371998, "learning_rate": 5.427837780328027e-06, "loss": 0.5056, "step": 17835 }, { "epoch": 0.5254923913203601, "grad_norm": 1.3487685598280301, "learning_rate": 5.4273254244917115e-06, "loss": 0.3005, "step": 17836 }, { "epoch": 0.5255218537778236, "grad_norm": 1.5706999788516487, "learning_rate": 5.426813064135279e-06, "loss": 0.5094, "step": 17837 }, { "epoch": 0.5255513162352872, "grad_norm": 1.5438974404185848, "learning_rate": 5.4263006992641465e-06, "loss": 0.5019, "step": 17838 }, { "epoch": 0.5255807786927508, "grad_norm": 1.4434779740063854, "learning_rate": 5.425788329883737e-06, "loss": 0.4109, "step": 17839 }, { "epoch": 0.5256102411502144, "grad_norm": 1.4195033010549867, "learning_rate": 5.425275955999467e-06, "loss": 0.3599, "step": 17840 }, { "epoch": 0.5256397036076779, "grad_norm": 1.528969444576945, "learning_rate": 5.424763577616758e-06, "loss": 0.4455, "step": 17841 }, { "epoch": 0.5256691660651415, "grad_norm": 1.4543308759392346, "learning_rate": 5.424251194741029e-06, "loss": 0.383, "step": 17842 }, { "epoch": 0.5256986285226051, "grad_norm": 1.37024483328228, "learning_rate": 5.423738807377702e-06, "loss": 0.4009, "step": 17843 }, { "epoch": 0.5257280909800687, "grad_norm": 1.3115597170373112, "learning_rate": 5.423226415532193e-06, "loss": 0.3394, "step": 17844 }, { "epoch": 0.5257575534375323, "grad_norm": 1.5545689429336387, "learning_rate": 5.422714019209925e-06, "loss": 0.5408, "step": 17845 }, { "epoch": 0.5257870158949958, "grad_norm": 1.4736143797982741, "learning_rate": 5.422201618416315e-06, "loss": 0.4898, "step": 17846 }, { "epoch": 0.5258164783524594, "grad_norm": 1.6089718501600823, "learning_rate": 5.421689213156787e-06, "loss": 0.3812, "step": 17847 }, { "epoch": 0.525845940809923, "grad_norm": 1.5608288897597902, "learning_rate": 5.421176803436759e-06, "loss": 0.5173, "step": 17848 }, { "epoch": 0.5258754032673866, "grad_norm": 1.755458079957273, "learning_rate": 5.420664389261651e-06, "loss": 0.5835, "step": 17849 }, { "epoch": 0.5259048657248501, "grad_norm": 1.6366799305264728, "learning_rate": 5.420151970636882e-06, "loss": 0.6343, "step": 17850 }, { "epoch": 0.5259343281823137, "grad_norm": 1.49126751504067, "learning_rate": 5.419639547567874e-06, "loss": 0.4752, "step": 17851 }, { "epoch": 0.5259637906397773, "grad_norm": 1.4731417648487826, "learning_rate": 5.419127120060047e-06, "loss": 0.4455, "step": 17852 }, { "epoch": 0.5259932530972409, "grad_norm": 1.451569463400117, "learning_rate": 5.418614688118822e-06, "loss": 0.3819, "step": 17853 }, { "epoch": 0.5260227155547044, "grad_norm": 1.311826087642587, "learning_rate": 5.418102251749617e-06, "loss": 0.3483, "step": 17854 }, { "epoch": 0.526052178012168, "grad_norm": 1.7516595628098117, "learning_rate": 5.417589810957854e-06, "loss": 0.5309, "step": 17855 }, { "epoch": 0.5260816404696316, "grad_norm": 1.6206221741589277, "learning_rate": 5.417077365748954e-06, "loss": 0.4646, "step": 17856 }, { "epoch": 0.5261111029270952, "grad_norm": 1.4439990751428544, "learning_rate": 5.416564916128337e-06, "loss": 0.4913, "step": 17857 }, { "epoch": 0.5261405653845588, "grad_norm": 1.8798109757043253, "learning_rate": 5.416052462101422e-06, "loss": 0.658, "step": 17858 }, { "epoch": 0.5261700278420223, "grad_norm": 1.5331447161295877, "learning_rate": 5.415540003673633e-06, "loss": 0.4823, "step": 17859 }, { "epoch": 0.5261994902994859, "grad_norm": 1.4489673011482163, "learning_rate": 5.415027540850386e-06, "loss": 0.353, "step": 17860 }, { "epoch": 0.5262289527569495, "grad_norm": 1.3549456137233922, "learning_rate": 5.414515073637105e-06, "loss": 0.4617, "step": 17861 }, { "epoch": 0.5262584152144131, "grad_norm": 1.5747833317444881, "learning_rate": 5.41400260203921e-06, "loss": 0.3907, "step": 17862 }, { "epoch": 0.5262878776718766, "grad_norm": 1.5983696225897635, "learning_rate": 5.413490126062122e-06, "loss": 0.5038, "step": 17863 }, { "epoch": 0.5263173401293402, "grad_norm": 1.8176587033382035, "learning_rate": 5.41297764571126e-06, "loss": 0.4138, "step": 17864 }, { "epoch": 0.5263468025868038, "grad_norm": 1.4466251806376271, "learning_rate": 5.4124651609920485e-06, "loss": 0.4318, "step": 17865 }, { "epoch": 0.5263762650442674, "grad_norm": 1.5632530705532295, "learning_rate": 5.411952671909905e-06, "loss": 0.5229, "step": 17866 }, { "epoch": 0.5264057275017309, "grad_norm": 1.2705296220826021, "learning_rate": 5.411440178470251e-06, "loss": 0.3785, "step": 17867 }, { "epoch": 0.5264351899591945, "grad_norm": 1.7009141030421122, "learning_rate": 5.41092768067851e-06, "loss": 0.6679, "step": 17868 }, { "epoch": 0.5264646524166581, "grad_norm": 1.5151947682234703, "learning_rate": 5.410415178540099e-06, "loss": 0.4675, "step": 17869 }, { "epoch": 0.5264941148741217, "grad_norm": 1.6492769089960826, "learning_rate": 5.4099026720604435e-06, "loss": 0.5306, "step": 17870 }, { "epoch": 0.5265235773315853, "grad_norm": 1.4237454675224237, "learning_rate": 5.4093901612449615e-06, "loss": 0.4244, "step": 17871 }, { "epoch": 0.5265530397890488, "grad_norm": 1.6579714179986593, "learning_rate": 5.4088776460990745e-06, "loss": 0.521, "step": 17872 }, { "epoch": 0.5265825022465124, "grad_norm": 1.246320893183828, "learning_rate": 5.408365126628206e-06, "loss": 0.2732, "step": 17873 }, { "epoch": 0.526611964703976, "grad_norm": 1.4544509308447244, "learning_rate": 5.407852602837774e-06, "loss": 0.5166, "step": 17874 }, { "epoch": 0.5266414271614396, "grad_norm": 1.4335089547944257, "learning_rate": 5.407340074733202e-06, "loss": 0.3198, "step": 17875 }, { "epoch": 0.5266708896189031, "grad_norm": 1.4805096252693992, "learning_rate": 5.40682754231991e-06, "loss": 0.4142, "step": 17876 }, { "epoch": 0.5267003520763667, "grad_norm": 1.4342087390893496, "learning_rate": 5.4063150056033224e-06, "loss": 0.3129, "step": 17877 }, { "epoch": 0.5267298145338303, "grad_norm": 1.6094327865735332, "learning_rate": 5.405802464588858e-06, "loss": 0.5745, "step": 17878 }, { "epoch": 0.5267592769912939, "grad_norm": 1.7325969546829385, "learning_rate": 5.4052899192819376e-06, "loss": 0.4977, "step": 17879 }, { "epoch": 0.5267887394487574, "grad_norm": 1.719258175076552, "learning_rate": 5.404777369687985e-06, "loss": 0.6106, "step": 17880 }, { "epoch": 0.526818201906221, "grad_norm": 1.4466791912584438, "learning_rate": 5.404264815812419e-06, "loss": 0.3244, "step": 17881 }, { "epoch": 0.5268476643636846, "grad_norm": 1.4623635575452993, "learning_rate": 5.4037522576606634e-06, "loss": 0.4625, "step": 17882 }, { "epoch": 0.5268771268211482, "grad_norm": 1.5819004442517381, "learning_rate": 5.403239695238142e-06, "loss": 0.4674, "step": 17883 }, { "epoch": 0.5269065892786118, "grad_norm": 1.3669742086991141, "learning_rate": 5.402727128550272e-06, "loss": 0.3781, "step": 17884 }, { "epoch": 0.5269360517360753, "grad_norm": 1.444250906162302, "learning_rate": 5.402214557602477e-06, "loss": 0.4698, "step": 17885 }, { "epoch": 0.5269655141935389, "grad_norm": 1.5103398612804915, "learning_rate": 5.401701982400179e-06, "loss": 0.5527, "step": 17886 }, { "epoch": 0.5269949766510025, "grad_norm": 1.6393395876293322, "learning_rate": 5.4011894029488e-06, "loss": 0.5362, "step": 17887 }, { "epoch": 0.5270244391084661, "grad_norm": 1.6329136913991074, "learning_rate": 5.400676819253763e-06, "loss": 0.5468, "step": 17888 }, { "epoch": 0.5270539015659296, "grad_norm": 1.5508102490044984, "learning_rate": 5.400164231320488e-06, "loss": 0.5816, "step": 17889 }, { "epoch": 0.5270833640233932, "grad_norm": 1.6347795298965542, "learning_rate": 5.399651639154398e-06, "loss": 0.5168, "step": 17890 }, { "epoch": 0.5271128264808568, "grad_norm": 1.872113904943718, "learning_rate": 5.399139042760912e-06, "loss": 0.5562, "step": 17891 }, { "epoch": 0.5271422889383204, "grad_norm": 1.6170210594248073, "learning_rate": 5.398626442145458e-06, "loss": 0.3831, "step": 17892 }, { "epoch": 0.5271717513957839, "grad_norm": 1.5664850558842434, "learning_rate": 5.398113837313455e-06, "loss": 0.4092, "step": 17893 }, { "epoch": 0.5272012138532475, "grad_norm": 1.3689166777535686, "learning_rate": 5.3976012282703225e-06, "loss": 0.3155, "step": 17894 }, { "epoch": 0.5272306763107111, "grad_norm": 1.5481595331080642, "learning_rate": 5.397088615021489e-06, "loss": 0.4252, "step": 17895 }, { "epoch": 0.5272601387681747, "grad_norm": 1.6563761273745417, "learning_rate": 5.39657599757237e-06, "loss": 0.4774, "step": 17896 }, { "epoch": 0.5272896012256383, "grad_norm": 1.540228747490806, "learning_rate": 5.396063375928391e-06, "loss": 0.4778, "step": 17897 }, { "epoch": 0.5273190636831018, "grad_norm": 1.391594946191908, "learning_rate": 5.395550750094978e-06, "loss": 0.3526, "step": 17898 }, { "epoch": 0.5273485261405654, "grad_norm": 1.7933064378698396, "learning_rate": 5.395038120077546e-06, "loss": 0.4461, "step": 17899 }, { "epoch": 0.527377988598029, "grad_norm": 1.5213350973507702, "learning_rate": 5.394525485881522e-06, "loss": 0.4565, "step": 17900 }, { "epoch": 0.5274074510554926, "grad_norm": 1.2539103287528277, "learning_rate": 5.394012847512329e-06, "loss": 0.3042, "step": 17901 }, { "epoch": 0.5274369135129561, "grad_norm": 1.4806855286164509, "learning_rate": 5.393500204975386e-06, "loss": 0.487, "step": 17902 }, { "epoch": 0.5274663759704197, "grad_norm": 1.4124599984944002, "learning_rate": 5.392987558276119e-06, "loss": 0.377, "step": 17903 }, { "epoch": 0.5274958384278833, "grad_norm": 1.882979717748013, "learning_rate": 5.39247490741995e-06, "loss": 0.6232, "step": 17904 }, { "epoch": 0.5275253008853469, "grad_norm": 1.5287406821277083, "learning_rate": 5.3919622524123e-06, "loss": 0.4919, "step": 17905 }, { "epoch": 0.5275547633428104, "grad_norm": 1.6484254725147738, "learning_rate": 5.391449593258593e-06, "loss": 0.5685, "step": 17906 }, { "epoch": 0.527584225800274, "grad_norm": 1.6512396959358635, "learning_rate": 5.390936929964252e-06, "loss": 0.291, "step": 17907 }, { "epoch": 0.5276136882577376, "grad_norm": 1.5336706275683902, "learning_rate": 5.390424262534699e-06, "loss": 0.287, "step": 17908 }, { "epoch": 0.5276431507152012, "grad_norm": 1.3867469865490432, "learning_rate": 5.3899115909753565e-06, "loss": 0.4035, "step": 17909 }, { "epoch": 0.5276726131726648, "grad_norm": 1.6237388490053466, "learning_rate": 5.3893989152916495e-06, "loss": 0.3635, "step": 17910 }, { "epoch": 0.5277020756301283, "grad_norm": 1.4040653866745205, "learning_rate": 5.388886235488997e-06, "loss": 0.4291, "step": 17911 }, { "epoch": 0.5277315380875919, "grad_norm": 1.5235986788067837, "learning_rate": 5.388373551572827e-06, "loss": 0.4871, "step": 17912 }, { "epoch": 0.5277610005450555, "grad_norm": 1.636746777141642, "learning_rate": 5.3878608635485586e-06, "loss": 0.495, "step": 17913 }, { "epoch": 0.5277904630025191, "grad_norm": 1.4055983472614615, "learning_rate": 5.387348171421617e-06, "loss": 0.3014, "step": 17914 }, { "epoch": 0.5278199254599826, "grad_norm": 1.5908178295956472, "learning_rate": 5.386835475197423e-06, "loss": 0.4881, "step": 17915 }, { "epoch": 0.5278493879174462, "grad_norm": 1.5640197946692145, "learning_rate": 5.386322774881403e-06, "loss": 0.443, "step": 17916 }, { "epoch": 0.5278788503749098, "grad_norm": 1.5761082610782564, "learning_rate": 5.3858100704789764e-06, "loss": 0.4311, "step": 17917 }, { "epoch": 0.5279083128323734, "grad_norm": 1.4560476404176164, "learning_rate": 5.38529736199557e-06, "loss": 0.4954, "step": 17918 }, { "epoch": 0.5279377752898369, "grad_norm": 1.5295176606109173, "learning_rate": 5.384784649436605e-06, "loss": 0.4992, "step": 17919 }, { "epoch": 0.5279672377473005, "grad_norm": 1.4306464408621076, "learning_rate": 5.384271932807506e-06, "loss": 0.3849, "step": 17920 }, { "epoch": 0.5279967002047641, "grad_norm": 1.424658664126831, "learning_rate": 5.383759212113694e-06, "loss": 0.4644, "step": 17921 }, { "epoch": 0.5280261626622277, "grad_norm": 1.5250468188603965, "learning_rate": 5.3832464873605965e-06, "loss": 0.5337, "step": 17922 }, { "epoch": 0.5280556251196913, "grad_norm": 1.8474843741479907, "learning_rate": 5.3827337585536334e-06, "loss": 0.6024, "step": 17923 }, { "epoch": 0.5280850875771548, "grad_norm": 1.6063644392670746, "learning_rate": 5.382221025698227e-06, "loss": 0.5448, "step": 17924 }, { "epoch": 0.5281145500346184, "grad_norm": 1.5301876344482488, "learning_rate": 5.381708288799806e-06, "loss": 0.3763, "step": 17925 }, { "epoch": 0.528144012492082, "grad_norm": 1.4714770339010708, "learning_rate": 5.381195547863789e-06, "loss": 0.4809, "step": 17926 }, { "epoch": 0.5281734749495456, "grad_norm": 1.4217333476304495, "learning_rate": 5.380682802895603e-06, "loss": 0.3119, "step": 17927 }, { "epoch": 0.5282029374070091, "grad_norm": 1.5888528155204351, "learning_rate": 5.38017005390067e-06, "loss": 0.5275, "step": 17928 }, { "epoch": 0.5282323998644727, "grad_norm": 1.5077612585882485, "learning_rate": 5.379657300884414e-06, "loss": 0.4434, "step": 17929 }, { "epoch": 0.5282618623219363, "grad_norm": 1.5044063454929641, "learning_rate": 5.3791445438522585e-06, "loss": 0.3971, "step": 17930 }, { "epoch": 0.5282913247793999, "grad_norm": 1.5538808002736535, "learning_rate": 5.378631782809629e-06, "loss": 0.5239, "step": 17931 }, { "epoch": 0.5283207872368634, "grad_norm": 1.5469780625714615, "learning_rate": 5.378119017761946e-06, "loss": 0.3473, "step": 17932 }, { "epoch": 0.528350249694327, "grad_norm": 1.3421605439414437, "learning_rate": 5.377606248714636e-06, "loss": 0.3727, "step": 17933 }, { "epoch": 0.5283797121517906, "grad_norm": 1.4892663967317603, "learning_rate": 5.377093475673122e-06, "loss": 0.3747, "step": 17934 }, { "epoch": 0.5284091746092542, "grad_norm": 1.7086219826052118, "learning_rate": 5.376580698642828e-06, "loss": 0.5512, "step": 17935 }, { "epoch": 0.5284386370667178, "grad_norm": 1.4665862360619213, "learning_rate": 5.376067917629178e-06, "loss": 0.3564, "step": 17936 }, { "epoch": 0.5284680995241813, "grad_norm": 1.3841329515706913, "learning_rate": 5.375555132637597e-06, "loss": 0.4287, "step": 17937 }, { "epoch": 0.5284975619816449, "grad_norm": 1.455032080909074, "learning_rate": 5.375042343673509e-06, "loss": 0.5195, "step": 17938 }, { "epoch": 0.5285270244391085, "grad_norm": 1.649425528247817, "learning_rate": 5.374529550742335e-06, "loss": 0.5229, "step": 17939 }, { "epoch": 0.5285564868965721, "grad_norm": 1.7136061698196332, "learning_rate": 5.374016753849503e-06, "loss": 0.4309, "step": 17940 }, { "epoch": 0.5285859493540356, "grad_norm": 1.515000982249023, "learning_rate": 5.373503953000436e-06, "loss": 0.5291, "step": 17941 }, { "epoch": 0.5286154118114992, "grad_norm": 1.4635535521241303, "learning_rate": 5.372991148200557e-06, "loss": 0.4626, "step": 17942 }, { "epoch": 0.5286448742689628, "grad_norm": 1.3384222822207696, "learning_rate": 5.372478339455293e-06, "loss": 0.4658, "step": 17943 }, { "epoch": 0.5286743367264264, "grad_norm": 1.4350126176623152, "learning_rate": 5.371965526770066e-06, "loss": 0.3999, "step": 17944 }, { "epoch": 0.5287037991838899, "grad_norm": 1.3572784966683111, "learning_rate": 5.371452710150299e-06, "loss": 0.3188, "step": 17945 }, { "epoch": 0.5287332616413535, "grad_norm": 1.524683794158053, "learning_rate": 5.3709398896014195e-06, "loss": 0.4503, "step": 17946 }, { "epoch": 0.5287627240988171, "grad_norm": 1.5442252462133783, "learning_rate": 5.370427065128853e-06, "loss": 0.4716, "step": 17947 }, { "epoch": 0.5287921865562807, "grad_norm": 1.6227077580382308, "learning_rate": 5.369914236738019e-06, "loss": 0.3178, "step": 17948 }, { "epoch": 0.5288216490137443, "grad_norm": 1.5392576481444606, "learning_rate": 5.369401404434347e-06, "loss": 0.4314, "step": 17949 }, { "epoch": 0.5288511114712078, "grad_norm": 1.4319967776348306, "learning_rate": 5.368888568223258e-06, "loss": 0.5026, "step": 17950 }, { "epoch": 0.5288805739286714, "grad_norm": 1.549449065762242, "learning_rate": 5.368375728110179e-06, "loss": 0.4761, "step": 17951 }, { "epoch": 0.528910036386135, "grad_norm": 1.543140090766462, "learning_rate": 5.367862884100534e-06, "loss": 0.4265, "step": 17952 }, { "epoch": 0.5289394988435986, "grad_norm": 1.5281152788658627, "learning_rate": 5.367350036199747e-06, "loss": 0.4397, "step": 17953 }, { "epoch": 0.5289689613010621, "grad_norm": 1.443552014178098, "learning_rate": 5.366837184413244e-06, "loss": 0.4121, "step": 17954 }, { "epoch": 0.5289984237585257, "grad_norm": 1.6514522829693556, "learning_rate": 5.3663243287464486e-06, "loss": 0.5048, "step": 17955 }, { "epoch": 0.5290278862159893, "grad_norm": 1.5595864864226041, "learning_rate": 5.365811469204786e-06, "loss": 0.4659, "step": 17956 }, { "epoch": 0.5290573486734529, "grad_norm": 1.5785408582179876, "learning_rate": 5.365298605793681e-06, "loss": 0.5651, "step": 17957 }, { "epoch": 0.5290868111309164, "grad_norm": 1.5410536458522037, "learning_rate": 5.364785738518558e-06, "loss": 0.4808, "step": 17958 }, { "epoch": 0.52911627358838, "grad_norm": 1.3986660994244131, "learning_rate": 5.364272867384846e-06, "loss": 0.3061, "step": 17959 }, { "epoch": 0.5291457360458436, "grad_norm": 1.648755634214946, "learning_rate": 5.363759992397962e-06, "loss": 0.4857, "step": 17960 }, { "epoch": 0.5291751985033072, "grad_norm": 1.4311458917493243, "learning_rate": 5.363247113563338e-06, "loss": 0.4617, "step": 17961 }, { "epoch": 0.5292046609607708, "grad_norm": 1.3151976060124198, "learning_rate": 5.362734230886397e-06, "loss": 0.3426, "step": 17962 }, { "epoch": 0.5292341234182343, "grad_norm": 1.5873489355789148, "learning_rate": 5.362221344372564e-06, "loss": 0.5063, "step": 17963 }, { "epoch": 0.5292635858756979, "grad_norm": 1.7251500673816498, "learning_rate": 5.361708454027262e-06, "loss": 0.6136, "step": 17964 }, { "epoch": 0.5292930483331615, "grad_norm": 1.4533024609222538, "learning_rate": 5.361195559855921e-06, "loss": 0.3784, "step": 17965 }, { "epoch": 0.5293225107906251, "grad_norm": 1.3507470915094308, "learning_rate": 5.360682661863961e-06, "loss": 0.4274, "step": 17966 }, { "epoch": 0.5293519732480886, "grad_norm": 1.6210043592099856, "learning_rate": 5.36016976005681e-06, "loss": 0.3367, "step": 17967 }, { "epoch": 0.5293814357055522, "grad_norm": 1.5139885692577126, "learning_rate": 5.3596568544398965e-06, "loss": 0.473, "step": 17968 }, { "epoch": 0.5294108981630158, "grad_norm": 1.2163734747879302, "learning_rate": 5.359143945018639e-06, "loss": 0.3493, "step": 17969 }, { "epoch": 0.5294403606204794, "grad_norm": 1.460589936128876, "learning_rate": 5.358631031798466e-06, "loss": 0.4748, "step": 17970 }, { "epoch": 0.5294698230779429, "grad_norm": 1.4140170329591952, "learning_rate": 5.358118114784805e-06, "loss": 0.4298, "step": 17971 }, { "epoch": 0.5294992855354065, "grad_norm": 1.4962371361170865, "learning_rate": 5.357605193983077e-06, "loss": 0.4348, "step": 17972 }, { "epoch": 0.5295287479928701, "grad_norm": 1.656826565356585, "learning_rate": 5.357092269398713e-06, "loss": 0.4646, "step": 17973 }, { "epoch": 0.5295582104503337, "grad_norm": 1.8221895737795626, "learning_rate": 5.356579341037136e-06, "loss": 0.5141, "step": 17974 }, { "epoch": 0.5295876729077973, "grad_norm": 1.6304205168314836, "learning_rate": 5.356066408903771e-06, "loss": 0.5588, "step": 17975 }, { "epoch": 0.5296171353652608, "grad_norm": 1.431014225979152, "learning_rate": 5.355553473004043e-06, "loss": 0.4733, "step": 17976 }, { "epoch": 0.5296465978227244, "grad_norm": 1.2947619372895216, "learning_rate": 5.355040533343379e-06, "loss": 0.3847, "step": 17977 }, { "epoch": 0.529676060280188, "grad_norm": 1.6204916874107609, "learning_rate": 5.354527589927206e-06, "loss": 0.4957, "step": 17978 }, { "epoch": 0.5297055227376516, "grad_norm": 1.4359894225202354, "learning_rate": 5.354014642760945e-06, "loss": 0.4103, "step": 17979 }, { "epoch": 0.5297349851951151, "grad_norm": 1.305063263728362, "learning_rate": 5.353501691850027e-06, "loss": 0.3255, "step": 17980 }, { "epoch": 0.5297644476525787, "grad_norm": 1.501985812398475, "learning_rate": 5.352988737199876e-06, "loss": 0.4419, "step": 17981 }, { "epoch": 0.5297939101100423, "grad_norm": 1.4381255042647296, "learning_rate": 5.352475778815916e-06, "loss": 0.4582, "step": 17982 }, { "epoch": 0.5298233725675059, "grad_norm": 1.556215317784978, "learning_rate": 5.351962816703579e-06, "loss": 0.3731, "step": 17983 }, { "epoch": 0.5298528350249694, "grad_norm": 1.4843657938407202, "learning_rate": 5.351449850868282e-06, "loss": 0.5543, "step": 17984 }, { "epoch": 0.529882297482433, "grad_norm": 1.445673579397913, "learning_rate": 5.350936881315457e-06, "loss": 0.4439, "step": 17985 }, { "epoch": 0.5299117599398966, "grad_norm": 1.5441742449569027, "learning_rate": 5.350423908050529e-06, "loss": 0.5016, "step": 17986 }, { "epoch": 0.5299412223973602, "grad_norm": 1.5475897806968755, "learning_rate": 5.349910931078923e-06, "loss": 0.3988, "step": 17987 }, { "epoch": 0.5299706848548238, "grad_norm": 1.6042867041492674, "learning_rate": 5.349397950406066e-06, "loss": 0.6108, "step": 17988 }, { "epoch": 0.5300001473122873, "grad_norm": 1.5659658605285816, "learning_rate": 5.348884966037385e-06, "loss": 0.3852, "step": 17989 }, { "epoch": 0.5300296097697509, "grad_norm": 1.409157215035703, "learning_rate": 5.348371977978305e-06, "loss": 0.5538, "step": 17990 }, { "epoch": 0.5300590722272145, "grad_norm": 1.5734899347240165, "learning_rate": 5.347858986234251e-06, "loss": 0.4399, "step": 17991 }, { "epoch": 0.5300885346846781, "grad_norm": 1.5485811729298171, "learning_rate": 5.347345990810652e-06, "loss": 0.4255, "step": 17992 }, { "epoch": 0.5301179971421416, "grad_norm": 1.4303476199884804, "learning_rate": 5.3468329917129325e-06, "loss": 0.3383, "step": 17993 }, { "epoch": 0.5301474595996052, "grad_norm": 1.3829889949331415, "learning_rate": 5.346319988946519e-06, "loss": 0.3775, "step": 17994 }, { "epoch": 0.5301769220570688, "grad_norm": 1.4695693515915107, "learning_rate": 5.345806982516839e-06, "loss": 0.4426, "step": 17995 }, { "epoch": 0.5302063845145324, "grad_norm": 1.4827425259676343, "learning_rate": 5.345293972429317e-06, "loss": 0.5244, "step": 17996 }, { "epoch": 0.5302358469719959, "grad_norm": 1.3908556754731618, "learning_rate": 5.3447809586893804e-06, "loss": 0.4366, "step": 17997 }, { "epoch": 0.5302653094294595, "grad_norm": 1.3627114066970845, "learning_rate": 5.344267941302459e-06, "loss": 0.3894, "step": 17998 }, { "epoch": 0.5302947718869231, "grad_norm": 1.5223633644308456, "learning_rate": 5.343754920273972e-06, "loss": 0.3592, "step": 17999 }, { "epoch": 0.5303242343443867, "grad_norm": 1.5046107204792878, "learning_rate": 5.343241895609352e-06, "loss": 0.2625, "step": 18000 }, { "epoch": 0.5303536968018503, "grad_norm": 1.6466474778642788, "learning_rate": 5.342728867314024e-06, "loss": 0.5046, "step": 18001 }, { "epoch": 0.5303831592593138, "grad_norm": 1.636280592210488, "learning_rate": 5.342215835393415e-06, "loss": 0.5749, "step": 18002 }, { "epoch": 0.5304126217167774, "grad_norm": 1.690449709197618, "learning_rate": 5.34170279985295e-06, "loss": 0.4555, "step": 18003 }, { "epoch": 0.530442084174241, "grad_norm": 1.6680028998609704, "learning_rate": 5.341189760698059e-06, "loss": 0.557, "step": 18004 }, { "epoch": 0.5304715466317046, "grad_norm": 1.3862678201918206, "learning_rate": 5.340676717934164e-06, "loss": 0.3943, "step": 18005 }, { "epoch": 0.5305010090891681, "grad_norm": 1.4862081645794236, "learning_rate": 5.340163671566696e-06, "loss": 0.3528, "step": 18006 }, { "epoch": 0.5305304715466317, "grad_norm": 1.5806714378359752, "learning_rate": 5.33965062160108e-06, "loss": 0.4841, "step": 18007 }, { "epoch": 0.5305599340040953, "grad_norm": 1.4004729427009737, "learning_rate": 5.339137568042744e-06, "loss": 0.4321, "step": 18008 }, { "epoch": 0.5305893964615589, "grad_norm": 1.5520570346879468, "learning_rate": 5.338624510897113e-06, "loss": 0.4852, "step": 18009 }, { "epoch": 0.5306188589190224, "grad_norm": 1.5534290098720418, "learning_rate": 5.338111450169616e-06, "loss": 0.5303, "step": 18010 }, { "epoch": 0.530648321376486, "grad_norm": 1.3215454801961997, "learning_rate": 5.337598385865679e-06, "loss": 0.3241, "step": 18011 }, { "epoch": 0.5306777838339496, "grad_norm": 1.6662283370790159, "learning_rate": 5.337085317990728e-06, "loss": 0.4746, "step": 18012 }, { "epoch": 0.5307072462914132, "grad_norm": 1.7023399469419709, "learning_rate": 5.336572246550194e-06, "loss": 0.559, "step": 18013 }, { "epoch": 0.5307367087488768, "grad_norm": 1.3508846530791947, "learning_rate": 5.336059171549498e-06, "loss": 0.349, "step": 18014 }, { "epoch": 0.5307661712063403, "grad_norm": 1.5169942934066778, "learning_rate": 5.335546092994072e-06, "loss": 0.4395, "step": 18015 }, { "epoch": 0.5307956336638039, "grad_norm": 1.322338536396624, "learning_rate": 5.335033010889342e-06, "loss": 0.3907, "step": 18016 }, { "epoch": 0.5308250961212675, "grad_norm": 1.580747916483888, "learning_rate": 5.334519925240734e-06, "loss": 0.5055, "step": 18017 }, { "epoch": 0.5308545585787311, "grad_norm": 1.4597639086255876, "learning_rate": 5.334006836053678e-06, "loss": 0.3929, "step": 18018 }, { "epoch": 0.5308840210361946, "grad_norm": 1.6440169325494245, "learning_rate": 5.3334937433335985e-06, "loss": 0.417, "step": 18019 }, { "epoch": 0.5309134834936582, "grad_norm": 1.5724791910678413, "learning_rate": 5.332980647085922e-06, "loss": 0.4596, "step": 18020 }, { "epoch": 0.5309429459511218, "grad_norm": 1.3316798591336028, "learning_rate": 5.332467547316079e-06, "loss": 0.3812, "step": 18021 }, { "epoch": 0.5309724084085854, "grad_norm": 1.5256480357976774, "learning_rate": 5.3319544440294965e-06, "loss": 0.5077, "step": 18022 }, { "epoch": 0.5310018708660489, "grad_norm": 1.5545215529536651, "learning_rate": 5.331441337231601e-06, "loss": 0.394, "step": 18023 }, { "epoch": 0.5310313333235125, "grad_norm": 1.5356946635914628, "learning_rate": 5.330928226927819e-06, "loss": 0.4901, "step": 18024 }, { "epoch": 0.5310607957809761, "grad_norm": 1.634818249911152, "learning_rate": 5.33041511312358e-06, "loss": 0.291, "step": 18025 }, { "epoch": 0.5310902582384397, "grad_norm": 1.5988853228110698, "learning_rate": 5.329901995824311e-06, "loss": 0.4364, "step": 18026 }, { "epoch": 0.5311197206959033, "grad_norm": 1.334719502728265, "learning_rate": 5.329388875035437e-06, "loss": 0.4176, "step": 18027 }, { "epoch": 0.5311491831533668, "grad_norm": 1.5337158598589986, "learning_rate": 5.328875750762389e-06, "loss": 0.4651, "step": 18028 }, { "epoch": 0.5311786456108304, "grad_norm": 1.3193176317446373, "learning_rate": 5.328362623010595e-06, "loss": 0.3657, "step": 18029 }, { "epoch": 0.531208108068294, "grad_norm": 1.4231743850878873, "learning_rate": 5.327849491785479e-06, "loss": 0.4766, "step": 18030 }, { "epoch": 0.5312375705257576, "grad_norm": 1.4628054608069838, "learning_rate": 5.3273363570924726e-06, "loss": 0.4984, "step": 18031 }, { "epoch": 0.5312670329832211, "grad_norm": 1.5470239522415918, "learning_rate": 5.326823218937e-06, "loss": 0.4537, "step": 18032 }, { "epoch": 0.5312964954406847, "grad_norm": 1.85038047855074, "learning_rate": 5.326310077324493e-06, "loss": 0.4708, "step": 18033 }, { "epoch": 0.5313259578981483, "grad_norm": 1.4645209746465266, "learning_rate": 5.325796932260377e-06, "loss": 0.4366, "step": 18034 }, { "epoch": 0.5313554203556119, "grad_norm": 1.4347119387412892, "learning_rate": 5.325283783750081e-06, "loss": 0.3564, "step": 18035 }, { "epoch": 0.5313848828130754, "grad_norm": 1.3914318167821398, "learning_rate": 5.324770631799031e-06, "loss": 0.3479, "step": 18036 }, { "epoch": 0.531414345270539, "grad_norm": 1.681908193902607, "learning_rate": 5.324257476412658e-06, "loss": 0.4683, "step": 18037 }, { "epoch": 0.5314438077280026, "grad_norm": 1.6846523409874037, "learning_rate": 5.323744317596387e-06, "loss": 0.4499, "step": 18038 }, { "epoch": 0.5314732701854662, "grad_norm": 1.642402588092592, "learning_rate": 5.323231155355646e-06, "loss": 0.4664, "step": 18039 }, { "epoch": 0.5315027326429298, "grad_norm": 1.4771782093125494, "learning_rate": 5.322717989695866e-06, "loss": 0.3996, "step": 18040 }, { "epoch": 0.5315321951003933, "grad_norm": 1.5867261435449218, "learning_rate": 5.322204820622473e-06, "loss": 0.4587, "step": 18041 }, { "epoch": 0.5315616575578569, "grad_norm": 1.4166571917044488, "learning_rate": 5.321691648140894e-06, "loss": 0.2765, "step": 18042 }, { "epoch": 0.5315911200153205, "grad_norm": 1.412651161645108, "learning_rate": 5.321178472256562e-06, "loss": 0.4124, "step": 18043 }, { "epoch": 0.5316205824727841, "grad_norm": 1.52739347504973, "learning_rate": 5.320665292974901e-06, "loss": 0.377, "step": 18044 }, { "epoch": 0.5316500449302476, "grad_norm": 1.5365115086072345, "learning_rate": 5.320152110301338e-06, "loss": 0.5175, "step": 18045 }, { "epoch": 0.5316795073877112, "grad_norm": 1.6288228150199995, "learning_rate": 5.319638924241306e-06, "loss": 0.4818, "step": 18046 }, { "epoch": 0.5317089698451748, "grad_norm": 1.6628094977245136, "learning_rate": 5.31912573480023e-06, "loss": 0.4923, "step": 18047 }, { "epoch": 0.5317384323026384, "grad_norm": 1.6758404408141092, "learning_rate": 5.31861254198354e-06, "loss": 0.5969, "step": 18048 }, { "epoch": 0.5317678947601019, "grad_norm": 1.5040771328107458, "learning_rate": 5.318099345796662e-06, "loss": 0.4908, "step": 18049 }, { "epoch": 0.5317973572175655, "grad_norm": 1.4555249161808557, "learning_rate": 5.317586146245027e-06, "loss": 0.4793, "step": 18050 }, { "epoch": 0.5318268196750291, "grad_norm": 1.715261233255198, "learning_rate": 5.317072943334063e-06, "loss": 0.4193, "step": 18051 }, { "epoch": 0.5318562821324927, "grad_norm": 1.6390169388801639, "learning_rate": 5.3165597370691975e-06, "loss": 0.5416, "step": 18052 }, { "epoch": 0.5318857445899563, "grad_norm": 1.610730560435053, "learning_rate": 5.316046527455861e-06, "loss": 0.5942, "step": 18053 }, { "epoch": 0.5319152070474198, "grad_norm": 1.4756822773143607, "learning_rate": 5.315533314499478e-06, "loss": 0.3929, "step": 18054 }, { "epoch": 0.5319446695048834, "grad_norm": 1.3552026813679992, "learning_rate": 5.3150200982054815e-06, "loss": 0.4173, "step": 18055 }, { "epoch": 0.531974131962347, "grad_norm": 1.5163515397702565, "learning_rate": 5.314506878579297e-06, "loss": 0.4034, "step": 18056 }, { "epoch": 0.5320035944198106, "grad_norm": 1.6406783996751457, "learning_rate": 5.313993655626356e-06, "loss": 0.428, "step": 18057 }, { "epoch": 0.5320330568772741, "grad_norm": 1.9166668468511803, "learning_rate": 5.313480429352086e-06, "loss": 0.6275, "step": 18058 }, { "epoch": 0.5320625193347377, "grad_norm": 1.462748769720987, "learning_rate": 5.3129671997619145e-06, "loss": 0.4617, "step": 18059 }, { "epoch": 0.5320919817922013, "grad_norm": 1.4035386810975143, "learning_rate": 5.312453966861272e-06, "loss": 0.482, "step": 18060 }, { "epoch": 0.5321214442496649, "grad_norm": 1.4785927675913046, "learning_rate": 5.311940730655587e-06, "loss": 0.3702, "step": 18061 }, { "epoch": 0.5321509067071284, "grad_norm": 1.5920809617365663, "learning_rate": 5.311427491150286e-06, "loss": 0.4152, "step": 18062 }, { "epoch": 0.532180369164592, "grad_norm": 1.383097016871067, "learning_rate": 5.310914248350802e-06, "loss": 0.3375, "step": 18063 }, { "epoch": 0.5322098316220556, "grad_norm": 1.6655136981875793, "learning_rate": 5.310401002262561e-06, "loss": 0.5174, "step": 18064 }, { "epoch": 0.5322392940795192, "grad_norm": 1.3334384377490576, "learning_rate": 5.309887752890993e-06, "loss": 0.3889, "step": 18065 }, { "epoch": 0.5322687565369828, "grad_norm": 1.521244222433184, "learning_rate": 5.309374500241526e-06, "loss": 0.372, "step": 18066 }, { "epoch": 0.5322982189944463, "grad_norm": 1.6090763394833703, "learning_rate": 5.308861244319592e-06, "loss": 0.4798, "step": 18067 }, { "epoch": 0.5323276814519099, "grad_norm": 1.5550273263145573, "learning_rate": 5.308347985130616e-06, "loss": 0.4995, "step": 18068 }, { "epoch": 0.5323571439093735, "grad_norm": 1.52522204665148, "learning_rate": 5.30783472268003e-06, "loss": 0.4377, "step": 18069 }, { "epoch": 0.5323866063668371, "grad_norm": 1.5153290176543455, "learning_rate": 5.307321456973261e-06, "loss": 0.4351, "step": 18070 }, { "epoch": 0.5324160688243006, "grad_norm": 1.6427202420554916, "learning_rate": 5.30680818801574e-06, "loss": 0.5504, "step": 18071 }, { "epoch": 0.5324455312817642, "grad_norm": 1.8777212923916948, "learning_rate": 5.306294915812895e-06, "loss": 0.4389, "step": 18072 }, { "epoch": 0.5324749937392278, "grad_norm": 1.495655354307217, "learning_rate": 5.305781640370156e-06, "loss": 0.5091, "step": 18073 }, { "epoch": 0.5325044561966914, "grad_norm": 1.70140030729812, "learning_rate": 5.305268361692953e-06, "loss": 0.5807, "step": 18074 }, { "epoch": 0.5325339186541549, "grad_norm": 1.5250767425623475, "learning_rate": 5.304755079786713e-06, "loss": 0.3084, "step": 18075 }, { "epoch": 0.5325633811116185, "grad_norm": 1.5294157906048207, "learning_rate": 5.304241794656866e-06, "loss": 0.4729, "step": 18076 }, { "epoch": 0.5325928435690821, "grad_norm": 1.5789076911162327, "learning_rate": 5.303728506308844e-06, "loss": 0.5016, "step": 18077 }, { "epoch": 0.5326223060265457, "grad_norm": 1.4782533845004213, "learning_rate": 5.303215214748074e-06, "loss": 0.3633, "step": 18078 }, { "epoch": 0.5326517684840093, "grad_norm": 1.43064095805886, "learning_rate": 5.302701919979985e-06, "loss": 0.451, "step": 18079 }, { "epoch": 0.5326812309414728, "grad_norm": 1.43120136211852, "learning_rate": 5.302188622010008e-06, "loss": 0.448, "step": 18080 }, { "epoch": 0.5327106933989364, "grad_norm": 1.349312540643558, "learning_rate": 5.301675320843571e-06, "loss": 0.4468, "step": 18081 }, { "epoch": 0.5327401558564, "grad_norm": 1.3621480083632804, "learning_rate": 5.301162016486104e-06, "loss": 0.3858, "step": 18082 }, { "epoch": 0.5327696183138636, "grad_norm": 1.5819339599391968, "learning_rate": 5.300648708943039e-06, "loss": 0.601, "step": 18083 }, { "epoch": 0.5327990807713271, "grad_norm": 1.456544873472432, "learning_rate": 5.300135398219803e-06, "loss": 0.4172, "step": 18084 }, { "epoch": 0.5328285432287907, "grad_norm": 1.6578425808596193, "learning_rate": 5.299622084321825e-06, "loss": 0.5399, "step": 18085 }, { "epoch": 0.5328580056862543, "grad_norm": 1.5903741382526009, "learning_rate": 5.299108767254538e-06, "loss": 0.541, "step": 18086 }, { "epoch": 0.5328874681437179, "grad_norm": 1.5302900212475345, "learning_rate": 5.298595447023367e-06, "loss": 0.3667, "step": 18087 }, { "epoch": 0.5329169306011814, "grad_norm": 1.5569643315759303, "learning_rate": 5.298082123633746e-06, "loss": 0.3851, "step": 18088 }, { "epoch": 0.532946393058645, "grad_norm": 1.324544852175048, "learning_rate": 5.297568797091103e-06, "loss": 0.3695, "step": 18089 }, { "epoch": 0.5329758555161086, "grad_norm": 1.44264653916769, "learning_rate": 5.297055467400867e-06, "loss": 0.4005, "step": 18090 }, { "epoch": 0.5330053179735722, "grad_norm": 1.362786468461663, "learning_rate": 5.296542134568469e-06, "loss": 0.4415, "step": 18091 }, { "epoch": 0.5330347804310358, "grad_norm": 1.418433351712876, "learning_rate": 5.29602879859934e-06, "loss": 0.3688, "step": 18092 }, { "epoch": 0.5330642428884993, "grad_norm": 1.5160565003851811, "learning_rate": 5.295515459498908e-06, "loss": 0.4661, "step": 18093 }, { "epoch": 0.5330937053459629, "grad_norm": 1.471490683776719, "learning_rate": 5.295002117272603e-06, "loss": 0.5008, "step": 18094 }, { "epoch": 0.5331231678034265, "grad_norm": 1.3844442767881948, "learning_rate": 5.294488771925855e-06, "loss": 0.3009, "step": 18095 }, { "epoch": 0.5331526302608901, "grad_norm": 1.613832917431859, "learning_rate": 5.293975423464095e-06, "loss": 0.5375, "step": 18096 }, { "epoch": 0.5331820927183536, "grad_norm": 1.6283803613070946, "learning_rate": 5.293462071892753e-06, "loss": 0.5762, "step": 18097 }, { "epoch": 0.5332115551758172, "grad_norm": 1.4313392040489226, "learning_rate": 5.29294871721726e-06, "loss": 0.442, "step": 18098 }, { "epoch": 0.5332410176332808, "grad_norm": 1.5420583040473181, "learning_rate": 5.292435359443042e-06, "loss": 0.4217, "step": 18099 }, { "epoch": 0.5332704800907444, "grad_norm": 1.45319860633278, "learning_rate": 5.291921998575533e-06, "loss": 0.3909, "step": 18100 }, { "epoch": 0.5332999425482079, "grad_norm": 1.610323248834543, "learning_rate": 5.291408634620163e-06, "loss": 0.3654, "step": 18101 }, { "epoch": 0.5333294050056715, "grad_norm": 1.6698290874440744, "learning_rate": 5.29089526758236e-06, "loss": 0.4666, "step": 18102 }, { "epoch": 0.5333588674631351, "grad_norm": 1.447849734776993, "learning_rate": 5.2903818974675555e-06, "loss": 0.4977, "step": 18103 }, { "epoch": 0.5333883299205987, "grad_norm": 1.5896816786517818, "learning_rate": 5.289868524281182e-06, "loss": 0.491, "step": 18104 }, { "epoch": 0.5334177923780623, "grad_norm": 1.7404184196396826, "learning_rate": 5.289355148028665e-06, "loss": 0.5259, "step": 18105 }, { "epoch": 0.5334472548355258, "grad_norm": 1.3653673958697385, "learning_rate": 5.288841768715438e-06, "loss": 0.4415, "step": 18106 }, { "epoch": 0.5334767172929894, "grad_norm": 1.5415835576561445, "learning_rate": 5.288328386346931e-06, "loss": 0.5377, "step": 18107 }, { "epoch": 0.533506179750453, "grad_norm": 1.8187152067584211, "learning_rate": 5.287815000928576e-06, "loss": 0.569, "step": 18108 }, { "epoch": 0.5335356422079166, "grad_norm": 1.633420947983204, "learning_rate": 5.287301612465798e-06, "loss": 0.4949, "step": 18109 }, { "epoch": 0.5335651046653801, "grad_norm": 1.7304113265375183, "learning_rate": 5.2867882209640344e-06, "loss": 0.5251, "step": 18110 }, { "epoch": 0.5335945671228437, "grad_norm": 1.6018501407226096, "learning_rate": 5.28627482642871e-06, "loss": 0.5081, "step": 18111 }, { "epoch": 0.5336240295803073, "grad_norm": 1.4201040545706758, "learning_rate": 5.285761428865258e-06, "loss": 0.4847, "step": 18112 }, { "epoch": 0.5336534920377709, "grad_norm": 1.4548699466418193, "learning_rate": 5.285248028279111e-06, "loss": 0.4087, "step": 18113 }, { "epoch": 0.5336829544952344, "grad_norm": 1.14001875816821, "learning_rate": 5.284734624675696e-06, "loss": 0.2716, "step": 18114 }, { "epoch": 0.533712416952698, "grad_norm": 1.7287063017493405, "learning_rate": 5.284221218060443e-06, "loss": 0.4076, "step": 18115 }, { "epoch": 0.5337418794101616, "grad_norm": 1.4510396488797659, "learning_rate": 5.283707808438787e-06, "loss": 0.4219, "step": 18116 }, { "epoch": 0.5337713418676252, "grad_norm": 1.5409555557810826, "learning_rate": 5.283194395816155e-06, "loss": 0.6112, "step": 18117 }, { "epoch": 0.5338008043250888, "grad_norm": 1.924028052059882, "learning_rate": 5.282680980197981e-06, "loss": 0.7287, "step": 18118 }, { "epoch": 0.5338302667825523, "grad_norm": 1.4845029505808125, "learning_rate": 5.282167561589692e-06, "loss": 0.4573, "step": 18119 }, { "epoch": 0.5338597292400159, "grad_norm": 1.5323107662951212, "learning_rate": 5.28165413999672e-06, "loss": 0.4532, "step": 18120 }, { "epoch": 0.5338891916974795, "grad_norm": 1.618946821390105, "learning_rate": 5.281140715424496e-06, "loss": 0.4372, "step": 18121 }, { "epoch": 0.5339186541549431, "grad_norm": 1.2779336643599573, "learning_rate": 5.280627287878454e-06, "loss": 0.3784, "step": 18122 }, { "epoch": 0.5339481166124066, "grad_norm": 1.4513229882843204, "learning_rate": 5.2801138573640194e-06, "loss": 0.4813, "step": 18123 }, { "epoch": 0.5339775790698702, "grad_norm": 1.3558137015849296, "learning_rate": 5.279600423886627e-06, "loss": 0.4339, "step": 18124 }, { "epoch": 0.5340070415273338, "grad_norm": 1.668636152659029, "learning_rate": 5.279086987451707e-06, "loss": 0.4904, "step": 18125 }, { "epoch": 0.5340365039847974, "grad_norm": 1.7534867218838575, "learning_rate": 5.278573548064688e-06, "loss": 0.459, "step": 18126 }, { "epoch": 0.5340659664422609, "grad_norm": 1.450555771639703, "learning_rate": 5.278060105731005e-06, "loss": 0.4736, "step": 18127 }, { "epoch": 0.5340954288997245, "grad_norm": 1.3476142621133207, "learning_rate": 5.277546660456087e-06, "loss": 0.4637, "step": 18128 }, { "epoch": 0.5341248913571881, "grad_norm": 1.3672537933601159, "learning_rate": 5.277033212245363e-06, "loss": 0.3189, "step": 18129 }, { "epoch": 0.5341543538146517, "grad_norm": 1.5705845171054338, "learning_rate": 5.276519761104267e-06, "loss": 0.5306, "step": 18130 }, { "epoch": 0.5341838162721153, "grad_norm": 1.400486536561974, "learning_rate": 5.276006307038229e-06, "loss": 0.3741, "step": 18131 }, { "epoch": 0.5342132787295788, "grad_norm": 1.3921648854517732, "learning_rate": 5.27549285005268e-06, "loss": 0.4512, "step": 18132 }, { "epoch": 0.5342427411870424, "grad_norm": 1.525624556265715, "learning_rate": 5.274979390153053e-06, "loss": 0.4539, "step": 18133 }, { "epoch": 0.534272203644506, "grad_norm": 1.558476204461338, "learning_rate": 5.274465927344779e-06, "loss": 0.3716, "step": 18134 }, { "epoch": 0.5343016661019696, "grad_norm": 1.7333015290560023, "learning_rate": 5.273952461633285e-06, "loss": 0.6145, "step": 18135 }, { "epoch": 0.5343311285594331, "grad_norm": 1.4522873620578025, "learning_rate": 5.273438993024006e-06, "loss": 0.4074, "step": 18136 }, { "epoch": 0.5343605910168967, "grad_norm": 1.647015427290849, "learning_rate": 5.272925521522374e-06, "loss": 0.5362, "step": 18137 }, { "epoch": 0.5343900534743603, "grad_norm": 1.5704020523439626, "learning_rate": 5.272412047133819e-06, "loss": 0.6389, "step": 18138 }, { "epoch": 0.5344195159318239, "grad_norm": 1.4363749655063982, "learning_rate": 5.27189856986377e-06, "loss": 0.3766, "step": 18139 }, { "epoch": 0.5344489783892874, "grad_norm": 1.6237956664710058, "learning_rate": 5.2713850897176635e-06, "loss": 0.5097, "step": 18140 }, { "epoch": 0.534478440846751, "grad_norm": 1.5984703828174458, "learning_rate": 5.270871606700927e-06, "loss": 0.3455, "step": 18141 }, { "epoch": 0.5345079033042146, "grad_norm": 1.5693025612398046, "learning_rate": 5.270358120818993e-06, "loss": 0.4579, "step": 18142 }, { "epoch": 0.5345373657616782, "grad_norm": 1.515252587576913, "learning_rate": 5.2698446320772935e-06, "loss": 0.4846, "step": 18143 }, { "epoch": 0.5345668282191418, "grad_norm": 1.485047642237804, "learning_rate": 5.269331140481261e-06, "loss": 0.4556, "step": 18144 }, { "epoch": 0.5345962906766053, "grad_norm": 1.3918123569358898, "learning_rate": 5.2688176460363234e-06, "loss": 0.3955, "step": 18145 }, { "epoch": 0.5346257531340689, "grad_norm": 1.4278139981757352, "learning_rate": 5.268304148747917e-06, "loss": 0.4503, "step": 18146 }, { "epoch": 0.5346552155915325, "grad_norm": 1.421933877076897, "learning_rate": 5.2677906486214695e-06, "loss": 0.5383, "step": 18147 }, { "epoch": 0.5346846780489961, "grad_norm": 1.5821937703171054, "learning_rate": 5.267277145662414e-06, "loss": 0.511, "step": 18148 }, { "epoch": 0.5347141405064596, "grad_norm": 1.4172011637470645, "learning_rate": 5.266763639876184e-06, "loss": 0.4656, "step": 18149 }, { "epoch": 0.5347436029639232, "grad_norm": 1.5959531771309177, "learning_rate": 5.266250131268208e-06, "loss": 0.3912, "step": 18150 }, { "epoch": 0.5347730654213868, "grad_norm": 1.6297911052820462, "learning_rate": 5.265736619843919e-06, "loss": 0.5104, "step": 18151 }, { "epoch": 0.5348025278788504, "grad_norm": 1.6198018483274188, "learning_rate": 5.26522310560875e-06, "loss": 0.4811, "step": 18152 }, { "epoch": 0.5348319903363139, "grad_norm": 1.3130643815693606, "learning_rate": 5.264709588568132e-06, "loss": 0.3636, "step": 18153 }, { "epoch": 0.5348614527937775, "grad_norm": 1.3860093469332981, "learning_rate": 5.2641960687274955e-06, "loss": 0.372, "step": 18154 }, { "epoch": 0.5348909152512411, "grad_norm": 1.5996913654733171, "learning_rate": 5.263682546092275e-06, "loss": 0.3792, "step": 18155 }, { "epoch": 0.5349203777087047, "grad_norm": 1.6395744894086568, "learning_rate": 5.263169020667899e-06, "loss": 0.5165, "step": 18156 }, { "epoch": 0.5349498401661683, "grad_norm": 1.56047612106724, "learning_rate": 5.262655492459802e-06, "loss": 0.4216, "step": 18157 }, { "epoch": 0.5349793026236318, "grad_norm": 1.6377303086779031, "learning_rate": 5.262141961473416e-06, "loss": 0.6234, "step": 18158 }, { "epoch": 0.5350087650810954, "grad_norm": 1.533004426607559, "learning_rate": 5.261628427714171e-06, "loss": 0.339, "step": 18159 }, { "epoch": 0.535038227538559, "grad_norm": 1.5498110568146881, "learning_rate": 5.261114891187501e-06, "loss": 0.504, "step": 18160 }, { "epoch": 0.5350676899960226, "grad_norm": 1.8425728695001942, "learning_rate": 5.260601351898839e-06, "loss": 0.5013, "step": 18161 }, { "epoch": 0.5350971524534861, "grad_norm": 1.5672922179424869, "learning_rate": 5.260087809853613e-06, "loss": 0.3863, "step": 18162 }, { "epoch": 0.5351266149109497, "grad_norm": 1.3583279273083824, "learning_rate": 5.2595742650572575e-06, "loss": 0.3975, "step": 18163 }, { "epoch": 0.5351560773684133, "grad_norm": 1.6883222086085803, "learning_rate": 5.259060717515204e-06, "loss": 0.5957, "step": 18164 }, { "epoch": 0.5351855398258769, "grad_norm": 1.5494116319528484, "learning_rate": 5.258547167232887e-06, "loss": 0.4615, "step": 18165 }, { "epoch": 0.5352150022833404, "grad_norm": 1.5294484643749748, "learning_rate": 5.258033614215736e-06, "loss": 0.4201, "step": 18166 }, { "epoch": 0.535244464740804, "grad_norm": 1.4275115432397385, "learning_rate": 5.257520058469184e-06, "loss": 0.4409, "step": 18167 }, { "epoch": 0.5352739271982676, "grad_norm": 1.478225087857933, "learning_rate": 5.257006499998664e-06, "loss": 0.2946, "step": 18168 }, { "epoch": 0.5353033896557312, "grad_norm": 1.5926267074387421, "learning_rate": 5.256492938809607e-06, "loss": 0.5247, "step": 18169 }, { "epoch": 0.5353328521131948, "grad_norm": 1.3297756102302727, "learning_rate": 5.255979374907445e-06, "loss": 0.4216, "step": 18170 }, { "epoch": 0.5353623145706583, "grad_norm": 1.5007478620832757, "learning_rate": 5.255465808297613e-06, "loss": 0.4401, "step": 18171 }, { "epoch": 0.5353917770281219, "grad_norm": 1.4249420579606116, "learning_rate": 5.25495223898554e-06, "loss": 0.4344, "step": 18172 }, { "epoch": 0.5354212394855855, "grad_norm": 1.6836977451932778, "learning_rate": 5.2544386669766615e-06, "loss": 0.4335, "step": 18173 }, { "epoch": 0.5354507019430491, "grad_norm": 1.546440216192367, "learning_rate": 5.253925092276407e-06, "loss": 0.575, "step": 18174 }, { "epoch": 0.5354801644005126, "grad_norm": 1.3322904051308264, "learning_rate": 5.25341151489021e-06, "loss": 0.293, "step": 18175 }, { "epoch": 0.5355096268579762, "grad_norm": 1.28389197203291, "learning_rate": 5.2528979348235045e-06, "loss": 0.4644, "step": 18176 }, { "epoch": 0.5355390893154398, "grad_norm": 1.4787390260066862, "learning_rate": 5.252384352081721e-06, "loss": 0.4704, "step": 18177 }, { "epoch": 0.5355685517729034, "grad_norm": 1.4785240369983166, "learning_rate": 5.2518707666702924e-06, "loss": 0.4857, "step": 18178 }, { "epoch": 0.5355980142303669, "grad_norm": 1.4740403235679067, "learning_rate": 5.2513571785946516e-06, "loss": 0.4132, "step": 18179 }, { "epoch": 0.5356274766878305, "grad_norm": 1.5183089795797267, "learning_rate": 5.250843587860233e-06, "loss": 0.479, "step": 18180 }, { "epoch": 0.5356569391452941, "grad_norm": 1.701241393825173, "learning_rate": 5.250329994472464e-06, "loss": 0.4675, "step": 18181 }, { "epoch": 0.5356864016027577, "grad_norm": 1.4682272604107551, "learning_rate": 5.249816398436783e-06, "loss": 0.5178, "step": 18182 }, { "epoch": 0.5357158640602213, "grad_norm": 1.666685592588809, "learning_rate": 5.249302799758621e-06, "loss": 0.5972, "step": 18183 }, { "epoch": 0.5357453265176848, "grad_norm": 1.356865310024412, "learning_rate": 5.248789198443408e-06, "loss": 0.3932, "step": 18184 }, { "epoch": 0.5357747889751484, "grad_norm": 1.529366066709526, "learning_rate": 5.248275594496579e-06, "loss": 0.5481, "step": 18185 }, { "epoch": 0.535804251432612, "grad_norm": 1.6650011295902465, "learning_rate": 5.247761987923568e-06, "loss": 0.5842, "step": 18186 }, { "epoch": 0.5358337138900756, "grad_norm": 1.3355644122031216, "learning_rate": 5.247248378729805e-06, "loss": 0.3148, "step": 18187 }, { "epoch": 0.5358631763475391, "grad_norm": 1.5561186965233655, "learning_rate": 5.246734766920723e-06, "loss": 0.3819, "step": 18188 }, { "epoch": 0.5358926388050027, "grad_norm": 1.6643965338506372, "learning_rate": 5.246221152501758e-06, "loss": 0.5355, "step": 18189 }, { "epoch": 0.5359221012624663, "grad_norm": 1.4748765006148834, "learning_rate": 5.2457075354783385e-06, "loss": 0.3485, "step": 18190 }, { "epoch": 0.5359515637199299, "grad_norm": 1.43908439096871, "learning_rate": 5.2451939158559e-06, "loss": 0.4984, "step": 18191 }, { "epoch": 0.5359810261773934, "grad_norm": 1.5851627505782038, "learning_rate": 5.244680293639878e-06, "loss": 0.427, "step": 18192 }, { "epoch": 0.536010488634857, "grad_norm": 1.389986856544616, "learning_rate": 5.244166668835698e-06, "loss": 0.525, "step": 18193 }, { "epoch": 0.5360399510923206, "grad_norm": 1.4407826546351485, "learning_rate": 5.243653041448799e-06, "loss": 0.4351, "step": 18194 }, { "epoch": 0.5360694135497842, "grad_norm": 1.4138938983163067, "learning_rate": 5.243139411484614e-06, "loss": 0.4857, "step": 18195 }, { "epoch": 0.5360988760072478, "grad_norm": 1.739343037628296, "learning_rate": 5.242625778948572e-06, "loss": 0.53, "step": 18196 }, { "epoch": 0.5361283384647113, "grad_norm": 1.6455239507699777, "learning_rate": 5.242112143846111e-06, "loss": 0.5463, "step": 18197 }, { "epoch": 0.5361578009221749, "grad_norm": 1.274575523629708, "learning_rate": 5.24159850618266e-06, "loss": 0.3174, "step": 18198 }, { "epoch": 0.5361872633796385, "grad_norm": 1.5015822069203852, "learning_rate": 5.2410848659636534e-06, "loss": 0.5022, "step": 18199 }, { "epoch": 0.5362167258371021, "grad_norm": 1.4394706287096135, "learning_rate": 5.240571223194525e-06, "loss": 0.4782, "step": 18200 }, { "epoch": 0.5362461882945656, "grad_norm": 1.7203548618126798, "learning_rate": 5.240057577880708e-06, "loss": 0.4472, "step": 18201 }, { "epoch": 0.5362756507520292, "grad_norm": 1.4958580588558041, "learning_rate": 5.239543930027635e-06, "loss": 0.4288, "step": 18202 }, { "epoch": 0.5363051132094928, "grad_norm": 1.6579409227807689, "learning_rate": 5.2390302796407385e-06, "loss": 0.3722, "step": 18203 }, { "epoch": 0.5363345756669564, "grad_norm": 1.6596911206601024, "learning_rate": 5.238516626725454e-06, "loss": 0.556, "step": 18204 }, { "epoch": 0.5363640381244199, "grad_norm": 1.731620734078625, "learning_rate": 5.2380029712872115e-06, "loss": 0.6487, "step": 18205 }, { "epoch": 0.5363935005818835, "grad_norm": 1.579285962196419, "learning_rate": 5.2374893133314465e-06, "loss": 0.4789, "step": 18206 }, { "epoch": 0.5364229630393471, "grad_norm": 1.6836233636462377, "learning_rate": 5.236975652863593e-06, "loss": 0.612, "step": 18207 }, { "epoch": 0.5364524254968107, "grad_norm": 1.5506452870947312, "learning_rate": 5.236461989889082e-06, "loss": 0.5554, "step": 18208 }, { "epoch": 0.5364818879542743, "grad_norm": 1.7116096851076779, "learning_rate": 5.235948324413348e-06, "loss": 0.5981, "step": 18209 }, { "epoch": 0.5365113504117378, "grad_norm": 1.5573879411324842, "learning_rate": 5.235434656441825e-06, "loss": 0.4086, "step": 18210 }, { "epoch": 0.5365408128692014, "grad_norm": 1.916943678758084, "learning_rate": 5.234920985979947e-06, "loss": 0.2575, "step": 18211 }, { "epoch": 0.536570275326665, "grad_norm": 1.6529881288476305, "learning_rate": 5.234407313033146e-06, "loss": 0.5189, "step": 18212 }, { "epoch": 0.5365997377841286, "grad_norm": 1.5501566607624273, "learning_rate": 5.233893637606856e-06, "loss": 0.4475, "step": 18213 }, { "epoch": 0.5366292002415921, "grad_norm": 1.6289088143543036, "learning_rate": 5.23337995970651e-06, "loss": 0.5143, "step": 18214 }, { "epoch": 0.5366586626990557, "grad_norm": 1.336021856090704, "learning_rate": 5.232866279337541e-06, "loss": 0.3449, "step": 18215 }, { "epoch": 0.5366881251565193, "grad_norm": 1.5757064404658958, "learning_rate": 5.232352596505384e-06, "loss": 0.4994, "step": 18216 }, { "epoch": 0.5367175876139829, "grad_norm": 1.585458605361112, "learning_rate": 5.231838911215473e-06, "loss": 0.4987, "step": 18217 }, { "epoch": 0.5367470500714464, "grad_norm": 1.3642940948144913, "learning_rate": 5.231325223473239e-06, "loss": 0.3998, "step": 18218 }, { "epoch": 0.53677651252891, "grad_norm": 1.5507545974272168, "learning_rate": 5.230811533284119e-06, "loss": 0.5056, "step": 18219 }, { "epoch": 0.5368059749863736, "grad_norm": 1.6114557373350222, "learning_rate": 5.230297840653543e-06, "loss": 0.4598, "step": 18220 }, { "epoch": 0.5368354374438372, "grad_norm": 1.6511111496047257, "learning_rate": 5.229784145586948e-06, "loss": 0.4421, "step": 18221 }, { "epoch": 0.5368648999013008, "grad_norm": 1.7194226101048489, "learning_rate": 5.229270448089767e-06, "loss": 0.622, "step": 18222 }, { "epoch": 0.5368943623587643, "grad_norm": 1.5263760415257894, "learning_rate": 5.228756748167431e-06, "loss": 0.5228, "step": 18223 }, { "epoch": 0.5369238248162279, "grad_norm": 1.4468940778132224, "learning_rate": 5.228243045825377e-06, "loss": 0.444, "step": 18224 }, { "epoch": 0.5369532872736915, "grad_norm": 1.4089546172969174, "learning_rate": 5.227729341069039e-06, "loss": 0.3584, "step": 18225 }, { "epoch": 0.5369827497311551, "grad_norm": 1.7851652931257556, "learning_rate": 5.227215633903847e-06, "loss": 0.5621, "step": 18226 }, { "epoch": 0.5370122121886186, "grad_norm": 1.572738281828206, "learning_rate": 5.226701924335239e-06, "loss": 0.4649, "step": 18227 }, { "epoch": 0.5370416746460822, "grad_norm": 1.4705264012918282, "learning_rate": 5.226188212368647e-06, "loss": 0.5149, "step": 18228 }, { "epoch": 0.5370711371035458, "grad_norm": 1.3411986970185592, "learning_rate": 5.225674498009504e-06, "loss": 0.377, "step": 18229 }, { "epoch": 0.5371005995610094, "grad_norm": 1.5919860770055343, "learning_rate": 5.2251607812632455e-06, "loss": 0.3741, "step": 18230 }, { "epoch": 0.5371300620184729, "grad_norm": 1.4250720075260785, "learning_rate": 5.224647062135305e-06, "loss": 0.5113, "step": 18231 }, { "epoch": 0.5371595244759365, "grad_norm": 1.5653248995188518, "learning_rate": 5.2241333406311165e-06, "loss": 0.6119, "step": 18232 }, { "epoch": 0.5371889869334001, "grad_norm": 1.4908206609646546, "learning_rate": 5.223619616756113e-06, "loss": 0.5123, "step": 18233 }, { "epoch": 0.5372184493908637, "grad_norm": 1.397156359044631, "learning_rate": 5.22310589051573e-06, "loss": 0.5027, "step": 18234 }, { "epoch": 0.5372479118483273, "grad_norm": 1.4018821550844425, "learning_rate": 5.222592161915401e-06, "loss": 0.4697, "step": 18235 }, { "epoch": 0.5372773743057908, "grad_norm": 1.4212442173555657, "learning_rate": 5.2220784309605585e-06, "loss": 0.3231, "step": 18236 }, { "epoch": 0.5373068367632544, "grad_norm": 1.4519634181825354, "learning_rate": 5.221564697656641e-06, "loss": 0.3812, "step": 18237 }, { "epoch": 0.537336299220718, "grad_norm": 1.3386824887071518, "learning_rate": 5.221050962009077e-06, "loss": 0.4174, "step": 18238 }, { "epoch": 0.5373657616781816, "grad_norm": 1.4806170904345346, "learning_rate": 5.220537224023304e-06, "loss": 0.4238, "step": 18239 }, { "epoch": 0.5373952241356451, "grad_norm": 1.5867932177729747, "learning_rate": 5.220023483704756e-06, "loss": 0.6109, "step": 18240 }, { "epoch": 0.5374246865931087, "grad_norm": 1.5148333607176676, "learning_rate": 5.219509741058866e-06, "loss": 0.4328, "step": 18241 }, { "epoch": 0.5374541490505723, "grad_norm": 1.4989582121010572, "learning_rate": 5.218995996091069e-06, "loss": 0.467, "step": 18242 }, { "epoch": 0.5374836115080359, "grad_norm": 1.47349713571099, "learning_rate": 5.2184822488068e-06, "loss": 0.389, "step": 18243 }, { "epoch": 0.5375130739654994, "grad_norm": 1.4523569159377532, "learning_rate": 5.217968499211491e-06, "loss": 0.4027, "step": 18244 }, { "epoch": 0.537542536422963, "grad_norm": 1.5603492936765597, "learning_rate": 5.217454747310577e-06, "loss": 0.5704, "step": 18245 }, { "epoch": 0.5375719988804266, "grad_norm": 1.655979484182512, "learning_rate": 5.216940993109495e-06, "loss": 0.3441, "step": 18246 }, { "epoch": 0.5376014613378902, "grad_norm": 1.5315676653149668, "learning_rate": 5.216427236613677e-06, "loss": 0.5023, "step": 18247 }, { "epoch": 0.5376309237953538, "grad_norm": 1.5466081526307016, "learning_rate": 5.215913477828557e-06, "loss": 0.4455, "step": 18248 }, { "epoch": 0.5376603862528173, "grad_norm": 1.5102516836691482, "learning_rate": 5.21539971675957e-06, "loss": 0.4418, "step": 18249 }, { "epoch": 0.5376898487102809, "grad_norm": 1.5872564380317906, "learning_rate": 5.214885953412151e-06, "loss": 0.4287, "step": 18250 }, { "epoch": 0.5377193111677445, "grad_norm": 1.6988023602287632, "learning_rate": 5.214372187791732e-06, "loss": 0.5011, "step": 18251 }, { "epoch": 0.5377487736252081, "grad_norm": 1.4457298178554274, "learning_rate": 5.213858419903752e-06, "loss": 0.437, "step": 18252 }, { "epoch": 0.5377782360826716, "grad_norm": 1.7761505981827121, "learning_rate": 5.213344649753642e-06, "loss": 0.4609, "step": 18253 }, { "epoch": 0.5378076985401352, "grad_norm": 1.5873477234044655, "learning_rate": 5.212830877346836e-06, "loss": 0.4631, "step": 18254 }, { "epoch": 0.5378371609975988, "grad_norm": 1.3958320487927007, "learning_rate": 5.212317102688771e-06, "loss": 0.4489, "step": 18255 }, { "epoch": 0.5378666234550624, "grad_norm": 1.4331113017435184, "learning_rate": 5.21180332578488e-06, "loss": 0.4073, "step": 18256 }, { "epoch": 0.5378960859125259, "grad_norm": 1.7168711392396845, "learning_rate": 5.211289546640599e-06, "loss": 0.4621, "step": 18257 }, { "epoch": 0.5379255483699895, "grad_norm": 1.4660051335834257, "learning_rate": 5.21077576526136e-06, "loss": 0.5547, "step": 18258 }, { "epoch": 0.5379550108274531, "grad_norm": 1.8159203420614236, "learning_rate": 5.2102619816526e-06, "loss": 0.4772, "step": 18259 }, { "epoch": 0.5379844732849167, "grad_norm": 1.5971927586765082, "learning_rate": 5.2097481958197524e-06, "loss": 0.4121, "step": 18260 }, { "epoch": 0.5380139357423803, "grad_norm": 1.342309011525261, "learning_rate": 5.209234407768253e-06, "loss": 0.3892, "step": 18261 }, { "epoch": 0.5380433981998438, "grad_norm": 1.461636310957968, "learning_rate": 5.208720617503535e-06, "loss": 0.4043, "step": 18262 }, { "epoch": 0.5380728606573074, "grad_norm": 1.5304758030038945, "learning_rate": 5.2082068250310345e-06, "loss": 0.4927, "step": 18263 }, { "epoch": 0.538102323114771, "grad_norm": 1.452264629915826, "learning_rate": 5.2076930303561854e-06, "loss": 0.3812, "step": 18264 }, { "epoch": 0.5381317855722346, "grad_norm": 1.3885888551502352, "learning_rate": 5.207179233484423e-06, "loss": 0.4226, "step": 18265 }, { "epoch": 0.5381612480296981, "grad_norm": 1.614510372489016, "learning_rate": 5.206665434421181e-06, "loss": 0.5177, "step": 18266 }, { "epoch": 0.5381907104871617, "grad_norm": 1.4379384161073983, "learning_rate": 5.2061516331718965e-06, "loss": 0.375, "step": 18267 }, { "epoch": 0.5382201729446253, "grad_norm": 1.4450882990710774, "learning_rate": 5.205637829742002e-06, "loss": 0.4476, "step": 18268 }, { "epoch": 0.5382496354020889, "grad_norm": 1.4578079077981783, "learning_rate": 5.2051240241369325e-06, "loss": 0.4278, "step": 18269 }, { "epoch": 0.5382790978595524, "grad_norm": 1.4304938329233936, "learning_rate": 5.204610216362125e-06, "loss": 0.4211, "step": 18270 }, { "epoch": 0.538308560317016, "grad_norm": 1.3543441178096565, "learning_rate": 5.204096406423011e-06, "loss": 0.4284, "step": 18271 }, { "epoch": 0.5383380227744796, "grad_norm": 1.2474130224090678, "learning_rate": 5.203582594325029e-06, "loss": 0.3113, "step": 18272 }, { "epoch": 0.5383674852319432, "grad_norm": 1.4327075801749363, "learning_rate": 5.203068780073612e-06, "loss": 0.3693, "step": 18273 }, { "epoch": 0.5383969476894068, "grad_norm": 1.3302580005659939, "learning_rate": 5.202554963674197e-06, "loss": 0.3247, "step": 18274 }, { "epoch": 0.5384264101468703, "grad_norm": 1.5731103542733, "learning_rate": 5.202041145132215e-06, "loss": 0.5632, "step": 18275 }, { "epoch": 0.5384558726043339, "grad_norm": 1.4150331681089399, "learning_rate": 5.201527324453104e-06, "loss": 0.4327, "step": 18276 }, { "epoch": 0.5384853350617975, "grad_norm": 1.701379071114181, "learning_rate": 5.201013501642299e-06, "loss": 0.4472, "step": 18277 }, { "epoch": 0.5385147975192611, "grad_norm": 1.60085058785458, "learning_rate": 5.2004996767052325e-06, "loss": 0.4116, "step": 18278 }, { "epoch": 0.5385442599767246, "grad_norm": 1.5552477195836623, "learning_rate": 5.199985849647343e-06, "loss": 0.4448, "step": 18279 }, { "epoch": 0.5385737224341882, "grad_norm": 1.5402158371343502, "learning_rate": 5.1994720204740645e-06, "loss": 0.5511, "step": 18280 }, { "epoch": 0.5386031848916518, "grad_norm": 1.408171567581142, "learning_rate": 5.19895818919083e-06, "loss": 0.3649, "step": 18281 }, { "epoch": 0.5386326473491154, "grad_norm": 1.4807999493131874, "learning_rate": 5.19844435580308e-06, "loss": 0.5307, "step": 18282 }, { "epoch": 0.5386621098065789, "grad_norm": 1.6278675500756425, "learning_rate": 5.1979305203162436e-06, "loss": 0.5501, "step": 18283 }, { "epoch": 0.5386915722640425, "grad_norm": 1.4416993722582307, "learning_rate": 5.197416682735757e-06, "loss": 0.4524, "step": 18284 }, { "epoch": 0.5387210347215061, "grad_norm": 1.750207292684103, "learning_rate": 5.196902843067058e-06, "loss": 0.4742, "step": 18285 }, { "epoch": 0.5387504971789697, "grad_norm": 1.3862517515252921, "learning_rate": 5.196389001315583e-06, "loss": 0.3785, "step": 18286 }, { "epoch": 0.5387799596364333, "grad_norm": 1.4075379398129388, "learning_rate": 5.195875157486763e-06, "loss": 0.3483, "step": 18287 }, { "epoch": 0.5388094220938968, "grad_norm": 1.459148523411438, "learning_rate": 5.195361311586036e-06, "loss": 0.4897, "step": 18288 }, { "epoch": 0.5388388845513604, "grad_norm": 1.480931845984531, "learning_rate": 5.194847463618836e-06, "loss": 0.4771, "step": 18289 }, { "epoch": 0.538868347008824, "grad_norm": 1.464790514813762, "learning_rate": 5.194333613590598e-06, "loss": 0.4505, "step": 18290 }, { "epoch": 0.5388978094662876, "grad_norm": 1.4975146547453806, "learning_rate": 5.19381976150676e-06, "loss": 0.543, "step": 18291 }, { "epoch": 0.5389272719237511, "grad_norm": 1.4843544442896137, "learning_rate": 5.193305907372756e-06, "loss": 0.4159, "step": 18292 }, { "epoch": 0.5389567343812147, "grad_norm": 1.7153081193564992, "learning_rate": 5.1927920511940185e-06, "loss": 0.5203, "step": 18293 }, { "epoch": 0.5389861968386783, "grad_norm": 1.5669504650189847, "learning_rate": 5.192278192975987e-06, "loss": 0.5384, "step": 18294 }, { "epoch": 0.5390156592961419, "grad_norm": 1.5412783222946889, "learning_rate": 5.191764332724095e-06, "loss": 0.4653, "step": 18295 }, { "epoch": 0.5390451217536054, "grad_norm": 1.373908307110502, "learning_rate": 5.191250470443778e-06, "loss": 0.4693, "step": 18296 }, { "epoch": 0.539074584211069, "grad_norm": 1.422932853288862, "learning_rate": 5.190736606140474e-06, "loss": 0.3937, "step": 18297 }, { "epoch": 0.5391040466685326, "grad_norm": 1.4477465075376503, "learning_rate": 5.190222739819615e-06, "loss": 0.3047, "step": 18298 }, { "epoch": 0.5391335091259962, "grad_norm": 1.7289772545095896, "learning_rate": 5.1897088714866375e-06, "loss": 0.3627, "step": 18299 }, { "epoch": 0.5391629715834598, "grad_norm": 1.5689649516831703, "learning_rate": 5.189195001146976e-06, "loss": 0.4278, "step": 18300 }, { "epoch": 0.5391924340409233, "grad_norm": 1.5473240057608442, "learning_rate": 5.188681128806069e-06, "loss": 0.5, "step": 18301 }, { "epoch": 0.5392218964983869, "grad_norm": 1.4030737211909654, "learning_rate": 5.188167254469352e-06, "loss": 0.3359, "step": 18302 }, { "epoch": 0.5392513589558505, "grad_norm": 1.6391277091069933, "learning_rate": 5.187653378142257e-06, "loss": 0.4668, "step": 18303 }, { "epoch": 0.5392808214133141, "grad_norm": 1.4188335575343785, "learning_rate": 5.187139499830223e-06, "loss": 0.3697, "step": 18304 }, { "epoch": 0.5393102838707776, "grad_norm": 1.3964441764675888, "learning_rate": 5.186625619538682e-06, "loss": 0.3928, "step": 18305 }, { "epoch": 0.5393397463282412, "grad_norm": 1.3582487407754762, "learning_rate": 5.186111737273073e-06, "loss": 0.3938, "step": 18306 }, { "epoch": 0.5393692087857048, "grad_norm": 1.557948770026417, "learning_rate": 5.185597853038834e-06, "loss": 0.4039, "step": 18307 }, { "epoch": 0.5393986712431684, "grad_norm": 1.7133154157710595, "learning_rate": 5.1850839668413944e-06, "loss": 0.553, "step": 18308 }, { "epoch": 0.5394281337006319, "grad_norm": 1.6284399558720983, "learning_rate": 5.184570078686193e-06, "loss": 0.5259, "step": 18309 }, { "epoch": 0.5394575961580955, "grad_norm": 1.5481963491747923, "learning_rate": 5.184056188578667e-06, "loss": 0.5612, "step": 18310 }, { "epoch": 0.5394870586155591, "grad_norm": 1.6021710316701407, "learning_rate": 5.183542296524249e-06, "loss": 0.623, "step": 18311 }, { "epoch": 0.5395165210730227, "grad_norm": 1.4402803467207166, "learning_rate": 5.183028402528377e-06, "loss": 0.5027, "step": 18312 }, { "epoch": 0.5395459835304863, "grad_norm": 1.3488798644250226, "learning_rate": 5.182514506596488e-06, "loss": 0.3893, "step": 18313 }, { "epoch": 0.5395754459879498, "grad_norm": 1.544300703907626, "learning_rate": 5.1820006087340135e-06, "loss": 0.5434, "step": 18314 }, { "epoch": 0.5396049084454134, "grad_norm": 1.5158065501886027, "learning_rate": 5.181486708946392e-06, "loss": 0.4313, "step": 18315 }, { "epoch": 0.539634370902877, "grad_norm": 1.608343991378523, "learning_rate": 5.180972807239063e-06, "loss": 0.5816, "step": 18316 }, { "epoch": 0.5396638333603406, "grad_norm": 1.705715840375417, "learning_rate": 5.180458903617457e-06, "loss": 0.5681, "step": 18317 }, { "epoch": 0.5396932958178041, "grad_norm": 1.8177162168554635, "learning_rate": 5.179944998087011e-06, "loss": 0.4372, "step": 18318 }, { "epoch": 0.5397227582752677, "grad_norm": 1.6077391594072203, "learning_rate": 5.179431090653162e-06, "loss": 0.4589, "step": 18319 }, { "epoch": 0.5397522207327313, "grad_norm": 1.7357923646893654, "learning_rate": 5.178917181321345e-06, "loss": 0.5629, "step": 18320 }, { "epoch": 0.5397816831901949, "grad_norm": 1.521494818364034, "learning_rate": 5.178403270096997e-06, "loss": 0.4166, "step": 18321 }, { "epoch": 0.5398111456476584, "grad_norm": 1.5820082443154173, "learning_rate": 5.177889356985555e-06, "loss": 0.4573, "step": 18322 }, { "epoch": 0.539840608105122, "grad_norm": 1.6421746560371335, "learning_rate": 5.1773754419924515e-06, "loss": 0.508, "step": 18323 }, { "epoch": 0.5398700705625856, "grad_norm": 1.5499416628779628, "learning_rate": 5.176861525123124e-06, "loss": 0.3555, "step": 18324 }, { "epoch": 0.5398995330200492, "grad_norm": 1.3138456457026697, "learning_rate": 5.176347606383012e-06, "loss": 0.3358, "step": 18325 }, { "epoch": 0.5399289954775128, "grad_norm": 1.7102753392281778, "learning_rate": 5.175833685777547e-06, "loss": 0.5936, "step": 18326 }, { "epoch": 0.5399584579349763, "grad_norm": 1.4950378444647638, "learning_rate": 5.175319763312168e-06, "loss": 0.3943, "step": 18327 }, { "epoch": 0.5399879203924399, "grad_norm": 1.629156016881719, "learning_rate": 5.1748058389923095e-06, "loss": 0.4905, "step": 18328 }, { "epoch": 0.5400173828499035, "grad_norm": 1.4436031606667108, "learning_rate": 5.174291912823408e-06, "loss": 0.481, "step": 18329 }, { "epoch": 0.5400468453073671, "grad_norm": 1.5157560138347361, "learning_rate": 5.1737779848108995e-06, "loss": 0.423, "step": 18330 }, { "epoch": 0.5400763077648306, "grad_norm": 1.5660908156322841, "learning_rate": 5.173264054960221e-06, "loss": 0.4567, "step": 18331 }, { "epoch": 0.5401057702222942, "grad_norm": 1.4042562838824066, "learning_rate": 5.172750123276807e-06, "loss": 0.3629, "step": 18332 }, { "epoch": 0.5401352326797578, "grad_norm": 1.5345842263325586, "learning_rate": 5.172236189766096e-06, "loss": 0.4847, "step": 18333 }, { "epoch": 0.5401646951372214, "grad_norm": 1.447006128279887, "learning_rate": 5.171722254433523e-06, "loss": 0.3562, "step": 18334 }, { "epoch": 0.5401941575946849, "grad_norm": 1.2898189991917344, "learning_rate": 5.171208317284523e-06, "loss": 0.3161, "step": 18335 }, { "epoch": 0.5402236200521485, "grad_norm": 1.5037718467316052, "learning_rate": 5.170694378324535e-06, "loss": 0.4123, "step": 18336 }, { "epoch": 0.5402530825096121, "grad_norm": 1.6053138819713406, "learning_rate": 5.170180437558994e-06, "loss": 0.3844, "step": 18337 }, { "epoch": 0.5402825449670757, "grad_norm": 1.6779627677237885, "learning_rate": 5.1696664949933355e-06, "loss": 0.5482, "step": 18338 }, { "epoch": 0.5403120074245393, "grad_norm": 1.4669627417015643, "learning_rate": 5.169152550632995e-06, "loss": 0.4961, "step": 18339 }, { "epoch": 0.5403414698820028, "grad_norm": 1.68576485956178, "learning_rate": 5.168638604483413e-06, "loss": 0.6643, "step": 18340 }, { "epoch": 0.5403709323394664, "grad_norm": 1.7937689182981464, "learning_rate": 5.168124656550022e-06, "loss": 0.4468, "step": 18341 }, { "epoch": 0.54040039479693, "grad_norm": 1.3405279943506718, "learning_rate": 5.167610706838261e-06, "loss": 0.3392, "step": 18342 }, { "epoch": 0.5404298572543936, "grad_norm": 1.4135114175069572, "learning_rate": 5.167096755353564e-06, "loss": 0.3336, "step": 18343 }, { "epoch": 0.5404593197118571, "grad_norm": 1.4459217321797726, "learning_rate": 5.166582802101368e-06, "loss": 0.5136, "step": 18344 }, { "epoch": 0.5404887821693207, "grad_norm": 1.4606049235219885, "learning_rate": 5.166068847087109e-06, "loss": 0.4151, "step": 18345 }, { "epoch": 0.5405182446267843, "grad_norm": 1.3985624782843236, "learning_rate": 5.165554890316227e-06, "loss": 0.4007, "step": 18346 }, { "epoch": 0.5405477070842479, "grad_norm": 1.5933334620591493, "learning_rate": 5.165040931794154e-06, "loss": 0.5025, "step": 18347 }, { "epoch": 0.5405771695417114, "grad_norm": 1.4672957228110783, "learning_rate": 5.164526971526329e-06, "loss": 0.5441, "step": 18348 }, { "epoch": 0.540606631999175, "grad_norm": 1.4580191580821202, "learning_rate": 5.164013009518187e-06, "loss": 0.4291, "step": 18349 }, { "epoch": 0.5406360944566386, "grad_norm": 1.5221547352301172, "learning_rate": 5.163499045775165e-06, "loss": 0.5301, "step": 18350 }, { "epoch": 0.5406655569141022, "grad_norm": 1.5681268478031603, "learning_rate": 5.162985080302701e-06, "loss": 0.4703, "step": 18351 }, { "epoch": 0.5406950193715658, "grad_norm": 1.5195894192203225, "learning_rate": 5.162471113106232e-06, "loss": 0.3246, "step": 18352 }, { "epoch": 0.5407244818290293, "grad_norm": 1.5148640544809506, "learning_rate": 5.161957144191191e-06, "loss": 0.449, "step": 18353 }, { "epoch": 0.5407539442864929, "grad_norm": 1.3840203453577302, "learning_rate": 5.161443173563017e-06, "loss": 0.4491, "step": 18354 }, { "epoch": 0.5407834067439565, "grad_norm": 1.7309442826100865, "learning_rate": 5.160929201227146e-06, "loss": 0.6245, "step": 18355 }, { "epoch": 0.5408128692014201, "grad_norm": 1.6980425613010826, "learning_rate": 5.1604152271890154e-06, "loss": 0.4679, "step": 18356 }, { "epoch": 0.5408423316588836, "grad_norm": 1.4083164093414626, "learning_rate": 5.159901251454062e-06, "loss": 0.3668, "step": 18357 }, { "epoch": 0.5408717941163472, "grad_norm": 1.5740496808647755, "learning_rate": 5.159387274027722e-06, "loss": 0.5803, "step": 18358 }, { "epoch": 0.5409012565738108, "grad_norm": 1.464772357740846, "learning_rate": 5.158873294915431e-06, "loss": 0.3087, "step": 18359 }, { "epoch": 0.5409307190312744, "grad_norm": 1.618208207330827, "learning_rate": 5.158359314122626e-06, "loss": 0.6081, "step": 18360 }, { "epoch": 0.5409601814887379, "grad_norm": 1.3347352562896986, "learning_rate": 5.1578453316547465e-06, "loss": 0.3351, "step": 18361 }, { "epoch": 0.5409896439462015, "grad_norm": 1.777752528915151, "learning_rate": 5.157331347517226e-06, "loss": 0.4517, "step": 18362 }, { "epoch": 0.5410191064036651, "grad_norm": 1.402351962204309, "learning_rate": 5.1568173617155015e-06, "loss": 0.3044, "step": 18363 }, { "epoch": 0.5410485688611287, "grad_norm": 1.4676586287040845, "learning_rate": 5.156303374255013e-06, "loss": 0.4949, "step": 18364 }, { "epoch": 0.5410780313185923, "grad_norm": 1.566518208521996, "learning_rate": 5.155789385141193e-06, "loss": 0.432, "step": 18365 }, { "epoch": 0.5411074937760558, "grad_norm": 1.4387896551164083, "learning_rate": 5.155275394379481e-06, "loss": 0.4435, "step": 18366 }, { "epoch": 0.5411369562335194, "grad_norm": 1.485738598954362, "learning_rate": 5.154761401975314e-06, "loss": 0.4172, "step": 18367 }, { "epoch": 0.541166418690983, "grad_norm": 1.5432179008672688, "learning_rate": 5.154247407934128e-06, "loss": 0.4958, "step": 18368 }, { "epoch": 0.5411958811484466, "grad_norm": 1.5944495140425816, "learning_rate": 5.153733412261358e-06, "loss": 0.4033, "step": 18369 }, { "epoch": 0.5412253436059101, "grad_norm": 1.4730033549540618, "learning_rate": 5.153219414962445e-06, "loss": 0.2845, "step": 18370 }, { "epoch": 0.5412548060633737, "grad_norm": 1.4398709881389793, "learning_rate": 5.1527054160428226e-06, "loss": 0.4225, "step": 18371 }, { "epoch": 0.5412842685208373, "grad_norm": 1.3128640077794531, "learning_rate": 5.15219141550793e-06, "loss": 0.3489, "step": 18372 }, { "epoch": 0.5413137309783009, "grad_norm": 1.3636672137223678, "learning_rate": 5.151677413363201e-06, "loss": 0.4118, "step": 18373 }, { "epoch": 0.5413431934357644, "grad_norm": 1.3860881640214964, "learning_rate": 5.1511634096140765e-06, "loss": 0.3919, "step": 18374 }, { "epoch": 0.541372655893228, "grad_norm": 1.6587039960701406, "learning_rate": 5.150649404265991e-06, "loss": 0.5346, "step": 18375 }, { "epoch": 0.5414021183506916, "grad_norm": 1.4792437387276745, "learning_rate": 5.150135397324382e-06, "loss": 0.4545, "step": 18376 }, { "epoch": 0.5414315808081552, "grad_norm": 1.4217403588247763, "learning_rate": 5.149621388794687e-06, "loss": 0.4738, "step": 18377 }, { "epoch": 0.5414610432656188, "grad_norm": 1.5916923546558128, "learning_rate": 5.149107378682341e-06, "loss": 0.5845, "step": 18378 }, { "epoch": 0.5414905057230823, "grad_norm": 1.6513177143971465, "learning_rate": 5.148593366992784e-06, "loss": 0.3923, "step": 18379 }, { "epoch": 0.5415199681805459, "grad_norm": 1.6473051791787054, "learning_rate": 5.14807935373145e-06, "loss": 0.4625, "step": 18380 }, { "epoch": 0.5415494306380095, "grad_norm": 1.4953464873824944, "learning_rate": 5.147565338903778e-06, "loss": 0.3675, "step": 18381 }, { "epoch": 0.5415788930954731, "grad_norm": 1.3876119216071516, "learning_rate": 5.1470513225152066e-06, "loss": 0.4608, "step": 18382 }, { "epoch": 0.5416083555529366, "grad_norm": 1.422513809350356, "learning_rate": 5.146537304571171e-06, "loss": 0.3576, "step": 18383 }, { "epoch": 0.5416378180104002, "grad_norm": 1.4668707438537019, "learning_rate": 5.146023285077106e-06, "loss": 0.432, "step": 18384 }, { "epoch": 0.5416672804678638, "grad_norm": 1.774952253364799, "learning_rate": 5.145509264038454e-06, "loss": 0.672, "step": 18385 }, { "epoch": 0.5416967429253274, "grad_norm": 1.3078283061347, "learning_rate": 5.144995241460648e-06, "loss": 0.3966, "step": 18386 }, { "epoch": 0.5417262053827909, "grad_norm": 1.6246016730863835, "learning_rate": 5.144481217349126e-06, "loss": 0.4662, "step": 18387 }, { "epoch": 0.5417556678402545, "grad_norm": 1.3525437529225264, "learning_rate": 5.143967191709326e-06, "loss": 0.4165, "step": 18388 }, { "epoch": 0.5417851302977181, "grad_norm": 1.6229035269657093, "learning_rate": 5.143453164546686e-06, "loss": 0.4899, "step": 18389 }, { "epoch": 0.5418145927551817, "grad_norm": 1.4849403876665397, "learning_rate": 5.142939135866641e-06, "loss": 0.5378, "step": 18390 }, { "epoch": 0.5418440552126453, "grad_norm": 1.6433630919674838, "learning_rate": 5.1424251056746296e-06, "loss": 0.5791, "step": 18391 }, { "epoch": 0.5418735176701088, "grad_norm": 1.3081170776818023, "learning_rate": 5.14191107397609e-06, "loss": 0.4011, "step": 18392 }, { "epoch": 0.5419029801275724, "grad_norm": 1.2552172980500071, "learning_rate": 5.141397040776457e-06, "loss": 0.2792, "step": 18393 }, { "epoch": 0.541932442585036, "grad_norm": 1.296962734690656, "learning_rate": 5.140883006081168e-06, "loss": 0.3984, "step": 18394 }, { "epoch": 0.5419619050424996, "grad_norm": 1.7232212203219945, "learning_rate": 5.140368969895665e-06, "loss": 0.5611, "step": 18395 }, { "epoch": 0.5419913674999631, "grad_norm": 1.4652351431015123, "learning_rate": 5.139854932225379e-06, "loss": 0.4237, "step": 18396 }, { "epoch": 0.5420208299574267, "grad_norm": 1.4561633852567972, "learning_rate": 5.139340893075752e-06, "loss": 0.4289, "step": 18397 }, { "epoch": 0.5420502924148903, "grad_norm": 1.6572997624085224, "learning_rate": 5.138826852452218e-06, "loss": 0.5851, "step": 18398 }, { "epoch": 0.5420797548723539, "grad_norm": 1.3311532542299434, "learning_rate": 5.138312810360216e-06, "loss": 0.3773, "step": 18399 }, { "epoch": 0.5421092173298174, "grad_norm": 1.4111707034917087, "learning_rate": 5.137798766805185e-06, "loss": 0.4493, "step": 18400 }, { "epoch": 0.542138679787281, "grad_norm": 1.3066396465348447, "learning_rate": 5.1372847217925584e-06, "loss": 0.3123, "step": 18401 }, { "epoch": 0.5421681422447446, "grad_norm": 1.4323911520918589, "learning_rate": 5.1367706753277765e-06, "loss": 0.4833, "step": 18402 }, { "epoch": 0.5421976047022082, "grad_norm": 1.5576109240112566, "learning_rate": 5.136256627416276e-06, "loss": 0.4411, "step": 18403 }, { "epoch": 0.5422270671596718, "grad_norm": 1.6991544736382178, "learning_rate": 5.135742578063495e-06, "loss": 0.478, "step": 18404 }, { "epoch": 0.5422565296171353, "grad_norm": 1.5786591117073274, "learning_rate": 5.135228527274869e-06, "loss": 0.4386, "step": 18405 }, { "epoch": 0.5422859920745989, "grad_norm": 1.3897241719932156, "learning_rate": 5.134714475055838e-06, "loss": 0.4385, "step": 18406 }, { "epoch": 0.5423154545320625, "grad_norm": 1.716971330980737, "learning_rate": 5.13420042141184e-06, "loss": 0.5143, "step": 18407 }, { "epoch": 0.5423449169895261, "grad_norm": 1.6298270625109883, "learning_rate": 5.133686366348308e-06, "loss": 0.4101, "step": 18408 }, { "epoch": 0.5423743794469896, "grad_norm": 1.8336204410067443, "learning_rate": 5.1331723098706834e-06, "loss": 0.6324, "step": 18409 }, { "epoch": 0.5424038419044532, "grad_norm": 1.3468692038887222, "learning_rate": 5.132658251984403e-06, "loss": 0.4113, "step": 18410 }, { "epoch": 0.5424333043619168, "grad_norm": 1.545158420164987, "learning_rate": 5.132144192694903e-06, "loss": 0.5494, "step": 18411 }, { "epoch": 0.5424627668193804, "grad_norm": 1.4791395792381647, "learning_rate": 5.131630132007623e-06, "loss": 0.2577, "step": 18412 }, { "epoch": 0.5424922292768439, "grad_norm": 1.3637960343509132, "learning_rate": 5.1311160699280004e-06, "loss": 0.3989, "step": 18413 }, { "epoch": 0.5425216917343075, "grad_norm": 1.7035463717865174, "learning_rate": 5.130602006461471e-06, "loss": 0.5015, "step": 18414 }, { "epoch": 0.5425511541917711, "grad_norm": 1.3835845732597947, "learning_rate": 5.130087941613473e-06, "loss": 0.4838, "step": 18415 }, { "epoch": 0.5425806166492347, "grad_norm": 1.3448972615637034, "learning_rate": 5.129573875389446e-06, "loss": 0.4848, "step": 18416 }, { "epoch": 0.5426100791066983, "grad_norm": 1.661086806690349, "learning_rate": 5.129059807794824e-06, "loss": 0.6864, "step": 18417 }, { "epoch": 0.5426395415641618, "grad_norm": 1.6056088829646566, "learning_rate": 5.128545738835047e-06, "loss": 0.4076, "step": 18418 }, { "epoch": 0.5426690040216254, "grad_norm": 1.3295422660619916, "learning_rate": 5.1280316685155525e-06, "loss": 0.4452, "step": 18419 }, { "epoch": 0.542698466479089, "grad_norm": 1.7300456381458242, "learning_rate": 5.127517596841779e-06, "loss": 0.476, "step": 18420 }, { "epoch": 0.5427279289365526, "grad_norm": 1.504642158985474, "learning_rate": 5.127003523819162e-06, "loss": 0.4117, "step": 18421 }, { "epoch": 0.5427573913940161, "grad_norm": 1.3832498211467115, "learning_rate": 5.126489449453142e-06, "loss": 0.351, "step": 18422 }, { "epoch": 0.5427868538514797, "grad_norm": 1.7078879790995576, "learning_rate": 5.125975373749153e-06, "loss": 0.4687, "step": 18423 }, { "epoch": 0.5428163163089433, "grad_norm": 1.4684542997727945, "learning_rate": 5.1254612967126364e-06, "loss": 0.3694, "step": 18424 }, { "epoch": 0.5428457787664069, "grad_norm": 1.584854864644312, "learning_rate": 5.124947218349029e-06, "loss": 0.4862, "step": 18425 }, { "epoch": 0.5428752412238704, "grad_norm": 1.5573535897960489, "learning_rate": 5.124433138663766e-06, "loss": 0.5533, "step": 18426 }, { "epoch": 0.542904703681334, "grad_norm": 1.5246671389196003, "learning_rate": 5.1239190576622885e-06, "loss": 0.4631, "step": 18427 }, { "epoch": 0.5429341661387976, "grad_norm": 1.638605222153666, "learning_rate": 5.123404975350034e-06, "loss": 0.4562, "step": 18428 }, { "epoch": 0.5429636285962612, "grad_norm": 1.5353929196942577, "learning_rate": 5.122890891732437e-06, "loss": 0.3861, "step": 18429 }, { "epoch": 0.5429930910537248, "grad_norm": 1.4668773626731713, "learning_rate": 5.122376806814938e-06, "loss": 0.4586, "step": 18430 }, { "epoch": 0.5430225535111883, "grad_norm": 1.432289917095036, "learning_rate": 5.121862720602976e-06, "loss": 0.4976, "step": 18431 }, { "epoch": 0.5430520159686519, "grad_norm": 1.4549786353743783, "learning_rate": 5.121348633101987e-06, "loss": 0.3924, "step": 18432 }, { "epoch": 0.5430814784261155, "grad_norm": 1.3839883815046006, "learning_rate": 5.120834544317407e-06, "loss": 0.4148, "step": 18433 }, { "epoch": 0.5431109408835791, "grad_norm": 1.5444584898857519, "learning_rate": 5.120320454254678e-06, "loss": 0.3979, "step": 18434 }, { "epoch": 0.5431404033410426, "grad_norm": 1.2974947498547158, "learning_rate": 5.119806362919235e-06, "loss": 0.2762, "step": 18435 }, { "epoch": 0.5431698657985062, "grad_norm": 1.5523173880295786, "learning_rate": 5.119292270316517e-06, "loss": 0.3691, "step": 18436 }, { "epoch": 0.5431993282559698, "grad_norm": 1.7025794289726905, "learning_rate": 5.118778176451963e-06, "loss": 0.5066, "step": 18437 }, { "epoch": 0.5432287907134334, "grad_norm": 1.5186964196452186, "learning_rate": 5.118264081331008e-06, "loss": 0.4523, "step": 18438 }, { "epoch": 0.5432582531708969, "grad_norm": 1.2006512042617077, "learning_rate": 5.117749984959091e-06, "loss": 0.2941, "step": 18439 }, { "epoch": 0.5432877156283605, "grad_norm": 1.3999992253536115, "learning_rate": 5.117235887341653e-06, "loss": 0.3387, "step": 18440 }, { "epoch": 0.5433171780858241, "grad_norm": 1.51950857612938, "learning_rate": 5.116721788484128e-06, "loss": 0.3895, "step": 18441 }, { "epoch": 0.5433466405432877, "grad_norm": 1.820604103705879, "learning_rate": 5.116207688391955e-06, "loss": 0.5849, "step": 18442 }, { "epoch": 0.5433761030007513, "grad_norm": 1.6136830148927066, "learning_rate": 5.1156935870705745e-06, "loss": 0.4868, "step": 18443 }, { "epoch": 0.5434055654582148, "grad_norm": 1.5757764578237279, "learning_rate": 5.115179484525421e-06, "loss": 0.4151, "step": 18444 }, { "epoch": 0.5434350279156784, "grad_norm": 1.562838542226118, "learning_rate": 5.114665380761933e-06, "loss": 0.5363, "step": 18445 }, { "epoch": 0.543464490373142, "grad_norm": 1.4887916670590895, "learning_rate": 5.1141512757855515e-06, "loss": 0.5353, "step": 18446 }, { "epoch": 0.5434939528306056, "grad_norm": 1.772145605540812, "learning_rate": 5.113637169601713e-06, "loss": 0.5965, "step": 18447 }, { "epoch": 0.5435234152880691, "grad_norm": 1.3179453280081308, "learning_rate": 5.113123062215852e-06, "loss": 0.4397, "step": 18448 }, { "epoch": 0.5435528777455327, "grad_norm": 1.4129294787753086, "learning_rate": 5.1126089536334135e-06, "loss": 0.3839, "step": 18449 }, { "epoch": 0.5435823402029963, "grad_norm": 1.4883445118118064, "learning_rate": 5.11209484385983e-06, "loss": 0.4313, "step": 18450 }, { "epoch": 0.5436118026604599, "grad_norm": 1.555658655696534, "learning_rate": 5.1115807329005395e-06, "loss": 0.4178, "step": 18451 }, { "epoch": 0.5436412651179234, "grad_norm": 1.480163168940172, "learning_rate": 5.111066620760986e-06, "loss": 0.5171, "step": 18452 }, { "epoch": 0.543670727575387, "grad_norm": 1.5015117880228959, "learning_rate": 5.110552507446601e-06, "loss": 0.5207, "step": 18453 }, { "epoch": 0.5437001900328506, "grad_norm": 1.4945046787758762, "learning_rate": 5.110038392962825e-06, "loss": 0.4701, "step": 18454 }, { "epoch": 0.5437296524903142, "grad_norm": 1.4648049602803765, "learning_rate": 5.109524277315098e-06, "loss": 0.4347, "step": 18455 }, { "epoch": 0.5437591149477778, "grad_norm": 1.4789329329114211, "learning_rate": 5.109010160508855e-06, "loss": 0.4054, "step": 18456 }, { "epoch": 0.5437885774052413, "grad_norm": 1.5236840142053119, "learning_rate": 5.108496042549538e-06, "loss": 0.4042, "step": 18457 }, { "epoch": 0.5438180398627049, "grad_norm": 1.6211878489528677, "learning_rate": 5.107981923442581e-06, "loss": 0.5753, "step": 18458 }, { "epoch": 0.5438475023201685, "grad_norm": 1.5095015071302453, "learning_rate": 5.107467803193424e-06, "loss": 0.3912, "step": 18459 }, { "epoch": 0.5438769647776321, "grad_norm": 1.6011381163919256, "learning_rate": 5.106953681807506e-06, "loss": 0.3391, "step": 18460 }, { "epoch": 0.5439064272350956, "grad_norm": 1.5357125443641981, "learning_rate": 5.106439559290265e-06, "loss": 0.4652, "step": 18461 }, { "epoch": 0.5439358896925592, "grad_norm": 1.428986798781654, "learning_rate": 5.105925435647139e-06, "loss": 0.4413, "step": 18462 }, { "epoch": 0.5439653521500228, "grad_norm": 1.4142657709621167, "learning_rate": 5.105411310883565e-06, "loss": 0.4156, "step": 18463 }, { "epoch": 0.5439948146074864, "grad_norm": 1.527274610730364, "learning_rate": 5.1048971850049825e-06, "loss": 0.5448, "step": 18464 }, { "epoch": 0.5440242770649499, "grad_norm": 1.643692016832343, "learning_rate": 5.104383058016829e-06, "loss": 0.4632, "step": 18465 }, { "epoch": 0.5440537395224135, "grad_norm": 1.3287497348796664, "learning_rate": 5.103868929924544e-06, "loss": 0.379, "step": 18466 }, { "epoch": 0.5440832019798771, "grad_norm": 1.5986077789267172, "learning_rate": 5.103354800733567e-06, "loss": 0.5147, "step": 18467 }, { "epoch": 0.5441126644373407, "grad_norm": 1.5143306657536597, "learning_rate": 5.102840670449331e-06, "loss": 0.3507, "step": 18468 }, { "epoch": 0.5441421268948043, "grad_norm": 1.8819488807097249, "learning_rate": 5.102326539077278e-06, "loss": 0.4243, "step": 18469 }, { "epoch": 0.5441715893522678, "grad_norm": 1.3543926863750988, "learning_rate": 5.101812406622848e-06, "loss": 0.3599, "step": 18470 }, { "epoch": 0.5442010518097314, "grad_norm": 1.4290915142753742, "learning_rate": 5.101298273091476e-06, "loss": 0.4284, "step": 18471 }, { "epoch": 0.544230514267195, "grad_norm": 1.6599124052348848, "learning_rate": 5.1007841384886035e-06, "loss": 0.541, "step": 18472 }, { "epoch": 0.5442599767246586, "grad_norm": 1.6619824590216898, "learning_rate": 5.1002700028196665e-06, "loss": 0.5136, "step": 18473 }, { "epoch": 0.5442894391821221, "grad_norm": 1.3334544498099887, "learning_rate": 5.099755866090103e-06, "loss": 0.4607, "step": 18474 }, { "epoch": 0.5443189016395857, "grad_norm": 1.5186272466206001, "learning_rate": 5.099241728305352e-06, "loss": 0.438, "step": 18475 }, { "epoch": 0.5443483640970493, "grad_norm": 1.5697065032280713, "learning_rate": 5.098727589470853e-06, "loss": 0.453, "step": 18476 }, { "epoch": 0.544377826554513, "grad_norm": 1.520655453322556, "learning_rate": 5.098213449592044e-06, "loss": 0.5459, "step": 18477 }, { "epoch": 0.5444072890119764, "grad_norm": 1.4075054161563443, "learning_rate": 5.0976993086743635e-06, "loss": 0.2789, "step": 18478 }, { "epoch": 0.54443675146944, "grad_norm": 1.5653185131718, "learning_rate": 5.097185166723248e-06, "loss": 0.4296, "step": 18479 }, { "epoch": 0.5444662139269036, "grad_norm": 1.4588673253456041, "learning_rate": 5.096671023744138e-06, "loss": 0.4101, "step": 18480 }, { "epoch": 0.5444956763843672, "grad_norm": 1.5197582978623787, "learning_rate": 5.096156879742471e-06, "loss": 0.5239, "step": 18481 }, { "epoch": 0.5445251388418308, "grad_norm": 1.5403615911261137, "learning_rate": 5.095642734723687e-06, "loss": 0.5704, "step": 18482 }, { "epoch": 0.5445546012992943, "grad_norm": 1.4617633928222669, "learning_rate": 5.095128588693223e-06, "loss": 0.4603, "step": 18483 }, { "epoch": 0.5445840637567579, "grad_norm": 1.4976205941939236, "learning_rate": 5.094614441656517e-06, "loss": 0.5201, "step": 18484 }, { "epoch": 0.5446135262142215, "grad_norm": 1.6802186636012637, "learning_rate": 5.09410029361901e-06, "loss": 0.5282, "step": 18485 }, { "epoch": 0.5446429886716851, "grad_norm": 1.4665283879856594, "learning_rate": 5.093586144586137e-06, "loss": 0.3692, "step": 18486 }, { "epoch": 0.5446724511291486, "grad_norm": 1.7133934985146375, "learning_rate": 5.093071994563339e-06, "loss": 0.5251, "step": 18487 }, { "epoch": 0.5447019135866122, "grad_norm": 1.5104689233381028, "learning_rate": 5.092557843556054e-06, "loss": 0.4079, "step": 18488 }, { "epoch": 0.5447313760440758, "grad_norm": 1.3251367399486529, "learning_rate": 5.092043691569719e-06, "loss": 0.3387, "step": 18489 }, { "epoch": 0.5447608385015394, "grad_norm": 1.7825752456560484, "learning_rate": 5.0915295386097744e-06, "loss": 0.3825, "step": 18490 }, { "epoch": 0.5447903009590029, "grad_norm": 1.4022721319368239, "learning_rate": 5.091015384681659e-06, "loss": 0.4456, "step": 18491 }, { "epoch": 0.5448197634164665, "grad_norm": 1.5746008429243252, "learning_rate": 5.090501229790811e-06, "loss": 0.5064, "step": 18492 }, { "epoch": 0.5448492258739301, "grad_norm": 1.7662097464620605, "learning_rate": 5.089987073942667e-06, "loss": 0.5908, "step": 18493 }, { "epoch": 0.5448786883313937, "grad_norm": 1.5120800665503884, "learning_rate": 5.089472917142669e-06, "loss": 0.411, "step": 18494 }, { "epoch": 0.5449081507888573, "grad_norm": 1.3771229530516513, "learning_rate": 5.088958759396252e-06, "loss": 0.4101, "step": 18495 }, { "epoch": 0.5449376132463208, "grad_norm": 1.4687004188962558, "learning_rate": 5.088444600708856e-06, "loss": 0.2677, "step": 18496 }, { "epoch": 0.5449670757037844, "grad_norm": 1.3028266250078127, "learning_rate": 5.087930441085922e-06, "loss": 0.3923, "step": 18497 }, { "epoch": 0.544996538161248, "grad_norm": 1.485399652815795, "learning_rate": 5.087416280532887e-06, "loss": 0.4474, "step": 18498 }, { "epoch": 0.5450260006187116, "grad_norm": 1.5921240814202862, "learning_rate": 5.086902119055186e-06, "loss": 0.5191, "step": 18499 }, { "epoch": 0.5450554630761751, "grad_norm": 1.368838850609998, "learning_rate": 5.086387956658264e-06, "loss": 0.3814, "step": 18500 }, { "epoch": 0.5450849255336387, "grad_norm": 1.2304154255836741, "learning_rate": 5.0858737933475546e-06, "loss": 0.3349, "step": 18501 }, { "epoch": 0.5451143879911023, "grad_norm": 1.4852335175394018, "learning_rate": 5.085359629128499e-06, "loss": 0.4393, "step": 18502 }, { "epoch": 0.545143850448566, "grad_norm": 1.6119405927447497, "learning_rate": 5.084845464006535e-06, "loss": 0.4266, "step": 18503 }, { "epoch": 0.5451733129060294, "grad_norm": 1.376558040870612, "learning_rate": 5.084331297987103e-06, "loss": 0.4909, "step": 18504 }, { "epoch": 0.545202775363493, "grad_norm": 1.5179770866295057, "learning_rate": 5.083817131075638e-06, "loss": 0.3799, "step": 18505 }, { "epoch": 0.5452322378209566, "grad_norm": 1.5887486671535467, "learning_rate": 5.083302963277582e-06, "loss": 0.5447, "step": 18506 }, { "epoch": 0.5452617002784202, "grad_norm": 1.3816831408785253, "learning_rate": 5.082788794598373e-06, "loss": 0.4604, "step": 18507 }, { "epoch": 0.5452911627358838, "grad_norm": 1.4239492212411995, "learning_rate": 5.08227462504345e-06, "loss": 0.5427, "step": 18508 }, { "epoch": 0.5453206251933473, "grad_norm": 1.5665871643624563, "learning_rate": 5.081760454618249e-06, "loss": 0.5297, "step": 18509 }, { "epoch": 0.5453500876508109, "grad_norm": 1.4243505337372753, "learning_rate": 5.081246283328212e-06, "loss": 0.4376, "step": 18510 }, { "epoch": 0.5453795501082745, "grad_norm": 1.4320300602195355, "learning_rate": 5.080732111178777e-06, "loss": 0.4315, "step": 18511 }, { "epoch": 0.5454090125657381, "grad_norm": 1.433776619352963, "learning_rate": 5.080217938175381e-06, "loss": 0.3169, "step": 18512 }, { "epoch": 0.5454384750232016, "grad_norm": 1.5118275400913972, "learning_rate": 5.079703764323466e-06, "loss": 0.4633, "step": 18513 }, { "epoch": 0.5454679374806652, "grad_norm": 1.6109506291890474, "learning_rate": 5.079189589628467e-06, "loss": 0.5534, "step": 18514 }, { "epoch": 0.5454973999381288, "grad_norm": 1.5979458444650803, "learning_rate": 5.078675414095826e-06, "loss": 0.3798, "step": 18515 }, { "epoch": 0.5455268623955924, "grad_norm": 1.489421114078263, "learning_rate": 5.078161237730981e-06, "loss": 0.5374, "step": 18516 }, { "epoch": 0.5455563248530559, "grad_norm": 1.3502570520730746, "learning_rate": 5.077647060539368e-06, "loss": 0.3143, "step": 18517 }, { "epoch": 0.5455857873105195, "grad_norm": 1.5406392419927084, "learning_rate": 5.077132882526429e-06, "loss": 0.4503, "step": 18518 }, { "epoch": 0.5456152497679831, "grad_norm": 1.459511724886719, "learning_rate": 5.076618703697602e-06, "loss": 0.4395, "step": 18519 }, { "epoch": 0.5456447122254467, "grad_norm": 1.5619293672036334, "learning_rate": 5.076104524058325e-06, "loss": 0.3936, "step": 18520 }, { "epoch": 0.5456741746829104, "grad_norm": 1.6207832079836866, "learning_rate": 5.07559034361404e-06, "loss": 0.4963, "step": 18521 }, { "epoch": 0.5457036371403738, "grad_norm": 1.4773310812816012, "learning_rate": 5.075076162370182e-06, "loss": 0.3548, "step": 18522 }, { "epoch": 0.5457330995978374, "grad_norm": 1.3058896452818134, "learning_rate": 5.07456198033219e-06, "loss": 0.393, "step": 18523 }, { "epoch": 0.545762562055301, "grad_norm": 1.8230607610085747, "learning_rate": 5.074047797505505e-06, "loss": 0.5324, "step": 18524 }, { "epoch": 0.5457920245127647, "grad_norm": 1.757657549255728, "learning_rate": 5.073533613895565e-06, "loss": 0.549, "step": 18525 }, { "epoch": 0.5458214869702281, "grad_norm": 1.6031145565175113, "learning_rate": 5.073019429507809e-06, "loss": 0.4481, "step": 18526 }, { "epoch": 0.5458509494276917, "grad_norm": 1.5453356454586007, "learning_rate": 5.072505244347675e-06, "loss": 0.4768, "step": 18527 }, { "epoch": 0.5458804118851553, "grad_norm": 1.471606932196412, "learning_rate": 5.071991058420603e-06, "loss": 0.4219, "step": 18528 }, { "epoch": 0.545909874342619, "grad_norm": 1.3438902180565582, "learning_rate": 5.071476871732031e-06, "loss": 0.4184, "step": 18529 }, { "epoch": 0.5459393368000824, "grad_norm": 1.4121005620762324, "learning_rate": 5.070962684287399e-06, "loss": 0.4172, "step": 18530 }, { "epoch": 0.545968799257546, "grad_norm": 1.4812354804453687, "learning_rate": 5.070448496092147e-06, "loss": 0.3776, "step": 18531 }, { "epoch": 0.5459982617150096, "grad_norm": 1.71326707292575, "learning_rate": 5.069934307151711e-06, "loss": 0.3572, "step": 18532 }, { "epoch": 0.5460277241724732, "grad_norm": 1.495937100863793, "learning_rate": 5.069420117471531e-06, "loss": 0.4731, "step": 18533 }, { "epoch": 0.5460571866299369, "grad_norm": 1.4458155986442476, "learning_rate": 5.068905927057046e-06, "loss": 0.3957, "step": 18534 }, { "epoch": 0.5460866490874003, "grad_norm": 1.55092178925013, "learning_rate": 5.068391735913695e-06, "loss": 0.517, "step": 18535 }, { "epoch": 0.546116111544864, "grad_norm": 1.4726076425654253, "learning_rate": 5.067877544046917e-06, "loss": 0.4633, "step": 18536 }, { "epoch": 0.5461455740023275, "grad_norm": 1.6362769476697383, "learning_rate": 5.067363351462153e-06, "loss": 0.5045, "step": 18537 }, { "epoch": 0.5461750364597912, "grad_norm": 1.6024877235830177, "learning_rate": 5.066849158164837e-06, "loss": 0.4381, "step": 18538 }, { "epoch": 0.5462044989172546, "grad_norm": 1.2995931213254985, "learning_rate": 5.066334964160412e-06, "loss": 0.324, "step": 18539 }, { "epoch": 0.5462339613747182, "grad_norm": 1.6419168878731516, "learning_rate": 5.065820769454317e-06, "loss": 0.4265, "step": 18540 }, { "epoch": 0.5462634238321818, "grad_norm": 1.416400558770982, "learning_rate": 5.065306574051989e-06, "loss": 0.4364, "step": 18541 }, { "epoch": 0.5462928862896455, "grad_norm": 1.5018223413387206, "learning_rate": 5.064792377958868e-06, "loss": 0.495, "step": 18542 }, { "epoch": 0.5463223487471089, "grad_norm": 1.5494330748125729, "learning_rate": 5.064278181180395e-06, "loss": 0.5399, "step": 18543 }, { "epoch": 0.5463518112045725, "grad_norm": 1.4038686160605736, "learning_rate": 5.063763983722003e-06, "loss": 0.4046, "step": 18544 }, { "epoch": 0.5463812736620361, "grad_norm": 1.5445520755045787, "learning_rate": 5.063249785589137e-06, "loss": 0.5012, "step": 18545 }, { "epoch": 0.5464107361194998, "grad_norm": 1.306663041116048, "learning_rate": 5.062735586787235e-06, "loss": 0.3878, "step": 18546 }, { "epoch": 0.5464401985769634, "grad_norm": 1.5429253508277263, "learning_rate": 5.062221387321734e-06, "loss": 0.4696, "step": 18547 }, { "epoch": 0.5464696610344268, "grad_norm": 1.6648496927426712, "learning_rate": 5.061707187198074e-06, "loss": 0.353, "step": 18548 }, { "epoch": 0.5464991234918904, "grad_norm": 1.4030410928138592, "learning_rate": 5.061192986421696e-06, "loss": 0.4674, "step": 18549 }, { "epoch": 0.546528585949354, "grad_norm": 1.5702157731701165, "learning_rate": 5.060678784998034e-06, "loss": 0.5356, "step": 18550 }, { "epoch": 0.5465580484068177, "grad_norm": 1.6748695188484193, "learning_rate": 5.060164582932533e-06, "loss": 0.3664, "step": 18551 }, { "epoch": 0.5465875108642811, "grad_norm": 1.500398824384306, "learning_rate": 5.059650380230629e-06, "loss": 0.3898, "step": 18552 }, { "epoch": 0.5466169733217447, "grad_norm": 1.5887583325052679, "learning_rate": 5.059136176897761e-06, "loss": 0.5171, "step": 18553 }, { "epoch": 0.5466464357792084, "grad_norm": 1.4814051046499481, "learning_rate": 5.058621972939368e-06, "loss": 0.39, "step": 18554 }, { "epoch": 0.546675898236672, "grad_norm": 1.5857566586267107, "learning_rate": 5.05810776836089e-06, "loss": 0.4374, "step": 18555 }, { "epoch": 0.5467053606941354, "grad_norm": 1.6616929404035192, "learning_rate": 5.057593563167766e-06, "loss": 0.6081, "step": 18556 }, { "epoch": 0.546734823151599, "grad_norm": 1.5420211263585448, "learning_rate": 5.057079357365435e-06, "loss": 0.4787, "step": 18557 }, { "epoch": 0.5467642856090627, "grad_norm": 1.4749294484887616, "learning_rate": 5.056565150959336e-06, "loss": 0.437, "step": 18558 }, { "epoch": 0.5467937480665263, "grad_norm": 1.4268606720738974, "learning_rate": 5.056050943954908e-06, "loss": 0.5185, "step": 18559 }, { "epoch": 0.5468232105239899, "grad_norm": 1.327884685716206, "learning_rate": 5.055536736357589e-06, "loss": 0.3556, "step": 18560 }, { "epoch": 0.5468526729814533, "grad_norm": 1.4844367374234249, "learning_rate": 5.055022528172821e-06, "loss": 0.4928, "step": 18561 }, { "epoch": 0.546882135438917, "grad_norm": 1.5597032739992684, "learning_rate": 5.05450831940604e-06, "loss": 0.3339, "step": 18562 }, { "epoch": 0.5469115978963806, "grad_norm": 1.395301086741653, "learning_rate": 5.0539941100626875e-06, "loss": 0.4693, "step": 18563 }, { "epoch": 0.5469410603538442, "grad_norm": 1.8334584107424117, "learning_rate": 5.053479900148202e-06, "loss": 0.4035, "step": 18564 }, { "epoch": 0.5469705228113076, "grad_norm": 1.4853889216204497, "learning_rate": 5.052965689668022e-06, "loss": 0.4733, "step": 18565 }, { "epoch": 0.5469999852687712, "grad_norm": 1.5385011779672884, "learning_rate": 5.052451478627588e-06, "loss": 0.502, "step": 18566 }, { "epoch": 0.5470294477262349, "grad_norm": 1.492851186752064, "learning_rate": 5.051937267032338e-06, "loss": 0.5637, "step": 18567 }, { "epoch": 0.5470589101836985, "grad_norm": 1.7363503866812102, "learning_rate": 5.05142305488771e-06, "loss": 0.4963, "step": 18568 }, { "epoch": 0.547088372641162, "grad_norm": 1.249266696156423, "learning_rate": 5.050908842199145e-06, "loss": 0.3379, "step": 18569 }, { "epoch": 0.5471178350986255, "grad_norm": 1.5045523945134078, "learning_rate": 5.050394628972082e-06, "loss": 0.4557, "step": 18570 }, { "epoch": 0.5471472975560892, "grad_norm": 1.4439582773002446, "learning_rate": 5.049880415211961e-06, "loss": 0.5349, "step": 18571 }, { "epoch": 0.5471767600135528, "grad_norm": 1.6364724164576077, "learning_rate": 5.0493662009242185e-06, "loss": 0.5239, "step": 18572 }, { "epoch": 0.5472062224710164, "grad_norm": 1.7578528722195481, "learning_rate": 5.048851986114297e-06, "loss": 0.4869, "step": 18573 }, { "epoch": 0.5472356849284798, "grad_norm": 1.3570691524403566, "learning_rate": 5.048337770787632e-06, "loss": 0.5191, "step": 18574 }, { "epoch": 0.5472651473859435, "grad_norm": 1.4808059933344848, "learning_rate": 5.047823554949666e-06, "loss": 0.5245, "step": 18575 }, { "epoch": 0.547294609843407, "grad_norm": 1.311323528993314, "learning_rate": 5.047309338605838e-06, "loss": 0.4272, "step": 18576 }, { "epoch": 0.5473240723008707, "grad_norm": 1.3789450502895988, "learning_rate": 5.046795121761585e-06, "loss": 0.341, "step": 18577 }, { "epoch": 0.5473535347583341, "grad_norm": 1.3762387597058876, "learning_rate": 5.046280904422347e-06, "loss": 0.3042, "step": 18578 }, { "epoch": 0.5473829972157978, "grad_norm": 1.586185613047575, "learning_rate": 5.045766686593565e-06, "loss": 0.4842, "step": 18579 }, { "epoch": 0.5474124596732614, "grad_norm": 1.6016815193638534, "learning_rate": 5.045252468280676e-06, "loss": 0.3836, "step": 18580 }, { "epoch": 0.547441922130725, "grad_norm": 1.4001569000746097, "learning_rate": 5.04473824948912e-06, "loss": 0.398, "step": 18581 }, { "epoch": 0.5474713845881884, "grad_norm": 1.7639798514998828, "learning_rate": 5.0442240302243384e-06, "loss": 0.5698, "step": 18582 }, { "epoch": 0.547500847045652, "grad_norm": 1.5491580216145833, "learning_rate": 5.043709810491767e-06, "loss": 0.4525, "step": 18583 }, { "epoch": 0.5475303095031157, "grad_norm": 1.5181129780659817, "learning_rate": 5.043195590296846e-06, "loss": 0.4883, "step": 18584 }, { "epoch": 0.5475597719605793, "grad_norm": 1.5479954273133747, "learning_rate": 5.042681369645016e-06, "loss": 0.5085, "step": 18585 }, { "epoch": 0.5475892344180429, "grad_norm": 1.44905314360216, "learning_rate": 5.042167148541715e-06, "loss": 0.3152, "step": 18586 }, { "epoch": 0.5476186968755063, "grad_norm": 1.701693497181951, "learning_rate": 5.041652926992383e-06, "loss": 0.7552, "step": 18587 }, { "epoch": 0.54764815933297, "grad_norm": 1.651778094350272, "learning_rate": 5.04113870500246e-06, "loss": 0.4736, "step": 18588 }, { "epoch": 0.5476776217904336, "grad_norm": 1.5563290360262483, "learning_rate": 5.0406244825773835e-06, "loss": 0.4491, "step": 18589 }, { "epoch": 0.5477070842478972, "grad_norm": 1.5764974576211057, "learning_rate": 5.040110259722592e-06, "loss": 0.5031, "step": 18590 }, { "epoch": 0.5477365467053606, "grad_norm": 1.327973889950806, "learning_rate": 5.039596036443529e-06, "loss": 0.4696, "step": 18591 }, { "epoch": 0.5477660091628243, "grad_norm": 1.3740340533919184, "learning_rate": 5.039081812745631e-06, "loss": 0.4052, "step": 18592 }, { "epoch": 0.5477954716202879, "grad_norm": 1.5628619259586831, "learning_rate": 5.0385675886343354e-06, "loss": 0.4984, "step": 18593 }, { "epoch": 0.5478249340777515, "grad_norm": 1.409297437419223, "learning_rate": 5.0380533641150854e-06, "loss": 0.4479, "step": 18594 }, { "epoch": 0.547854396535215, "grad_norm": 1.5439223206259631, "learning_rate": 5.037539139193317e-06, "loss": 0.4529, "step": 18595 }, { "epoch": 0.5478838589926786, "grad_norm": 1.7089224643639804, "learning_rate": 5.037024913874473e-06, "loss": 0.4909, "step": 18596 }, { "epoch": 0.5479133214501422, "grad_norm": 1.7982715799405713, "learning_rate": 5.036510688163989e-06, "loss": 0.3158, "step": 18597 }, { "epoch": 0.5479427839076058, "grad_norm": 1.4891682653997065, "learning_rate": 5.035996462067308e-06, "loss": 0.4809, "step": 18598 }, { "epoch": 0.5479722463650694, "grad_norm": 1.385574753735908, "learning_rate": 5.035482235589865e-06, "loss": 0.5298, "step": 18599 }, { "epoch": 0.5480017088225329, "grad_norm": 1.453918791067915, "learning_rate": 5.034968008737103e-06, "loss": 0.474, "step": 18600 }, { "epoch": 0.5480311712799965, "grad_norm": 1.4931185963010976, "learning_rate": 5.034453781514461e-06, "loss": 0.4542, "step": 18601 }, { "epoch": 0.5480606337374601, "grad_norm": 1.4757000876994346, "learning_rate": 5.033939553927375e-06, "loss": 0.4522, "step": 18602 }, { "epoch": 0.5480900961949237, "grad_norm": 1.513614901437587, "learning_rate": 5.03342532598129e-06, "loss": 0.4404, "step": 18603 }, { "epoch": 0.5481195586523872, "grad_norm": 1.4617570774495805, "learning_rate": 5.032911097681638e-06, "loss": 0.3508, "step": 18604 }, { "epoch": 0.5481490211098508, "grad_norm": 1.3160127529052854, "learning_rate": 5.032396869033864e-06, "loss": 0.3593, "step": 18605 }, { "epoch": 0.5481784835673144, "grad_norm": 1.752549583182849, "learning_rate": 5.031882640043408e-06, "loss": 0.417, "step": 18606 }, { "epoch": 0.548207946024778, "grad_norm": 1.38620118080685, "learning_rate": 5.031368410715706e-06, "loss": 0.267, "step": 18607 }, { "epoch": 0.5482374084822415, "grad_norm": 1.5509689278530614, "learning_rate": 5.030854181056197e-06, "loss": 0.365, "step": 18608 }, { "epoch": 0.548266870939705, "grad_norm": 1.4003684557757703, "learning_rate": 5.030339951070323e-06, "loss": 0.4048, "step": 18609 }, { "epoch": 0.5482963333971687, "grad_norm": 1.508181732576075, "learning_rate": 5.029825720763523e-06, "loss": 0.5159, "step": 18610 }, { "epoch": 0.5483257958546323, "grad_norm": 1.4557826890456396, "learning_rate": 5.0293114901412345e-06, "loss": 0.504, "step": 18611 }, { "epoch": 0.5483552583120959, "grad_norm": 1.5928288763918987, "learning_rate": 5.028797259208897e-06, "loss": 0.5065, "step": 18612 }, { "epoch": 0.5483847207695594, "grad_norm": 1.6120199857729476, "learning_rate": 5.028283027971953e-06, "loss": 0.4699, "step": 18613 }, { "epoch": 0.548414183227023, "grad_norm": 1.3581687870650274, "learning_rate": 5.027768796435839e-06, "loss": 0.3396, "step": 18614 }, { "epoch": 0.5484436456844866, "grad_norm": 1.5435154459416116, "learning_rate": 5.027254564605995e-06, "loss": 0.5797, "step": 18615 }, { "epoch": 0.5484731081419502, "grad_norm": 1.577109545644569, "learning_rate": 5.026740332487862e-06, "loss": 0.4615, "step": 18616 }, { "epoch": 0.5485025705994137, "grad_norm": 1.6587407004950112, "learning_rate": 5.026226100086875e-06, "loss": 0.578, "step": 18617 }, { "epoch": 0.5485320330568773, "grad_norm": 1.657363065351716, "learning_rate": 5.025711867408477e-06, "loss": 0.3387, "step": 18618 }, { "epoch": 0.5485614955143409, "grad_norm": 1.4692785675606541, "learning_rate": 5.025197634458109e-06, "loss": 0.2943, "step": 18619 }, { "epoch": 0.5485909579718045, "grad_norm": 1.4529449225349436, "learning_rate": 5.024683401241206e-06, "loss": 0.36, "step": 18620 }, { "epoch": 0.548620420429268, "grad_norm": 1.6445348179915154, "learning_rate": 5.0241691677632095e-06, "loss": 0.4333, "step": 18621 }, { "epoch": 0.5486498828867316, "grad_norm": 1.6947930520418044, "learning_rate": 5.023654934029561e-06, "loss": 0.5699, "step": 18622 }, { "epoch": 0.5486793453441952, "grad_norm": 1.4881220180756574, "learning_rate": 5.023140700045696e-06, "loss": 0.3823, "step": 18623 }, { "epoch": 0.5487088078016588, "grad_norm": 1.5118806605605843, "learning_rate": 5.022626465817056e-06, "loss": 0.6012, "step": 18624 }, { "epoch": 0.5487382702591224, "grad_norm": 1.6411426259942705, "learning_rate": 5.02211223134908e-06, "loss": 0.5425, "step": 18625 }, { "epoch": 0.5487677327165859, "grad_norm": 1.3891261029588673, "learning_rate": 5.021597996647208e-06, "loss": 0.4635, "step": 18626 }, { "epoch": 0.5487971951740495, "grad_norm": 1.7751403387098634, "learning_rate": 5.021083761716879e-06, "loss": 0.555, "step": 18627 }, { "epoch": 0.5488266576315131, "grad_norm": 1.4468285279310917, "learning_rate": 5.0205695265635315e-06, "loss": 0.457, "step": 18628 }, { "epoch": 0.5488561200889767, "grad_norm": 1.5009312953589924, "learning_rate": 5.020055291192606e-06, "loss": 0.426, "step": 18629 }, { "epoch": 0.5488855825464402, "grad_norm": 1.3964089145411414, "learning_rate": 5.019541055609542e-06, "loss": 0.3509, "step": 18630 }, { "epoch": 0.5489150450039038, "grad_norm": 1.4130096200914635, "learning_rate": 5.01902681981978e-06, "loss": 0.422, "step": 18631 }, { "epoch": 0.5489445074613674, "grad_norm": 1.5985778751753936, "learning_rate": 5.018512583828756e-06, "loss": 0.5078, "step": 18632 }, { "epoch": 0.548973969918831, "grad_norm": 1.3716043294259082, "learning_rate": 5.017998347641912e-06, "loss": 0.4335, "step": 18633 }, { "epoch": 0.5490034323762945, "grad_norm": 1.4662989927029013, "learning_rate": 5.017484111264688e-06, "loss": 0.516, "step": 18634 }, { "epoch": 0.5490328948337581, "grad_norm": 1.491822688184597, "learning_rate": 5.016969874702522e-06, "loss": 0.4991, "step": 18635 }, { "epoch": 0.5490623572912217, "grad_norm": 1.4770648913900704, "learning_rate": 5.016455637960854e-06, "loss": 0.3868, "step": 18636 }, { "epoch": 0.5490918197486853, "grad_norm": 1.3882200536689948, "learning_rate": 5.015941401045123e-06, "loss": 0.5016, "step": 18637 }, { "epoch": 0.5491212822061489, "grad_norm": 1.4817992374790425, "learning_rate": 5.015427163960769e-06, "loss": 0.5045, "step": 18638 }, { "epoch": 0.5491507446636124, "grad_norm": 1.5002503779807357, "learning_rate": 5.014912926713229e-06, "loss": 0.4782, "step": 18639 }, { "epoch": 0.549180207121076, "grad_norm": 1.5755028341656985, "learning_rate": 5.014398689307948e-06, "loss": 0.5165, "step": 18640 }, { "epoch": 0.5492096695785396, "grad_norm": 1.693699731630419, "learning_rate": 5.01388445175036e-06, "loss": 0.444, "step": 18641 }, { "epoch": 0.5492391320360032, "grad_norm": 1.5615677014941707, "learning_rate": 5.013370214045907e-06, "loss": 0.3484, "step": 18642 }, { "epoch": 0.5492685944934667, "grad_norm": 1.5904860012308202, "learning_rate": 5.012855976200029e-06, "loss": 0.2889, "step": 18643 }, { "epoch": 0.5492980569509303, "grad_norm": 1.4976167854372928, "learning_rate": 5.012341738218162e-06, "loss": 0.4868, "step": 18644 }, { "epoch": 0.5493275194083939, "grad_norm": 1.3083551041676593, "learning_rate": 5.011827500105748e-06, "loss": 0.3737, "step": 18645 }, { "epoch": 0.5493569818658575, "grad_norm": 1.416520467153291, "learning_rate": 5.01131326186823e-06, "loss": 0.5335, "step": 18646 }, { "epoch": 0.549386444323321, "grad_norm": 1.4402241864116845, "learning_rate": 5.01079902351104e-06, "loss": 0.536, "step": 18647 }, { "epoch": 0.5494159067807846, "grad_norm": 1.4961314070752316, "learning_rate": 5.010284785039622e-06, "loss": 0.4957, "step": 18648 }, { "epoch": 0.5494453692382482, "grad_norm": 1.4214967856487237, "learning_rate": 5.009770546459416e-06, "loss": 0.4471, "step": 18649 }, { "epoch": 0.5494748316957118, "grad_norm": 1.4437920103276942, "learning_rate": 5.009256307775858e-06, "loss": 0.3482, "step": 18650 }, { "epoch": 0.5495042941531754, "grad_norm": 1.459175181431063, "learning_rate": 5.008742068994392e-06, "loss": 0.3934, "step": 18651 }, { "epoch": 0.5495337566106389, "grad_norm": 1.4677311081777464, "learning_rate": 5.008227830120454e-06, "loss": 0.5231, "step": 18652 }, { "epoch": 0.5495632190681025, "grad_norm": 1.6836471156374058, "learning_rate": 5.007713591159485e-06, "loss": 0.4904, "step": 18653 }, { "epoch": 0.5495926815255661, "grad_norm": 1.5749109050470056, "learning_rate": 5.007199352116923e-06, "loss": 0.3751, "step": 18654 }, { "epoch": 0.5496221439830297, "grad_norm": 1.4937000533150486, "learning_rate": 5.006685112998209e-06, "loss": 0.3269, "step": 18655 }, { "epoch": 0.5496516064404932, "grad_norm": 1.4081133288813346, "learning_rate": 5.006170873808782e-06, "loss": 0.3853, "step": 18656 }, { "epoch": 0.5496810688979568, "grad_norm": 1.5851994028155927, "learning_rate": 5.00565663455408e-06, "loss": 0.5122, "step": 18657 }, { "epoch": 0.5497105313554204, "grad_norm": 1.5848531972193038, "learning_rate": 5.005142395239546e-06, "loss": 0.4973, "step": 18658 }, { "epoch": 0.549739993812884, "grad_norm": 1.6267278606426272, "learning_rate": 5.004628155870615e-06, "loss": 0.5791, "step": 18659 }, { "epoch": 0.5497694562703475, "grad_norm": 1.4275428560095467, "learning_rate": 5.004113916452731e-06, "loss": 0.4051, "step": 18660 }, { "epoch": 0.5497989187278111, "grad_norm": 1.3678089511713596, "learning_rate": 5.003599676991332e-06, "loss": 0.4219, "step": 18661 }, { "epoch": 0.5498283811852747, "grad_norm": 1.7826963009296355, "learning_rate": 5.003085437491853e-06, "loss": 0.5322, "step": 18662 }, { "epoch": 0.5498578436427383, "grad_norm": 1.5001128184110717, "learning_rate": 5.002571197959741e-06, "loss": 0.4344, "step": 18663 }, { "epoch": 0.5498873061002019, "grad_norm": 1.5954549081327711, "learning_rate": 5.00205695840043e-06, "loss": 0.2907, "step": 18664 }, { "epoch": 0.5499167685576654, "grad_norm": 1.3631513310774488, "learning_rate": 5.00154271881936e-06, "loss": 0.2873, "step": 18665 }, { "epoch": 0.549946231015129, "grad_norm": 1.709624450873089, "learning_rate": 5.001028479221973e-06, "loss": 0.4784, "step": 18666 }, { "epoch": 0.5499756934725926, "grad_norm": 1.356894309106447, "learning_rate": 5.000514239613707e-06, "loss": 0.309, "step": 18667 }, { "epoch": 0.5500051559300562, "grad_norm": 1.8831196194016382, "learning_rate": 5e-06, "loss": 0.5533, "step": 18668 }, { "epoch": 0.5500346183875197, "grad_norm": 1.5547776564696174, "learning_rate": 4.999485760386295e-06, "loss": 0.5037, "step": 18669 }, { "epoch": 0.5500640808449833, "grad_norm": 1.5849629004421706, "learning_rate": 4.9989715207780285e-06, "loss": 0.3696, "step": 18670 }, { "epoch": 0.5500935433024469, "grad_norm": 1.6342933133285118, "learning_rate": 4.998457281180641e-06, "loss": 0.4895, "step": 18671 }, { "epoch": 0.5501230057599105, "grad_norm": 1.5201945962320527, "learning_rate": 4.997943041599571e-06, "loss": 0.3556, "step": 18672 }, { "epoch": 0.550152468217374, "grad_norm": 1.5271719365621863, "learning_rate": 4.997428802040262e-06, "loss": 0.4145, "step": 18673 }, { "epoch": 0.5501819306748376, "grad_norm": 1.6113527949290352, "learning_rate": 4.996914562508148e-06, "loss": 0.4148, "step": 18674 }, { "epoch": 0.5502113931323012, "grad_norm": 1.366976370694995, "learning_rate": 4.996400323008671e-06, "loss": 0.4404, "step": 18675 }, { "epoch": 0.5502408555897648, "grad_norm": 1.6134598957349615, "learning_rate": 4.995886083547271e-06, "loss": 0.6098, "step": 18676 }, { "epoch": 0.5502703180472284, "grad_norm": 1.7417916462235064, "learning_rate": 4.9953718441293855e-06, "loss": 0.4381, "step": 18677 }, { "epoch": 0.5502997805046919, "grad_norm": 1.6594623828989667, "learning_rate": 4.994857604760455e-06, "loss": 0.3662, "step": 18678 }, { "epoch": 0.5503292429621555, "grad_norm": 1.4246720470720753, "learning_rate": 4.994343365445921e-06, "loss": 0.4686, "step": 18679 }, { "epoch": 0.5503587054196191, "grad_norm": 1.4265460595536088, "learning_rate": 4.993829126191219e-06, "loss": 0.3328, "step": 18680 }, { "epoch": 0.5503881678770827, "grad_norm": 1.5923656840650224, "learning_rate": 4.993314887001792e-06, "loss": 0.6476, "step": 18681 }, { "epoch": 0.5504176303345462, "grad_norm": 1.5695234752922633, "learning_rate": 4.992800647883078e-06, "loss": 0.437, "step": 18682 }, { "epoch": 0.5504470927920098, "grad_norm": 1.4618135101032592, "learning_rate": 4.992286408840518e-06, "loss": 0.4576, "step": 18683 }, { "epoch": 0.5504765552494734, "grad_norm": 1.5855365807661652, "learning_rate": 4.9917721698795475e-06, "loss": 0.3397, "step": 18684 }, { "epoch": 0.550506017706937, "grad_norm": 1.4531808019381374, "learning_rate": 4.991257931005609e-06, "loss": 0.3825, "step": 18685 }, { "epoch": 0.5505354801644005, "grad_norm": 1.5115592647304081, "learning_rate": 4.990743692224143e-06, "loss": 0.4651, "step": 18686 }, { "epoch": 0.5505649426218641, "grad_norm": 1.5406342099354693, "learning_rate": 4.990229453540585e-06, "loss": 0.4702, "step": 18687 }, { "epoch": 0.5505944050793277, "grad_norm": 1.7141311636050949, "learning_rate": 4.989715214960379e-06, "loss": 0.5077, "step": 18688 }, { "epoch": 0.5506238675367913, "grad_norm": 1.674471250031671, "learning_rate": 4.989200976488961e-06, "loss": 0.47, "step": 18689 }, { "epoch": 0.5506533299942549, "grad_norm": 1.5322227957515353, "learning_rate": 4.988686738131771e-06, "loss": 0.4973, "step": 18690 }, { "epoch": 0.5506827924517184, "grad_norm": 1.7779895856216341, "learning_rate": 4.988172499894252e-06, "loss": 0.5471, "step": 18691 }, { "epoch": 0.550712254909182, "grad_norm": 1.3220436189320937, "learning_rate": 4.987658261781838e-06, "loss": 0.4281, "step": 18692 }, { "epoch": 0.5507417173666456, "grad_norm": 1.3183903187492672, "learning_rate": 4.987144023799974e-06, "loss": 0.342, "step": 18693 }, { "epoch": 0.5507711798241092, "grad_norm": 1.3967846602920277, "learning_rate": 4.986629785954094e-06, "loss": 0.3617, "step": 18694 }, { "epoch": 0.5508006422815727, "grad_norm": 1.6075541428782658, "learning_rate": 4.986115548249642e-06, "loss": 0.4334, "step": 18695 }, { "epoch": 0.5508301047390363, "grad_norm": 1.4885927313964227, "learning_rate": 4.985601310692054e-06, "loss": 0.4572, "step": 18696 }, { "epoch": 0.5508595671964999, "grad_norm": 1.2098086967132013, "learning_rate": 4.985087073286771e-06, "loss": 0.2913, "step": 18697 }, { "epoch": 0.5508890296539635, "grad_norm": 1.4499782449395067, "learning_rate": 4.9845728360392335e-06, "loss": 0.3906, "step": 18698 }, { "epoch": 0.550918492111427, "grad_norm": 1.5087385537843416, "learning_rate": 4.984058598954878e-06, "loss": 0.2874, "step": 18699 }, { "epoch": 0.5509479545688906, "grad_norm": 1.3926964243104047, "learning_rate": 4.983544362039148e-06, "loss": 0.4034, "step": 18700 }, { "epoch": 0.5509774170263542, "grad_norm": 1.733793767398729, "learning_rate": 4.983030125297479e-06, "loss": 0.3364, "step": 18701 }, { "epoch": 0.5510068794838178, "grad_norm": 1.2891596661820894, "learning_rate": 4.982515888735312e-06, "loss": 0.3518, "step": 18702 }, { "epoch": 0.5510363419412814, "grad_norm": 1.5512938013579307, "learning_rate": 4.982001652358089e-06, "loss": 0.4549, "step": 18703 }, { "epoch": 0.5510658043987449, "grad_norm": 1.2976185876550477, "learning_rate": 4.981487416171246e-06, "loss": 0.3824, "step": 18704 }, { "epoch": 0.5510952668562085, "grad_norm": 1.5473392218789828, "learning_rate": 4.980973180180222e-06, "loss": 0.4505, "step": 18705 }, { "epoch": 0.5511247293136721, "grad_norm": 1.6583699581843436, "learning_rate": 4.980458944390459e-06, "loss": 0.5136, "step": 18706 }, { "epoch": 0.5511541917711357, "grad_norm": 1.6790431138367654, "learning_rate": 4.9799447088073955e-06, "loss": 0.3577, "step": 18707 }, { "epoch": 0.5511836542285992, "grad_norm": 1.6404894404960237, "learning_rate": 4.979430473436469e-06, "loss": 0.5165, "step": 18708 }, { "epoch": 0.5512131166860628, "grad_norm": 1.5880050510483459, "learning_rate": 4.978916238283124e-06, "loss": 0.3491, "step": 18709 }, { "epoch": 0.5512425791435264, "grad_norm": 1.3946125197395522, "learning_rate": 4.978402003352794e-06, "loss": 0.4832, "step": 18710 }, { "epoch": 0.55127204160099, "grad_norm": 1.4949688487318729, "learning_rate": 4.977887768650921e-06, "loss": 0.4012, "step": 18711 }, { "epoch": 0.5513015040584535, "grad_norm": 1.5692337184631215, "learning_rate": 4.9773735341829455e-06, "loss": 0.4503, "step": 18712 }, { "epoch": 0.5513309665159171, "grad_norm": 1.347221811356166, "learning_rate": 4.976859299954307e-06, "loss": 0.4462, "step": 18713 }, { "epoch": 0.5513604289733807, "grad_norm": 1.8598920263438827, "learning_rate": 4.976345065970442e-06, "loss": 0.473, "step": 18714 }, { "epoch": 0.5513898914308443, "grad_norm": 1.3572340826411113, "learning_rate": 4.975830832236791e-06, "loss": 0.3855, "step": 18715 }, { "epoch": 0.5514193538883079, "grad_norm": 1.3693003031114535, "learning_rate": 4.975316598758796e-06, "loss": 0.3916, "step": 18716 }, { "epoch": 0.5514488163457714, "grad_norm": 1.6630518721084497, "learning_rate": 4.974802365541893e-06, "loss": 0.5373, "step": 18717 }, { "epoch": 0.551478278803235, "grad_norm": 1.591211559810906, "learning_rate": 4.9742881325915234e-06, "loss": 0.5002, "step": 18718 }, { "epoch": 0.5515077412606986, "grad_norm": 1.618960747915554, "learning_rate": 4.973773899913126e-06, "loss": 0.3517, "step": 18719 }, { "epoch": 0.5515372037181622, "grad_norm": 1.3932825235463608, "learning_rate": 4.97325966751214e-06, "loss": 0.4752, "step": 18720 }, { "epoch": 0.5515666661756257, "grad_norm": 1.7360202520832848, "learning_rate": 4.9727454353940054e-06, "loss": 0.614, "step": 18721 }, { "epoch": 0.5515961286330893, "grad_norm": 1.7240953614571366, "learning_rate": 4.9722312035641614e-06, "loss": 0.5914, "step": 18722 }, { "epoch": 0.5516255910905529, "grad_norm": 1.5087513678095572, "learning_rate": 4.971716972028049e-06, "loss": 0.4486, "step": 18723 }, { "epoch": 0.5516550535480165, "grad_norm": 1.59074505945281, "learning_rate": 4.9712027407911035e-06, "loss": 0.5982, "step": 18724 }, { "epoch": 0.55168451600548, "grad_norm": 1.4903706154593361, "learning_rate": 4.970688509858768e-06, "loss": 0.5071, "step": 18725 }, { "epoch": 0.5517139784629436, "grad_norm": 1.5563580524286316, "learning_rate": 4.97017427923648e-06, "loss": 0.2813, "step": 18726 }, { "epoch": 0.5517434409204072, "grad_norm": 1.477222363947175, "learning_rate": 4.969660048929678e-06, "loss": 0.3701, "step": 18727 }, { "epoch": 0.5517729033778708, "grad_norm": 1.499000330906399, "learning_rate": 4.969145818943804e-06, "loss": 0.4926, "step": 18728 }, { "epoch": 0.5518023658353344, "grad_norm": 1.5887498581111048, "learning_rate": 4.968631589284296e-06, "loss": 0.348, "step": 18729 }, { "epoch": 0.5518318282927979, "grad_norm": 1.4568323747047385, "learning_rate": 4.968117359956593e-06, "loss": 0.3666, "step": 18730 }, { "epoch": 0.5518612907502615, "grad_norm": 1.3729081048679266, "learning_rate": 4.967603130966136e-06, "loss": 0.4564, "step": 18731 }, { "epoch": 0.5518907532077251, "grad_norm": 1.6577183103182649, "learning_rate": 4.967088902318362e-06, "loss": 0.317, "step": 18732 }, { "epoch": 0.5519202156651887, "grad_norm": 1.596759043716277, "learning_rate": 4.966574674018713e-06, "loss": 0.339, "step": 18733 }, { "epoch": 0.5519496781226522, "grad_norm": 1.434848056233425, "learning_rate": 4.966060446072627e-06, "loss": 0.432, "step": 18734 }, { "epoch": 0.5519791405801158, "grad_norm": 1.5076522740125688, "learning_rate": 4.965546218485542e-06, "loss": 0.473, "step": 18735 }, { "epoch": 0.5520086030375794, "grad_norm": 1.5525524734948115, "learning_rate": 4.965031991262898e-06, "loss": 0.5568, "step": 18736 }, { "epoch": 0.552038065495043, "grad_norm": 1.4941186143934722, "learning_rate": 4.964517764410137e-06, "loss": 0.3584, "step": 18737 }, { "epoch": 0.5520675279525065, "grad_norm": 1.5389615705240778, "learning_rate": 4.964003537932694e-06, "loss": 0.53, "step": 18738 }, { "epoch": 0.5520969904099701, "grad_norm": 1.3892699541501168, "learning_rate": 4.963489311836012e-06, "loss": 0.3941, "step": 18739 }, { "epoch": 0.5521264528674337, "grad_norm": 1.4861459859554518, "learning_rate": 4.962975086125529e-06, "loss": 0.4253, "step": 18740 }, { "epoch": 0.5521559153248973, "grad_norm": 1.3209168011036616, "learning_rate": 4.962460860806682e-06, "loss": 0.3204, "step": 18741 }, { "epoch": 0.5521853777823609, "grad_norm": 1.4790760198693278, "learning_rate": 4.961946635884916e-06, "loss": 0.5015, "step": 18742 }, { "epoch": 0.5522148402398244, "grad_norm": 1.7681725009816074, "learning_rate": 4.961432411365667e-06, "loss": 0.5531, "step": 18743 }, { "epoch": 0.552244302697288, "grad_norm": 1.4486986283208294, "learning_rate": 4.960918187254372e-06, "loss": 0.4666, "step": 18744 }, { "epoch": 0.5522737651547516, "grad_norm": 1.7474853860960762, "learning_rate": 4.960403963556473e-06, "loss": 0.6084, "step": 18745 }, { "epoch": 0.5523032276122152, "grad_norm": 1.5364918354171833, "learning_rate": 4.9598897402774086e-06, "loss": 0.3465, "step": 18746 }, { "epoch": 0.5523326900696787, "grad_norm": 1.8527434757714463, "learning_rate": 4.959375517422618e-06, "loss": 0.6361, "step": 18747 }, { "epoch": 0.5523621525271423, "grad_norm": 1.6822298955483226, "learning_rate": 4.958861294997541e-06, "loss": 0.3633, "step": 18748 }, { "epoch": 0.5523916149846059, "grad_norm": 1.393506140839683, "learning_rate": 4.958347073007618e-06, "loss": 0.405, "step": 18749 }, { "epoch": 0.5524210774420695, "grad_norm": 1.3517249202973791, "learning_rate": 4.9578328514582855e-06, "loss": 0.4241, "step": 18750 }, { "epoch": 0.552450539899533, "grad_norm": 1.5833217319761148, "learning_rate": 4.9573186303549856e-06, "loss": 0.3477, "step": 18751 }, { "epoch": 0.5524800023569966, "grad_norm": 1.541607161426711, "learning_rate": 4.956804409703154e-06, "loss": 0.4035, "step": 18752 }, { "epoch": 0.5525094648144602, "grad_norm": 1.6195689196273901, "learning_rate": 4.956290189508236e-06, "loss": 0.4571, "step": 18753 }, { "epoch": 0.5525389272719238, "grad_norm": 1.4302486292594732, "learning_rate": 4.955775969775664e-06, "loss": 0.4308, "step": 18754 }, { "epoch": 0.5525683897293874, "grad_norm": 1.6021522052210395, "learning_rate": 4.955261750510881e-06, "loss": 0.3491, "step": 18755 }, { "epoch": 0.5525978521868509, "grad_norm": 1.5362286856891036, "learning_rate": 4.954747531719326e-06, "loss": 0.4169, "step": 18756 }, { "epoch": 0.5526273146443145, "grad_norm": 1.4782159669795827, "learning_rate": 4.954233313406436e-06, "loss": 0.3619, "step": 18757 }, { "epoch": 0.5526567771017781, "grad_norm": 1.3840608315707268, "learning_rate": 4.9537190955776545e-06, "loss": 0.4167, "step": 18758 }, { "epoch": 0.5526862395592417, "grad_norm": 1.366523003891433, "learning_rate": 4.9532048782384165e-06, "loss": 0.4703, "step": 18759 }, { "epoch": 0.5527157020167052, "grad_norm": 1.6728147424397732, "learning_rate": 4.9526906613941625e-06, "loss": 0.549, "step": 18760 }, { "epoch": 0.5527451644741688, "grad_norm": 1.8449323547853682, "learning_rate": 4.952176445050335e-06, "loss": 0.5563, "step": 18761 }, { "epoch": 0.5527746269316324, "grad_norm": 1.6426789819320706, "learning_rate": 4.951662229212368e-06, "loss": 0.4258, "step": 18762 }, { "epoch": 0.552804089389096, "grad_norm": 1.8658543605952496, "learning_rate": 4.951148013885706e-06, "loss": 0.7157, "step": 18763 }, { "epoch": 0.5528335518465595, "grad_norm": 1.407020008776252, "learning_rate": 4.950633799075784e-06, "loss": 0.4364, "step": 18764 }, { "epoch": 0.5528630143040231, "grad_norm": 1.4260033451557959, "learning_rate": 4.9501195847880415e-06, "loss": 0.4226, "step": 18765 }, { "epoch": 0.5528924767614867, "grad_norm": 1.6975423597675623, "learning_rate": 4.94960537102792e-06, "loss": 0.4954, "step": 18766 }, { "epoch": 0.5529219392189503, "grad_norm": 1.4223981877788126, "learning_rate": 4.9490911578008576e-06, "loss": 0.4272, "step": 18767 }, { "epoch": 0.5529514016764139, "grad_norm": 1.3154202376082658, "learning_rate": 4.948576945112292e-06, "loss": 0.35, "step": 18768 }, { "epoch": 0.5529808641338774, "grad_norm": 1.3730665988554658, "learning_rate": 4.948062732967664e-06, "loss": 0.4333, "step": 18769 }, { "epoch": 0.553010326591341, "grad_norm": 1.4312666446951297, "learning_rate": 4.947548521372414e-06, "loss": 0.531, "step": 18770 }, { "epoch": 0.5530397890488046, "grad_norm": 1.256122841761271, "learning_rate": 4.947034310331979e-06, "loss": 0.3458, "step": 18771 }, { "epoch": 0.5530692515062682, "grad_norm": 1.3969775477851973, "learning_rate": 4.946520099851799e-06, "loss": 0.4125, "step": 18772 }, { "epoch": 0.5530987139637317, "grad_norm": 1.5095143352932223, "learning_rate": 4.946005889937313e-06, "loss": 0.5065, "step": 18773 }, { "epoch": 0.5531281764211953, "grad_norm": 1.6805144538742627, "learning_rate": 4.945491680593962e-06, "loss": 0.4438, "step": 18774 }, { "epoch": 0.5531576388786589, "grad_norm": 1.3828923543689777, "learning_rate": 4.944977471827181e-06, "loss": 0.4713, "step": 18775 }, { "epoch": 0.5531871013361225, "grad_norm": 1.459706201597595, "learning_rate": 4.944463263642413e-06, "loss": 0.5281, "step": 18776 }, { "epoch": 0.553216563793586, "grad_norm": 1.4340661008173605, "learning_rate": 4.943949056045095e-06, "loss": 0.3473, "step": 18777 }, { "epoch": 0.5532460262510496, "grad_norm": 1.4766616461323343, "learning_rate": 4.943434849040666e-06, "loss": 0.575, "step": 18778 }, { "epoch": 0.5532754887085132, "grad_norm": 1.393550412143714, "learning_rate": 4.9429206426345664e-06, "loss": 0.4805, "step": 18779 }, { "epoch": 0.5533049511659768, "grad_norm": 1.7352598286966212, "learning_rate": 4.942406436832235e-06, "loss": 0.4412, "step": 18780 }, { "epoch": 0.5533344136234404, "grad_norm": 1.5298480753498036, "learning_rate": 4.94189223163911e-06, "loss": 0.4855, "step": 18781 }, { "epoch": 0.5533638760809039, "grad_norm": 1.4640975724506515, "learning_rate": 4.941378027060633e-06, "loss": 0.4172, "step": 18782 }, { "epoch": 0.5533933385383675, "grad_norm": 1.3599313578141858, "learning_rate": 4.940863823102241e-06, "loss": 0.2889, "step": 18783 }, { "epoch": 0.5534228009958311, "grad_norm": 1.4942787969261877, "learning_rate": 4.940349619769373e-06, "loss": 0.5515, "step": 18784 }, { "epoch": 0.5534522634532947, "grad_norm": 1.3879943101900978, "learning_rate": 4.939835417067469e-06, "loss": 0.3633, "step": 18785 }, { "epoch": 0.5534817259107582, "grad_norm": 1.5910426450305386, "learning_rate": 4.939321215001967e-06, "loss": 0.3547, "step": 18786 }, { "epoch": 0.5535111883682218, "grad_norm": 1.4495461517364825, "learning_rate": 4.938807013578306e-06, "loss": 0.3954, "step": 18787 }, { "epoch": 0.5535406508256854, "grad_norm": 1.3960716260031731, "learning_rate": 4.938292812801927e-06, "loss": 0.2977, "step": 18788 }, { "epoch": 0.553570113283149, "grad_norm": 1.74975557670254, "learning_rate": 4.937778612678267e-06, "loss": 0.5027, "step": 18789 }, { "epoch": 0.5535995757406125, "grad_norm": 1.4087296415011732, "learning_rate": 4.937264413212766e-06, "loss": 0.4188, "step": 18790 }, { "epoch": 0.5536290381980761, "grad_norm": 1.374758846474654, "learning_rate": 4.936750214410863e-06, "loss": 0.3426, "step": 18791 }, { "epoch": 0.5536585006555397, "grad_norm": 1.4983004811078435, "learning_rate": 4.936236016277997e-06, "loss": 0.5046, "step": 18792 }, { "epoch": 0.5536879631130033, "grad_norm": 1.4948810453149197, "learning_rate": 4.935721818819609e-06, "loss": 0.4645, "step": 18793 }, { "epoch": 0.5537174255704669, "grad_norm": 1.7555601627791544, "learning_rate": 4.935207622041133e-06, "loss": 0.5811, "step": 18794 }, { "epoch": 0.5537468880279304, "grad_norm": 1.7230532509257241, "learning_rate": 4.934693425948013e-06, "loss": 0.3599, "step": 18795 }, { "epoch": 0.553776350485394, "grad_norm": 1.5437119144391251, "learning_rate": 4.934179230545684e-06, "loss": 0.4083, "step": 18796 }, { "epoch": 0.5538058129428576, "grad_norm": 1.4607107670474175, "learning_rate": 4.933665035839589e-06, "loss": 0.3672, "step": 18797 }, { "epoch": 0.5538352754003212, "grad_norm": 1.6136056986128344, "learning_rate": 4.933150841835164e-06, "loss": 0.4892, "step": 18798 }, { "epoch": 0.5538647378577847, "grad_norm": 1.4367124650534604, "learning_rate": 4.932636648537849e-06, "loss": 0.3198, "step": 18799 }, { "epoch": 0.5538942003152483, "grad_norm": 1.4893011435152672, "learning_rate": 4.932122455953084e-06, "loss": 0.4783, "step": 18800 }, { "epoch": 0.5539236627727119, "grad_norm": 1.5214165462298215, "learning_rate": 4.931608264086306e-06, "loss": 0.4696, "step": 18801 }, { "epoch": 0.5539531252301755, "grad_norm": 1.531138006026788, "learning_rate": 4.931094072942954e-06, "loss": 0.5083, "step": 18802 }, { "epoch": 0.553982587687639, "grad_norm": 1.7958111773747871, "learning_rate": 4.930579882528471e-06, "loss": 0.5285, "step": 18803 }, { "epoch": 0.5540120501451026, "grad_norm": 1.5767526810224426, "learning_rate": 4.930065692848292e-06, "loss": 0.614, "step": 18804 }, { "epoch": 0.5540415126025662, "grad_norm": 1.529303463827351, "learning_rate": 4.929551503907855e-06, "loss": 0.5011, "step": 18805 }, { "epoch": 0.5540709750600298, "grad_norm": 1.5505009028148802, "learning_rate": 4.929037315712602e-06, "loss": 0.4574, "step": 18806 }, { "epoch": 0.5541004375174934, "grad_norm": 1.3634271882324729, "learning_rate": 4.928523128267969e-06, "loss": 0.4152, "step": 18807 }, { "epoch": 0.5541298999749569, "grad_norm": 1.4832785948636324, "learning_rate": 4.928008941579398e-06, "loss": 0.4219, "step": 18808 }, { "epoch": 0.5541593624324205, "grad_norm": 1.4155221762786825, "learning_rate": 4.927494755652326e-06, "loss": 0.4113, "step": 18809 }, { "epoch": 0.5541888248898841, "grad_norm": 1.5450159227050198, "learning_rate": 4.926980570492192e-06, "loss": 0.4348, "step": 18810 }, { "epoch": 0.5542182873473477, "grad_norm": 1.564372307147052, "learning_rate": 4.926466386104436e-06, "loss": 0.525, "step": 18811 }, { "epoch": 0.5542477498048112, "grad_norm": 1.4975653877309485, "learning_rate": 4.925952202494496e-06, "loss": 0.3301, "step": 18812 }, { "epoch": 0.5542772122622748, "grad_norm": 1.38650751571336, "learning_rate": 4.925438019667813e-06, "loss": 0.3777, "step": 18813 }, { "epoch": 0.5543066747197384, "grad_norm": 1.329520874672051, "learning_rate": 4.924923837629821e-06, "loss": 0.4245, "step": 18814 }, { "epoch": 0.554336137177202, "grad_norm": 1.4805276285811662, "learning_rate": 4.9244096563859625e-06, "loss": 0.4131, "step": 18815 }, { "epoch": 0.5543655996346655, "grad_norm": 1.7700207649546373, "learning_rate": 4.923895475941676e-06, "loss": 0.4958, "step": 18816 }, { "epoch": 0.5543950620921291, "grad_norm": 1.426394021673471, "learning_rate": 4.923381296302399e-06, "loss": 0.329, "step": 18817 }, { "epoch": 0.5544245245495927, "grad_norm": 1.441257263971232, "learning_rate": 4.922867117473573e-06, "loss": 0.4064, "step": 18818 }, { "epoch": 0.5544539870070563, "grad_norm": 1.540491559497092, "learning_rate": 4.922352939460634e-06, "loss": 0.5059, "step": 18819 }, { "epoch": 0.5544834494645199, "grad_norm": 1.4856511332088684, "learning_rate": 4.921838762269021e-06, "loss": 0.4566, "step": 18820 }, { "epoch": 0.5545129119219834, "grad_norm": 1.404131431055065, "learning_rate": 4.921324585904175e-06, "loss": 0.3682, "step": 18821 }, { "epoch": 0.554542374379447, "grad_norm": 1.4518407505377853, "learning_rate": 4.920810410371533e-06, "loss": 0.4386, "step": 18822 }, { "epoch": 0.5545718368369106, "grad_norm": 1.662765056441794, "learning_rate": 4.920296235676536e-06, "loss": 0.521, "step": 18823 }, { "epoch": 0.5546012992943742, "grad_norm": 1.456500414566175, "learning_rate": 4.91978206182462e-06, "loss": 0.5144, "step": 18824 }, { "epoch": 0.5546307617518377, "grad_norm": 1.4273167823324573, "learning_rate": 4.919267888821226e-06, "loss": 0.4038, "step": 18825 }, { "epoch": 0.5546602242093013, "grad_norm": 1.511597563418451, "learning_rate": 4.918753716671789e-06, "loss": 0.36, "step": 18826 }, { "epoch": 0.5546896866667649, "grad_norm": 1.5898974554651344, "learning_rate": 4.918239545381753e-06, "loss": 0.6154, "step": 18827 }, { "epoch": 0.5547191491242285, "grad_norm": 1.513819718227102, "learning_rate": 4.917725374956552e-06, "loss": 0.4069, "step": 18828 }, { "epoch": 0.554748611581692, "grad_norm": 1.631220244318161, "learning_rate": 4.917211205401628e-06, "loss": 0.4902, "step": 18829 }, { "epoch": 0.5547780740391556, "grad_norm": 1.427789188458381, "learning_rate": 4.916697036722419e-06, "loss": 0.3968, "step": 18830 }, { "epoch": 0.5548075364966192, "grad_norm": 1.332620940029656, "learning_rate": 4.9161828689243625e-06, "loss": 0.4152, "step": 18831 }, { "epoch": 0.5548369989540828, "grad_norm": 1.5314675890019067, "learning_rate": 4.915668702012898e-06, "loss": 0.5293, "step": 18832 }, { "epoch": 0.5548664614115464, "grad_norm": 1.4067000406148362, "learning_rate": 4.915154535993466e-06, "loss": 0.4519, "step": 18833 }, { "epoch": 0.5548959238690099, "grad_norm": 1.5278538014511065, "learning_rate": 4.914640370871503e-06, "loss": 0.4282, "step": 18834 }, { "epoch": 0.5549253863264735, "grad_norm": 1.3887186475679698, "learning_rate": 4.914126206652447e-06, "loss": 0.47, "step": 18835 }, { "epoch": 0.5549548487839371, "grad_norm": 1.2859257743010997, "learning_rate": 4.913612043341738e-06, "loss": 0.3287, "step": 18836 }, { "epoch": 0.5549843112414007, "grad_norm": 1.605998276060983, "learning_rate": 4.913097880944815e-06, "loss": 0.3667, "step": 18837 }, { "epoch": 0.5550137736988642, "grad_norm": 1.3358300117271775, "learning_rate": 4.912583719467115e-06, "loss": 0.4339, "step": 18838 }, { "epoch": 0.5550432361563278, "grad_norm": 1.5951508522799762, "learning_rate": 4.91206955891408e-06, "loss": 0.4235, "step": 18839 }, { "epoch": 0.5550726986137914, "grad_norm": 1.5905051200615952, "learning_rate": 4.911555399291144e-06, "loss": 0.5646, "step": 18840 }, { "epoch": 0.555102161071255, "grad_norm": 1.6141271592012678, "learning_rate": 4.911041240603749e-06, "loss": 0.3915, "step": 18841 }, { "epoch": 0.5551316235287185, "grad_norm": 1.5937938507527274, "learning_rate": 4.9105270828573325e-06, "loss": 0.4634, "step": 18842 }, { "epoch": 0.5551610859861821, "grad_norm": 1.435011710716659, "learning_rate": 4.910012926057336e-06, "loss": 0.4231, "step": 18843 }, { "epoch": 0.5551905484436457, "grad_norm": 1.4111928834132863, "learning_rate": 4.9094987702091915e-06, "loss": 0.4253, "step": 18844 }, { "epoch": 0.5552200109011093, "grad_norm": 1.6797914843276003, "learning_rate": 4.908984615318343e-06, "loss": 0.4511, "step": 18845 }, { "epoch": 0.5552494733585729, "grad_norm": 1.4986216479782544, "learning_rate": 4.908470461390226e-06, "loss": 0.3358, "step": 18846 }, { "epoch": 0.5552789358160364, "grad_norm": 1.5164913863039637, "learning_rate": 4.907956308430282e-06, "loss": 0.4701, "step": 18847 }, { "epoch": 0.5553083982735, "grad_norm": 1.6568616388417703, "learning_rate": 4.907442156443948e-06, "loss": 0.4812, "step": 18848 }, { "epoch": 0.5553378607309636, "grad_norm": 1.5888105021322927, "learning_rate": 4.906928005436663e-06, "loss": 0.4932, "step": 18849 }, { "epoch": 0.5553673231884272, "grad_norm": 1.4597318858982369, "learning_rate": 4.906413855413864e-06, "loss": 0.5402, "step": 18850 }, { "epoch": 0.5553967856458907, "grad_norm": 1.5737993897202047, "learning_rate": 4.905899706380992e-06, "loss": 0.3744, "step": 18851 }, { "epoch": 0.5554262481033543, "grad_norm": 1.3549016306110349, "learning_rate": 4.905385558343483e-06, "loss": 0.3836, "step": 18852 }, { "epoch": 0.5554557105608179, "grad_norm": 1.4923419176349055, "learning_rate": 4.904871411306779e-06, "loss": 0.465, "step": 18853 }, { "epoch": 0.5554851730182815, "grad_norm": 1.3935098454215398, "learning_rate": 4.904357265276314e-06, "loss": 0.323, "step": 18854 }, { "epoch": 0.555514635475745, "grad_norm": 1.3905433648874965, "learning_rate": 4.9038431202575295e-06, "loss": 0.3664, "step": 18855 }, { "epoch": 0.5555440979332086, "grad_norm": 1.5933277094388227, "learning_rate": 4.9033289762558635e-06, "loss": 0.4646, "step": 18856 }, { "epoch": 0.5555735603906722, "grad_norm": 1.6679195981815185, "learning_rate": 4.9028148332767526e-06, "loss": 0.4616, "step": 18857 }, { "epoch": 0.5556030228481358, "grad_norm": 1.7402952856695533, "learning_rate": 4.902300691325638e-06, "loss": 0.4851, "step": 18858 }, { "epoch": 0.5556324853055994, "grad_norm": 1.4570388681089466, "learning_rate": 4.901786550407956e-06, "loss": 0.4633, "step": 18859 }, { "epoch": 0.5556619477630629, "grad_norm": 1.5694983014855948, "learning_rate": 4.901272410529148e-06, "loss": 0.4474, "step": 18860 }, { "epoch": 0.5556914102205265, "grad_norm": 1.6999496531005838, "learning_rate": 4.900758271694648e-06, "loss": 0.3937, "step": 18861 }, { "epoch": 0.5557208726779901, "grad_norm": 1.7308019573448032, "learning_rate": 4.900244133909898e-06, "loss": 0.4767, "step": 18862 }, { "epoch": 0.5557503351354537, "grad_norm": 1.4979052462576397, "learning_rate": 4.899729997180336e-06, "loss": 0.4932, "step": 18863 }, { "epoch": 0.5557797975929172, "grad_norm": 1.4377990342152607, "learning_rate": 4.899215861511399e-06, "loss": 0.4567, "step": 18864 }, { "epoch": 0.5558092600503808, "grad_norm": 1.5555077947687312, "learning_rate": 4.898701726908525e-06, "loss": 0.4859, "step": 18865 }, { "epoch": 0.5558387225078444, "grad_norm": 1.6269050737306467, "learning_rate": 4.8981875933771534e-06, "loss": 0.508, "step": 18866 }, { "epoch": 0.555868184965308, "grad_norm": 1.7434558872462813, "learning_rate": 4.8976734609227225e-06, "loss": 0.4459, "step": 18867 }, { "epoch": 0.5558976474227715, "grad_norm": 1.4015530089700485, "learning_rate": 4.89715932955067e-06, "loss": 0.3385, "step": 18868 }, { "epoch": 0.5559271098802351, "grad_norm": 1.6700936713167032, "learning_rate": 4.896645199266435e-06, "loss": 0.626, "step": 18869 }, { "epoch": 0.5559565723376987, "grad_norm": 1.7216491126280289, "learning_rate": 4.896131070075457e-06, "loss": 0.4601, "step": 18870 }, { "epoch": 0.5559860347951623, "grad_norm": 1.4874113920231211, "learning_rate": 4.895616941983171e-06, "loss": 0.4681, "step": 18871 }, { "epoch": 0.5560154972526259, "grad_norm": 1.7037569713758003, "learning_rate": 4.895102814995019e-06, "loss": 0.4937, "step": 18872 }, { "epoch": 0.5560449597100894, "grad_norm": 1.563614484462491, "learning_rate": 4.894588689116439e-06, "loss": 0.5078, "step": 18873 }, { "epoch": 0.556074422167553, "grad_norm": 1.5760144227684239, "learning_rate": 4.8940745643528645e-06, "loss": 0.4176, "step": 18874 }, { "epoch": 0.5561038846250166, "grad_norm": 1.7874765545700855, "learning_rate": 4.893560440709736e-06, "loss": 0.4361, "step": 18875 }, { "epoch": 0.5561333470824802, "grad_norm": 1.6365013324657083, "learning_rate": 4.893046318192496e-06, "loss": 0.3228, "step": 18876 }, { "epoch": 0.5561628095399437, "grad_norm": 1.5536942852318951, "learning_rate": 4.892532196806577e-06, "loss": 0.3358, "step": 18877 }, { "epoch": 0.5561922719974073, "grad_norm": 1.660878284635974, "learning_rate": 4.8920180765574195e-06, "loss": 0.5523, "step": 18878 }, { "epoch": 0.5562217344548709, "grad_norm": 1.5464192775179053, "learning_rate": 4.891503957450464e-06, "loss": 0.5259, "step": 18879 }, { "epoch": 0.5562511969123345, "grad_norm": 1.5576766058474434, "learning_rate": 4.890989839491146e-06, "loss": 0.4086, "step": 18880 }, { "epoch": 0.556280659369798, "grad_norm": 1.4171301652784065, "learning_rate": 4.890475722684904e-06, "loss": 0.5001, "step": 18881 }, { "epoch": 0.5563101218272616, "grad_norm": 1.5065522158335112, "learning_rate": 4.889961607037175e-06, "loss": 0.5208, "step": 18882 }, { "epoch": 0.5563395842847252, "grad_norm": 1.4282072417229403, "learning_rate": 4.889447492553402e-06, "loss": 0.4583, "step": 18883 }, { "epoch": 0.5563690467421888, "grad_norm": 1.7361973835088182, "learning_rate": 4.8889333792390166e-06, "loss": 0.438, "step": 18884 }, { "epoch": 0.5563985091996524, "grad_norm": 1.5477784213705679, "learning_rate": 4.888419267099462e-06, "loss": 0.585, "step": 18885 }, { "epoch": 0.5564279716571159, "grad_norm": 1.3541175861681423, "learning_rate": 4.887905156140173e-06, "loss": 0.326, "step": 18886 }, { "epoch": 0.5564574341145795, "grad_norm": 1.4749327563217187, "learning_rate": 4.887391046366588e-06, "loss": 0.4223, "step": 18887 }, { "epoch": 0.5564868965720431, "grad_norm": 1.5776912045640075, "learning_rate": 4.886876937784149e-06, "loss": 0.4655, "step": 18888 }, { "epoch": 0.5565163590295067, "grad_norm": 1.5220002826703027, "learning_rate": 4.886362830398289e-06, "loss": 0.4791, "step": 18889 }, { "epoch": 0.5565458214869702, "grad_norm": 1.523714074963858, "learning_rate": 4.8858487242144484e-06, "loss": 0.374, "step": 18890 }, { "epoch": 0.5565752839444338, "grad_norm": 1.642675323011117, "learning_rate": 4.885334619238067e-06, "loss": 0.4901, "step": 18891 }, { "epoch": 0.5566047464018974, "grad_norm": 1.2537466360154177, "learning_rate": 4.88482051547458e-06, "loss": 0.3063, "step": 18892 }, { "epoch": 0.556634208859361, "grad_norm": 1.626388751207299, "learning_rate": 4.884306412929428e-06, "loss": 0.4779, "step": 18893 }, { "epoch": 0.5566636713168245, "grad_norm": 1.5733452003707784, "learning_rate": 4.883792311608046e-06, "loss": 0.4014, "step": 18894 }, { "epoch": 0.5566931337742881, "grad_norm": 1.6465500954362409, "learning_rate": 4.883278211515874e-06, "loss": 0.4587, "step": 18895 }, { "epoch": 0.5567225962317517, "grad_norm": 1.4060946731747468, "learning_rate": 4.882764112658348e-06, "loss": 0.3803, "step": 18896 }, { "epoch": 0.5567520586892153, "grad_norm": 1.4277151490070472, "learning_rate": 4.8822500150409094e-06, "loss": 0.4744, "step": 18897 }, { "epoch": 0.5567815211466789, "grad_norm": 1.5429782662277285, "learning_rate": 4.881735918668994e-06, "loss": 0.4718, "step": 18898 }, { "epoch": 0.5568109836041424, "grad_norm": 1.4217948960081421, "learning_rate": 4.881221823548038e-06, "loss": 0.4273, "step": 18899 }, { "epoch": 0.556840446061606, "grad_norm": 1.3173437138394177, "learning_rate": 4.880707729683484e-06, "loss": 0.3755, "step": 18900 }, { "epoch": 0.5568699085190696, "grad_norm": 1.4482367365248903, "learning_rate": 4.880193637080765e-06, "loss": 0.3509, "step": 18901 }, { "epoch": 0.5568993709765332, "grad_norm": 1.5290164059892979, "learning_rate": 4.879679545745324e-06, "loss": 0.4237, "step": 18902 }, { "epoch": 0.5569288334339967, "grad_norm": 1.548292938408311, "learning_rate": 4.879165455682594e-06, "loss": 0.477, "step": 18903 }, { "epoch": 0.5569582958914603, "grad_norm": 1.4990433039685394, "learning_rate": 4.878651366898016e-06, "loss": 0.5202, "step": 18904 }, { "epoch": 0.5569877583489239, "grad_norm": 1.5737780514772186, "learning_rate": 4.878137279397026e-06, "loss": 0.4992, "step": 18905 }, { "epoch": 0.5570172208063875, "grad_norm": 1.5647350706529883, "learning_rate": 4.8776231931850634e-06, "loss": 0.4321, "step": 18906 }, { "epoch": 0.557046683263851, "grad_norm": 1.485674242766905, "learning_rate": 4.877109108267565e-06, "loss": 0.3375, "step": 18907 }, { "epoch": 0.5570761457213146, "grad_norm": 1.503869912957686, "learning_rate": 4.876595024649968e-06, "loss": 0.6092, "step": 18908 }, { "epoch": 0.5571056081787782, "grad_norm": 1.4289262966600207, "learning_rate": 4.876080942337712e-06, "loss": 0.4028, "step": 18909 }, { "epoch": 0.5571350706362418, "grad_norm": 1.5406117591524826, "learning_rate": 4.875566861336234e-06, "loss": 0.4301, "step": 18910 }, { "epoch": 0.5571645330937054, "grad_norm": 1.385424941982376, "learning_rate": 4.875052781650972e-06, "loss": 0.3286, "step": 18911 }, { "epoch": 0.5571939955511689, "grad_norm": 1.401167935044053, "learning_rate": 4.874538703287364e-06, "loss": 0.4411, "step": 18912 }, { "epoch": 0.5572234580086325, "grad_norm": 1.4039861321171043, "learning_rate": 4.874024626250849e-06, "loss": 0.3643, "step": 18913 }, { "epoch": 0.5572529204660961, "grad_norm": 1.55947707527548, "learning_rate": 4.87351055054686e-06, "loss": 0.5005, "step": 18914 }, { "epoch": 0.5572823829235597, "grad_norm": 1.602914705663711, "learning_rate": 4.87299647618084e-06, "loss": 0.41, "step": 18915 }, { "epoch": 0.5573118453810232, "grad_norm": 1.6693884067732758, "learning_rate": 4.872482403158223e-06, "loss": 0.4098, "step": 18916 }, { "epoch": 0.5573413078384868, "grad_norm": 1.5928754041370439, "learning_rate": 4.871968331484448e-06, "loss": 0.5349, "step": 18917 }, { "epoch": 0.5573707702959504, "grad_norm": 1.4109959281272737, "learning_rate": 4.8714542611649544e-06, "loss": 0.3943, "step": 18918 }, { "epoch": 0.557400232753414, "grad_norm": 1.4986836027698105, "learning_rate": 4.870940192205177e-06, "loss": 0.6163, "step": 18919 }, { "epoch": 0.5574296952108775, "grad_norm": 1.3653586471491477, "learning_rate": 4.870426124610555e-06, "loss": 0.3023, "step": 18920 }, { "epoch": 0.5574591576683411, "grad_norm": 1.5140897432390947, "learning_rate": 4.8699120583865275e-06, "loss": 0.4052, "step": 18921 }, { "epoch": 0.5574886201258047, "grad_norm": 1.4851258599540038, "learning_rate": 4.869397993538529e-06, "loss": 0.4777, "step": 18922 }, { "epoch": 0.5575180825832683, "grad_norm": 1.2648675245712242, "learning_rate": 4.868883930072001e-06, "loss": 0.2922, "step": 18923 }, { "epoch": 0.5575475450407319, "grad_norm": 1.5192935626590258, "learning_rate": 4.868369867992378e-06, "loss": 0.5015, "step": 18924 }, { "epoch": 0.5575770074981954, "grad_norm": 1.5941350979596667, "learning_rate": 4.867855807305098e-06, "loss": 0.4215, "step": 18925 }, { "epoch": 0.557606469955659, "grad_norm": 1.4870973493330757, "learning_rate": 4.867341748015598e-06, "loss": 0.4664, "step": 18926 }, { "epoch": 0.5576359324131226, "grad_norm": 1.4178057265055783, "learning_rate": 4.866827690129318e-06, "loss": 0.4352, "step": 18927 }, { "epoch": 0.5576653948705862, "grad_norm": 1.6953783610279483, "learning_rate": 4.866313633651693e-06, "loss": 0.6394, "step": 18928 }, { "epoch": 0.5576948573280497, "grad_norm": 1.6331452954214913, "learning_rate": 4.865799578588161e-06, "loss": 0.5145, "step": 18929 }, { "epoch": 0.5577243197855133, "grad_norm": 1.2228148813347137, "learning_rate": 4.8652855249441625e-06, "loss": 0.3429, "step": 18930 }, { "epoch": 0.5577537822429769, "grad_norm": 1.4565444725407748, "learning_rate": 4.864771472725131e-06, "loss": 0.4852, "step": 18931 }, { "epoch": 0.5577832447004405, "grad_norm": 1.5772183792700178, "learning_rate": 4.864257421936505e-06, "loss": 0.582, "step": 18932 }, { "epoch": 0.557812707157904, "grad_norm": 1.3364257884253654, "learning_rate": 4.863743372583726e-06, "loss": 0.2939, "step": 18933 }, { "epoch": 0.5578421696153676, "grad_norm": 1.427038633737717, "learning_rate": 4.863229324672226e-06, "loss": 0.4348, "step": 18934 }, { "epoch": 0.5578716320728312, "grad_norm": 1.6320813624085573, "learning_rate": 4.862715278207443e-06, "loss": 0.4927, "step": 18935 }, { "epoch": 0.5579010945302948, "grad_norm": 1.5108416570540941, "learning_rate": 4.862201233194817e-06, "loss": 0.3939, "step": 18936 }, { "epoch": 0.5579305569877584, "grad_norm": 1.559352622822977, "learning_rate": 4.8616871896397856e-06, "loss": 0.4654, "step": 18937 }, { "epoch": 0.5579600194452219, "grad_norm": 1.5021579492981083, "learning_rate": 4.861173147547783e-06, "loss": 0.4852, "step": 18938 }, { "epoch": 0.5579894819026855, "grad_norm": 1.7349073023030075, "learning_rate": 4.860659106924251e-06, "loss": 0.6278, "step": 18939 }, { "epoch": 0.5580189443601491, "grad_norm": 1.3486034707807033, "learning_rate": 4.860145067774621e-06, "loss": 0.386, "step": 18940 }, { "epoch": 0.5580484068176127, "grad_norm": 1.5012516222691659, "learning_rate": 4.859631030104336e-06, "loss": 0.3461, "step": 18941 }, { "epoch": 0.5580778692750762, "grad_norm": 1.3996460694613366, "learning_rate": 4.859116993918832e-06, "loss": 0.4705, "step": 18942 }, { "epoch": 0.5581073317325398, "grad_norm": 1.9039346524329974, "learning_rate": 4.858602959223545e-06, "loss": 0.4328, "step": 18943 }, { "epoch": 0.5581367941900034, "grad_norm": 1.3153049908262482, "learning_rate": 4.8580889260239126e-06, "loss": 0.3821, "step": 18944 }, { "epoch": 0.558166256647467, "grad_norm": 1.5604877251477962, "learning_rate": 4.857574894325371e-06, "loss": 0.4411, "step": 18945 }, { "epoch": 0.5581957191049305, "grad_norm": 1.5319038871445487, "learning_rate": 4.85706086413336e-06, "loss": 0.6128, "step": 18946 }, { "epoch": 0.5582251815623941, "grad_norm": 1.26353412542666, "learning_rate": 4.856546835453316e-06, "loss": 0.3196, "step": 18947 }, { "epoch": 0.5582546440198577, "grad_norm": 1.6125865027489112, "learning_rate": 4.856032808290676e-06, "loss": 0.5782, "step": 18948 }, { "epoch": 0.5582841064773213, "grad_norm": 1.4024062625670215, "learning_rate": 4.855518782650875e-06, "loss": 0.4185, "step": 18949 }, { "epoch": 0.5583135689347849, "grad_norm": 1.6039028866490428, "learning_rate": 4.855004758539353e-06, "loss": 0.5135, "step": 18950 }, { "epoch": 0.5583430313922484, "grad_norm": 1.537628587486687, "learning_rate": 4.854490735961548e-06, "loss": 0.4039, "step": 18951 }, { "epoch": 0.558372493849712, "grad_norm": 1.464789153542642, "learning_rate": 4.853976714922894e-06, "loss": 0.342, "step": 18952 }, { "epoch": 0.5584019563071756, "grad_norm": 1.714125903745493, "learning_rate": 4.8534626954288325e-06, "loss": 0.4272, "step": 18953 }, { "epoch": 0.5584314187646392, "grad_norm": 1.4491321466697369, "learning_rate": 4.852948677484795e-06, "loss": 0.499, "step": 18954 }, { "epoch": 0.5584608812221027, "grad_norm": 1.306926807379908, "learning_rate": 4.852434661096223e-06, "loss": 0.386, "step": 18955 }, { "epoch": 0.5584903436795663, "grad_norm": 1.4475663706603075, "learning_rate": 4.851920646268552e-06, "loss": 0.5068, "step": 18956 }, { "epoch": 0.5585198061370299, "grad_norm": 1.4166533524268847, "learning_rate": 4.851406633007218e-06, "loss": 0.3131, "step": 18957 }, { "epoch": 0.5585492685944935, "grad_norm": 1.5133789811451102, "learning_rate": 4.850892621317661e-06, "loss": 0.3062, "step": 18958 }, { "epoch": 0.558578731051957, "grad_norm": 1.2937504495672454, "learning_rate": 4.8503786112053155e-06, "loss": 0.4708, "step": 18959 }, { "epoch": 0.5586081935094206, "grad_norm": 1.322884609713551, "learning_rate": 4.84986460267562e-06, "loss": 0.272, "step": 18960 }, { "epoch": 0.5586376559668842, "grad_norm": 1.3903707721386904, "learning_rate": 4.84935059573401e-06, "loss": 0.4158, "step": 18961 }, { "epoch": 0.5586671184243478, "grad_norm": 1.5261875293060139, "learning_rate": 4.8488365903859235e-06, "loss": 0.4307, "step": 18962 }, { "epoch": 0.5586965808818114, "grad_norm": 1.669607134235932, "learning_rate": 4.8483225866368e-06, "loss": 0.5777, "step": 18963 }, { "epoch": 0.5587260433392749, "grad_norm": 1.4814766703341788, "learning_rate": 4.847808584492073e-06, "loss": 0.4535, "step": 18964 }, { "epoch": 0.5587555057967385, "grad_norm": 1.5458260142633609, "learning_rate": 4.847294583957179e-06, "loss": 0.5948, "step": 18965 }, { "epoch": 0.5587849682542021, "grad_norm": 1.584332471480108, "learning_rate": 4.846780585037556e-06, "loss": 0.4922, "step": 18966 }, { "epoch": 0.5588144307116657, "grad_norm": 1.5042004659904504, "learning_rate": 4.846266587738643e-06, "loss": 0.4456, "step": 18967 }, { "epoch": 0.5588438931691292, "grad_norm": 1.5017582386779822, "learning_rate": 4.845752592065874e-06, "loss": 0.4564, "step": 18968 }, { "epoch": 0.5588733556265928, "grad_norm": 1.4651476301905457, "learning_rate": 4.845238598024688e-06, "loss": 0.3479, "step": 18969 }, { "epoch": 0.5589028180840564, "grad_norm": 1.3517031427203616, "learning_rate": 4.844724605620519e-06, "loss": 0.3129, "step": 18970 }, { "epoch": 0.55893228054152, "grad_norm": 1.4906363576542407, "learning_rate": 4.8442106148588074e-06, "loss": 0.4746, "step": 18971 }, { "epoch": 0.5589617429989835, "grad_norm": 1.725457408728967, "learning_rate": 4.843696625744988e-06, "loss": 0.4979, "step": 18972 }, { "epoch": 0.5589912054564471, "grad_norm": 1.352179397695789, "learning_rate": 4.8431826382845e-06, "loss": 0.3875, "step": 18973 }, { "epoch": 0.5590206679139107, "grad_norm": 1.3454090622040433, "learning_rate": 4.842668652482776e-06, "loss": 0.3755, "step": 18974 }, { "epoch": 0.5590501303713743, "grad_norm": 1.4366881759573695, "learning_rate": 4.842154668345256e-06, "loss": 0.4126, "step": 18975 }, { "epoch": 0.5590795928288379, "grad_norm": 1.2952337045630897, "learning_rate": 4.841640685877376e-06, "loss": 0.4142, "step": 18976 }, { "epoch": 0.5591090552863014, "grad_norm": 1.5358959668086216, "learning_rate": 4.841126705084571e-06, "loss": 0.4261, "step": 18977 }, { "epoch": 0.559138517743765, "grad_norm": 1.779309515408648, "learning_rate": 4.84061272597228e-06, "loss": 0.5806, "step": 18978 }, { "epoch": 0.5591679802012286, "grad_norm": 1.584468651515423, "learning_rate": 4.8400987485459396e-06, "loss": 0.4412, "step": 18979 }, { "epoch": 0.5591974426586922, "grad_norm": 1.3357227880246736, "learning_rate": 4.839584772810985e-06, "loss": 0.4317, "step": 18980 }, { "epoch": 0.5592269051161557, "grad_norm": 1.5317270670783336, "learning_rate": 4.839070798772855e-06, "loss": 0.4735, "step": 18981 }, { "epoch": 0.5592563675736193, "grad_norm": 1.601258789731169, "learning_rate": 4.838556826436984e-06, "loss": 0.4459, "step": 18982 }, { "epoch": 0.5592858300310829, "grad_norm": 1.5231437253780251, "learning_rate": 4.838042855808812e-06, "loss": 0.4137, "step": 18983 }, { "epoch": 0.5593152924885465, "grad_norm": 1.5023971625399457, "learning_rate": 4.83752888689377e-06, "loss": 0.4268, "step": 18984 }, { "epoch": 0.55934475494601, "grad_norm": 1.5936277303513813, "learning_rate": 4.8370149196973004e-06, "loss": 0.5343, "step": 18985 }, { "epoch": 0.5593742174034736, "grad_norm": 1.3905012732509263, "learning_rate": 4.836500954224836e-06, "loss": 0.4771, "step": 18986 }, { "epoch": 0.5594036798609372, "grad_norm": 1.650206908103785, "learning_rate": 4.835986990481814e-06, "loss": 0.4962, "step": 18987 }, { "epoch": 0.5594331423184008, "grad_norm": 1.5928181549082545, "learning_rate": 4.835473028473673e-06, "loss": 0.4998, "step": 18988 }, { "epoch": 0.5594626047758644, "grad_norm": 1.529244770019709, "learning_rate": 4.834959068205847e-06, "loss": 0.5598, "step": 18989 }, { "epoch": 0.5594920672333279, "grad_norm": 1.7149945811376524, "learning_rate": 4.834445109683775e-06, "loss": 0.4747, "step": 18990 }, { "epoch": 0.5595215296907915, "grad_norm": 1.6693674739821887, "learning_rate": 4.8339311529128905e-06, "loss": 0.5177, "step": 18991 }, { "epoch": 0.5595509921482551, "grad_norm": 1.5337940561385206, "learning_rate": 4.833417197898633e-06, "loss": 0.5295, "step": 18992 }, { "epoch": 0.5595804546057187, "grad_norm": 1.4234671301344126, "learning_rate": 4.832903244646439e-06, "loss": 0.4893, "step": 18993 }, { "epoch": 0.5596099170631822, "grad_norm": 1.655420981652509, "learning_rate": 4.832389293161742e-06, "loss": 0.4121, "step": 18994 }, { "epoch": 0.5596393795206458, "grad_norm": 1.582369093091237, "learning_rate": 4.831875343449979e-06, "loss": 0.4836, "step": 18995 }, { "epoch": 0.5596688419781094, "grad_norm": 1.5888206913253395, "learning_rate": 4.831361395516588e-06, "loss": 0.4506, "step": 18996 }, { "epoch": 0.559698304435573, "grad_norm": 1.5610603628610777, "learning_rate": 4.830847449367006e-06, "loss": 0.4537, "step": 18997 }, { "epoch": 0.5597277668930365, "grad_norm": 1.4424991625260786, "learning_rate": 4.830333505006666e-06, "loss": 0.4813, "step": 18998 }, { "epoch": 0.5597572293505001, "grad_norm": 1.4449983085251077, "learning_rate": 4.829819562441007e-06, "loss": 0.3672, "step": 18999 }, { "epoch": 0.5597866918079637, "grad_norm": 1.5549137246492242, "learning_rate": 4.829305621675466e-06, "loss": 0.359, "step": 19000 }, { "epoch": 0.5598161542654273, "grad_norm": 1.5689928782586535, "learning_rate": 4.828791682715477e-06, "loss": 0.4521, "step": 19001 }, { "epoch": 0.5598456167228909, "grad_norm": 1.5925633667222527, "learning_rate": 4.82827774556648e-06, "loss": 0.3389, "step": 19002 }, { "epoch": 0.5598750791803544, "grad_norm": 1.4701583888463026, "learning_rate": 4.827763810233907e-06, "loss": 0.4632, "step": 19003 }, { "epoch": 0.559904541637818, "grad_norm": 1.470340519475798, "learning_rate": 4.827249876723195e-06, "loss": 0.4443, "step": 19004 }, { "epoch": 0.5599340040952816, "grad_norm": 1.6716048176988016, "learning_rate": 4.826735945039781e-06, "loss": 0.5276, "step": 19005 }, { "epoch": 0.5599634665527452, "grad_norm": 1.558775268977221, "learning_rate": 4.826222015189102e-06, "loss": 0.4795, "step": 19006 }, { "epoch": 0.5599929290102087, "grad_norm": 1.2714731642824535, "learning_rate": 4.825708087176594e-06, "loss": 0.3794, "step": 19007 }, { "epoch": 0.5600223914676723, "grad_norm": 1.4747256487531173, "learning_rate": 4.825194161007691e-06, "loss": 0.4314, "step": 19008 }, { "epoch": 0.5600518539251359, "grad_norm": 1.5305749060246618, "learning_rate": 4.824680236687833e-06, "loss": 0.4839, "step": 19009 }, { "epoch": 0.5600813163825995, "grad_norm": 1.5783176058460784, "learning_rate": 4.824166314222453e-06, "loss": 0.4951, "step": 19010 }, { "epoch": 0.560110778840063, "grad_norm": 1.5455996442194662, "learning_rate": 4.823652393616988e-06, "loss": 0.5579, "step": 19011 }, { "epoch": 0.5601402412975266, "grad_norm": 1.5914484387764665, "learning_rate": 4.8231384748768765e-06, "loss": 0.3892, "step": 19012 }, { "epoch": 0.5601697037549902, "grad_norm": 1.4979306595869928, "learning_rate": 4.822624558007551e-06, "loss": 0.4404, "step": 19013 }, { "epoch": 0.5601991662124538, "grad_norm": 1.371707312518352, "learning_rate": 4.8221106430144476e-06, "loss": 0.4072, "step": 19014 }, { "epoch": 0.5602286286699174, "grad_norm": 1.5336900615578795, "learning_rate": 4.821596729903005e-06, "loss": 0.4608, "step": 19015 }, { "epoch": 0.5602580911273809, "grad_norm": 1.4796406959009816, "learning_rate": 4.821082818678656e-06, "loss": 0.4289, "step": 19016 }, { "epoch": 0.5602875535848445, "grad_norm": 1.5362740649907474, "learning_rate": 4.8205689093468395e-06, "loss": 0.4251, "step": 19017 }, { "epoch": 0.5603170160423081, "grad_norm": 1.4273546073605894, "learning_rate": 4.820055001912991e-06, "loss": 0.4453, "step": 19018 }, { "epoch": 0.5603464784997717, "grad_norm": 1.4860572780512407, "learning_rate": 4.819541096382545e-06, "loss": 0.3665, "step": 19019 }, { "epoch": 0.5603759409572352, "grad_norm": 1.5214411382252562, "learning_rate": 4.8190271927609375e-06, "loss": 0.4553, "step": 19020 }, { "epoch": 0.5604054034146988, "grad_norm": 1.6474776654648093, "learning_rate": 4.818513291053607e-06, "loss": 0.57, "step": 19021 }, { "epoch": 0.5604348658721624, "grad_norm": 1.5924016081771604, "learning_rate": 4.817999391265988e-06, "loss": 0.4182, "step": 19022 }, { "epoch": 0.560464328329626, "grad_norm": 1.5307163933222916, "learning_rate": 4.8174854934035145e-06, "loss": 0.3861, "step": 19023 }, { "epoch": 0.5604937907870895, "grad_norm": 1.459871791781418, "learning_rate": 4.8169715974716245e-06, "loss": 0.4475, "step": 19024 }, { "epoch": 0.5605232532445531, "grad_norm": 1.5072462781635894, "learning_rate": 4.816457703475753e-06, "loss": 0.4849, "step": 19025 }, { "epoch": 0.5605527157020167, "grad_norm": 1.4456622919309403, "learning_rate": 4.815943811421335e-06, "loss": 0.4279, "step": 19026 }, { "epoch": 0.5605821781594803, "grad_norm": 1.6636463924856448, "learning_rate": 4.815429921313809e-06, "loss": 0.4755, "step": 19027 }, { "epoch": 0.5606116406169439, "grad_norm": 1.4584357042571099, "learning_rate": 4.814916033158607e-06, "loss": 0.4377, "step": 19028 }, { "epoch": 0.5606411030744074, "grad_norm": 1.3239744649718852, "learning_rate": 4.814402146961168e-06, "loss": 0.3608, "step": 19029 }, { "epoch": 0.560670565531871, "grad_norm": 1.5998360005833032, "learning_rate": 4.813888262726927e-06, "loss": 0.5163, "step": 19030 }, { "epoch": 0.5607000279893346, "grad_norm": 1.6893420141775952, "learning_rate": 4.813374380461318e-06, "loss": 0.5717, "step": 19031 }, { "epoch": 0.5607294904467982, "grad_norm": 1.453241664221927, "learning_rate": 4.8128605001697805e-06, "loss": 0.491, "step": 19032 }, { "epoch": 0.5607589529042617, "grad_norm": 1.4903608214154578, "learning_rate": 4.8123466218577455e-06, "loss": 0.3915, "step": 19033 }, { "epoch": 0.5607884153617253, "grad_norm": 1.337708870267113, "learning_rate": 4.811832745530652e-06, "loss": 0.3797, "step": 19034 }, { "epoch": 0.5608178778191889, "grad_norm": 1.4716034419188737, "learning_rate": 4.811318871193932e-06, "loss": 0.2892, "step": 19035 }, { "epoch": 0.5608473402766525, "grad_norm": 1.3456697195469893, "learning_rate": 4.810804998853025e-06, "loss": 0.3613, "step": 19036 }, { "epoch": 0.560876802734116, "grad_norm": 1.5983186024030709, "learning_rate": 4.810291128513365e-06, "loss": 0.5182, "step": 19037 }, { "epoch": 0.5609062651915796, "grad_norm": 1.3323460229521698, "learning_rate": 4.809777260180387e-06, "loss": 0.3823, "step": 19038 }, { "epoch": 0.5609357276490432, "grad_norm": 1.2798791591497192, "learning_rate": 4.809263393859528e-06, "loss": 0.3607, "step": 19039 }, { "epoch": 0.5609651901065068, "grad_norm": 1.505669798858552, "learning_rate": 4.808749529556222e-06, "loss": 0.5259, "step": 19040 }, { "epoch": 0.5609946525639704, "grad_norm": 1.4772985658988596, "learning_rate": 4.808235667275905e-06, "loss": 0.4035, "step": 19041 }, { "epoch": 0.5610241150214339, "grad_norm": 1.6741473459278737, "learning_rate": 4.807721807024015e-06, "loss": 0.5666, "step": 19042 }, { "epoch": 0.5610535774788975, "grad_norm": 1.4482697344377824, "learning_rate": 4.807207948805984e-06, "loss": 0.4564, "step": 19043 }, { "epoch": 0.5610830399363611, "grad_norm": 1.5603649598748464, "learning_rate": 4.806694092627247e-06, "loss": 0.5305, "step": 19044 }, { "epoch": 0.5611125023938247, "grad_norm": 1.4397079335134548, "learning_rate": 4.806180238493241e-06, "loss": 0.3729, "step": 19045 }, { "epoch": 0.5611419648512882, "grad_norm": 1.4073821606976629, "learning_rate": 4.8056663864094035e-06, "loss": 0.4812, "step": 19046 }, { "epoch": 0.5611714273087518, "grad_norm": 1.4452729299814528, "learning_rate": 4.805152536381166e-06, "loss": 0.5389, "step": 19047 }, { "epoch": 0.5612008897662154, "grad_norm": 1.432987428781511, "learning_rate": 4.8046386884139665e-06, "loss": 0.4226, "step": 19048 }, { "epoch": 0.561230352223679, "grad_norm": 1.8036840591725296, "learning_rate": 4.804124842513238e-06, "loss": 0.5041, "step": 19049 }, { "epoch": 0.5612598146811425, "grad_norm": 1.4810984539386096, "learning_rate": 4.803610998684417e-06, "loss": 0.3424, "step": 19050 }, { "epoch": 0.5612892771386061, "grad_norm": 1.5765178048407147, "learning_rate": 4.803097156932942e-06, "loss": 0.5104, "step": 19051 }, { "epoch": 0.5613187395960697, "grad_norm": 1.2683045836848914, "learning_rate": 4.802583317264245e-06, "loss": 0.3053, "step": 19052 }, { "epoch": 0.5613482020535333, "grad_norm": 1.5466986520017179, "learning_rate": 4.802069479683759e-06, "loss": 0.6201, "step": 19053 }, { "epoch": 0.5613776645109969, "grad_norm": 1.5732603542739223, "learning_rate": 4.801555644196923e-06, "loss": 0.4597, "step": 19054 }, { "epoch": 0.5614071269684604, "grad_norm": 1.37998881608239, "learning_rate": 4.801041810809171e-06, "loss": 0.5316, "step": 19055 }, { "epoch": 0.561436589425924, "grad_norm": 1.6419171701294109, "learning_rate": 4.800527979525937e-06, "loss": 0.5071, "step": 19056 }, { "epoch": 0.5614660518833876, "grad_norm": 1.3216237079486204, "learning_rate": 4.8000141503526584e-06, "loss": 0.4293, "step": 19057 }, { "epoch": 0.5614955143408512, "grad_norm": 1.267401313697069, "learning_rate": 4.799500323294768e-06, "loss": 0.3524, "step": 19058 }, { "epoch": 0.5615249767983147, "grad_norm": 1.4925795221107128, "learning_rate": 4.798986498357702e-06, "loss": 0.358, "step": 19059 }, { "epoch": 0.5615544392557783, "grad_norm": 1.327946381325437, "learning_rate": 4.798472675546898e-06, "loss": 0.3684, "step": 19060 }, { "epoch": 0.5615839017132419, "grad_norm": 1.4013841310296504, "learning_rate": 4.797958854867786e-06, "loss": 0.4, "step": 19061 }, { "epoch": 0.5616133641707055, "grad_norm": 1.5537288675633336, "learning_rate": 4.7974450363258065e-06, "loss": 0.4859, "step": 19062 }, { "epoch": 0.561642826628169, "grad_norm": 1.4498087308612209, "learning_rate": 4.796931219926389e-06, "loss": 0.4469, "step": 19063 }, { "epoch": 0.5616722890856326, "grad_norm": 1.4270861634796677, "learning_rate": 4.7964174056749725e-06, "loss": 0.469, "step": 19064 }, { "epoch": 0.5617017515430962, "grad_norm": 1.5891135632008155, "learning_rate": 4.79590359357699e-06, "loss": 0.4382, "step": 19065 }, { "epoch": 0.5617312140005598, "grad_norm": 1.5816924463548185, "learning_rate": 4.7953897836378765e-06, "loss": 0.5165, "step": 19066 }, { "epoch": 0.5617606764580234, "grad_norm": 1.517592006850058, "learning_rate": 4.794875975863068e-06, "loss": 0.5785, "step": 19067 }, { "epoch": 0.5617901389154869, "grad_norm": 1.3747711656518973, "learning_rate": 4.794362170257999e-06, "loss": 0.4058, "step": 19068 }, { "epoch": 0.5618196013729505, "grad_norm": 1.363168736301135, "learning_rate": 4.793848366828105e-06, "loss": 0.336, "step": 19069 }, { "epoch": 0.5618490638304141, "grad_norm": 1.4255422309420385, "learning_rate": 4.793334565578819e-06, "loss": 0.5173, "step": 19070 }, { "epoch": 0.5618785262878777, "grad_norm": 1.5003119863883474, "learning_rate": 4.792820766515577e-06, "loss": 0.5136, "step": 19071 }, { "epoch": 0.5619079887453412, "grad_norm": 1.6620882558461125, "learning_rate": 4.792306969643816e-06, "loss": 0.4138, "step": 19072 }, { "epoch": 0.5619374512028048, "grad_norm": 1.4593633565687605, "learning_rate": 4.791793174968968e-06, "loss": 0.4964, "step": 19073 }, { "epoch": 0.5619669136602684, "grad_norm": 1.4815454028396333, "learning_rate": 4.7912793824964665e-06, "loss": 0.42, "step": 19074 }, { "epoch": 0.561996376117732, "grad_norm": 1.672402304626347, "learning_rate": 4.790765592231749e-06, "loss": 0.4021, "step": 19075 }, { "epoch": 0.5620258385751955, "grad_norm": 1.5077747640053991, "learning_rate": 4.790251804180249e-06, "loss": 0.5569, "step": 19076 }, { "epoch": 0.5620553010326591, "grad_norm": 1.6399631255497564, "learning_rate": 4.789738018347401e-06, "loss": 0.4695, "step": 19077 }, { "epoch": 0.5620847634901227, "grad_norm": 1.7659053143724195, "learning_rate": 4.789224234738642e-06, "loss": 0.5479, "step": 19078 }, { "epoch": 0.5621142259475863, "grad_norm": 1.4459121544004694, "learning_rate": 4.788710453359403e-06, "loss": 0.4065, "step": 19079 }, { "epoch": 0.5621436884050499, "grad_norm": 1.537012440357687, "learning_rate": 4.788196674215121e-06, "loss": 0.3671, "step": 19080 }, { "epoch": 0.5621731508625134, "grad_norm": 1.4658482754838493, "learning_rate": 4.78768289731123e-06, "loss": 0.2367, "step": 19081 }, { "epoch": 0.562202613319977, "grad_norm": 1.4815080195095562, "learning_rate": 4.787169122653167e-06, "loss": 0.5102, "step": 19082 }, { "epoch": 0.5622320757774406, "grad_norm": 1.4320143240227083, "learning_rate": 4.786655350246361e-06, "loss": 0.4039, "step": 19083 }, { "epoch": 0.5622615382349042, "grad_norm": 1.419264420412393, "learning_rate": 4.7861415800962495e-06, "loss": 0.4713, "step": 19084 }, { "epoch": 0.5622910006923677, "grad_norm": 1.5562462968920743, "learning_rate": 4.7856278122082685e-06, "loss": 0.5008, "step": 19085 }, { "epoch": 0.5623204631498313, "grad_norm": 1.4073961183971773, "learning_rate": 4.785114046587851e-06, "loss": 0.3757, "step": 19086 }, { "epoch": 0.5623499256072949, "grad_norm": 1.4338293268500637, "learning_rate": 4.784600283240431e-06, "loss": 0.3852, "step": 19087 }, { "epoch": 0.5623793880647585, "grad_norm": 1.2919767637679667, "learning_rate": 4.784086522171444e-06, "loss": 0.3038, "step": 19088 }, { "epoch": 0.562408850522222, "grad_norm": 1.448698718034337, "learning_rate": 4.783572763386324e-06, "loss": 0.4215, "step": 19089 }, { "epoch": 0.5624383129796856, "grad_norm": 1.5322927199972831, "learning_rate": 4.783059006890506e-06, "loss": 0.5018, "step": 19090 }, { "epoch": 0.5624677754371492, "grad_norm": 1.2083326925782585, "learning_rate": 4.782545252689423e-06, "loss": 0.3161, "step": 19091 }, { "epoch": 0.5624972378946128, "grad_norm": 1.4472849449596008, "learning_rate": 4.782031500788512e-06, "loss": 0.4434, "step": 19092 }, { "epoch": 0.5625267003520764, "grad_norm": 1.4910249591233595, "learning_rate": 4.781517751193203e-06, "loss": 0.5077, "step": 19093 }, { "epoch": 0.5625561628095399, "grad_norm": 1.4467087231809193, "learning_rate": 4.781004003908933e-06, "loss": 0.4646, "step": 19094 }, { "epoch": 0.5625856252670035, "grad_norm": 1.7039555661052892, "learning_rate": 4.780490258941136e-06, "loss": 0.2948, "step": 19095 }, { "epoch": 0.5626150877244671, "grad_norm": 1.4836992102053042, "learning_rate": 4.779976516295245e-06, "loss": 0.3634, "step": 19096 }, { "epoch": 0.5626445501819307, "grad_norm": 1.380851254094903, "learning_rate": 4.7794627759766975e-06, "loss": 0.4444, "step": 19097 }, { "epoch": 0.5626740126393942, "grad_norm": 1.3101178983519257, "learning_rate": 4.778949037990924e-06, "loss": 0.4046, "step": 19098 }, { "epoch": 0.5627034750968578, "grad_norm": 1.4045161764256286, "learning_rate": 4.77843530234336e-06, "loss": 0.3346, "step": 19099 }, { "epoch": 0.5627329375543214, "grad_norm": 1.299245824505642, "learning_rate": 4.7779215690394415e-06, "loss": 0.304, "step": 19100 }, { "epoch": 0.562762400011785, "grad_norm": 1.3734828795798057, "learning_rate": 4.777407838084599e-06, "loss": 0.4191, "step": 19101 }, { "epoch": 0.5627918624692485, "grad_norm": 1.4790874009617778, "learning_rate": 4.776894109484272e-06, "loss": 0.4769, "step": 19102 }, { "epoch": 0.5628213249267121, "grad_norm": 1.5063441280232472, "learning_rate": 4.776380383243889e-06, "loss": 0.4277, "step": 19103 }, { "epoch": 0.5628507873841757, "grad_norm": 1.4281838992286127, "learning_rate": 4.775866659368886e-06, "loss": 0.4074, "step": 19104 }, { "epoch": 0.5628802498416393, "grad_norm": 1.5481729463466423, "learning_rate": 4.775352937864696e-06, "loss": 0.5097, "step": 19105 }, { "epoch": 0.5629097122991029, "grad_norm": 1.500703561543493, "learning_rate": 4.774839218736756e-06, "loss": 0.4554, "step": 19106 }, { "epoch": 0.5629391747565664, "grad_norm": 1.4104196677991996, "learning_rate": 4.774325501990497e-06, "loss": 0.385, "step": 19107 }, { "epoch": 0.56296863721403, "grad_norm": 1.3814018451178791, "learning_rate": 4.773811787631354e-06, "loss": 0.3661, "step": 19108 }, { "epoch": 0.5629980996714936, "grad_norm": 1.5547237406260463, "learning_rate": 4.773298075664762e-06, "loss": 0.4879, "step": 19109 }, { "epoch": 0.5630275621289572, "grad_norm": 1.5959677156247238, "learning_rate": 4.772784366096153e-06, "loss": 0.4013, "step": 19110 }, { "epoch": 0.5630570245864207, "grad_norm": 1.4336325305497342, "learning_rate": 4.772270658930962e-06, "loss": 0.4227, "step": 19111 }, { "epoch": 0.5630864870438843, "grad_norm": 1.5190928422447414, "learning_rate": 4.771756954174624e-06, "loss": 0.4713, "step": 19112 }, { "epoch": 0.5631159495013479, "grad_norm": 1.3783940088513866, "learning_rate": 4.77124325183257e-06, "loss": 0.3998, "step": 19113 }, { "epoch": 0.5631454119588115, "grad_norm": 1.5202944255676343, "learning_rate": 4.770729551910234e-06, "loss": 0.3694, "step": 19114 }, { "epoch": 0.563174874416275, "grad_norm": 1.5074620438239543, "learning_rate": 4.770215854413054e-06, "loss": 0.395, "step": 19115 }, { "epoch": 0.5632043368737386, "grad_norm": 1.5958801882554419, "learning_rate": 4.769702159346458e-06, "loss": 0.4716, "step": 19116 }, { "epoch": 0.5632337993312022, "grad_norm": 1.4817950942973348, "learning_rate": 4.769188466715882e-06, "loss": 0.354, "step": 19117 }, { "epoch": 0.5632632617886658, "grad_norm": 1.573940388283528, "learning_rate": 4.768674776526762e-06, "loss": 0.5044, "step": 19118 }, { "epoch": 0.5632927242461294, "grad_norm": 1.5614500194103937, "learning_rate": 4.7681610887845285e-06, "loss": 0.4696, "step": 19119 }, { "epoch": 0.5633221867035929, "grad_norm": 1.4349142513233661, "learning_rate": 4.767647403494616e-06, "loss": 0.4375, "step": 19120 }, { "epoch": 0.5633516491610565, "grad_norm": 1.6294763386255966, "learning_rate": 4.76713372066246e-06, "loss": 0.5889, "step": 19121 }, { "epoch": 0.5633811116185201, "grad_norm": 1.468438440069888, "learning_rate": 4.766620040293493e-06, "loss": 0.4643, "step": 19122 }, { "epoch": 0.5634105740759837, "grad_norm": 1.8729626624777984, "learning_rate": 4.766106362393146e-06, "loss": 0.6332, "step": 19123 }, { "epoch": 0.5634400365334472, "grad_norm": 1.6115895370044366, "learning_rate": 4.7655926869668564e-06, "loss": 0.5379, "step": 19124 }, { "epoch": 0.5634694989909108, "grad_norm": 1.6614441843101695, "learning_rate": 4.765079014020055e-06, "loss": 0.4371, "step": 19125 }, { "epoch": 0.5634989614483744, "grad_norm": 1.510485769944554, "learning_rate": 4.7645653435581755e-06, "loss": 0.4621, "step": 19126 }, { "epoch": 0.563528423905838, "grad_norm": 1.4538097344123413, "learning_rate": 4.7640516755866525e-06, "loss": 0.5137, "step": 19127 }, { "epoch": 0.5635578863633015, "grad_norm": 1.631453971163005, "learning_rate": 4.763538010110919e-06, "loss": 0.6109, "step": 19128 }, { "epoch": 0.5635873488207651, "grad_norm": 1.5862769258839875, "learning_rate": 4.763024347136408e-06, "loss": 0.4735, "step": 19129 }, { "epoch": 0.5636168112782287, "grad_norm": 1.453989480441578, "learning_rate": 4.762510686668554e-06, "loss": 0.3767, "step": 19130 }, { "epoch": 0.5636462737356923, "grad_norm": 1.7261960466076474, "learning_rate": 4.761997028712789e-06, "loss": 0.4111, "step": 19131 }, { "epoch": 0.5636757361931559, "grad_norm": 1.3274449063324127, "learning_rate": 4.761483373274549e-06, "loss": 0.3288, "step": 19132 }, { "epoch": 0.5637051986506194, "grad_norm": 1.2461801339969412, "learning_rate": 4.760969720359263e-06, "loss": 0.2776, "step": 19133 }, { "epoch": 0.563734661108083, "grad_norm": 1.7011988814899714, "learning_rate": 4.760456069972367e-06, "loss": 0.4851, "step": 19134 }, { "epoch": 0.5637641235655466, "grad_norm": 1.5587076285083876, "learning_rate": 4.7599424221192935e-06, "loss": 0.5363, "step": 19135 }, { "epoch": 0.5637935860230102, "grad_norm": 1.5395052688418893, "learning_rate": 4.7594287768054766e-06, "loss": 0.3702, "step": 19136 }, { "epoch": 0.5638230484804737, "grad_norm": 1.4932331957921432, "learning_rate": 4.758915134036347e-06, "loss": 0.454, "step": 19137 }, { "epoch": 0.5638525109379373, "grad_norm": 1.7977883301464443, "learning_rate": 4.7584014938173405e-06, "loss": 0.504, "step": 19138 }, { "epoch": 0.5638819733954009, "grad_norm": 1.52020002358893, "learning_rate": 4.7578878561538904e-06, "loss": 0.4554, "step": 19139 }, { "epoch": 0.5639114358528645, "grad_norm": 1.5591098671500516, "learning_rate": 4.7573742210514276e-06, "loss": 0.6086, "step": 19140 }, { "epoch": 0.563940898310328, "grad_norm": 1.7312936846516853, "learning_rate": 4.756860588515386e-06, "loss": 0.588, "step": 19141 }, { "epoch": 0.5639703607677916, "grad_norm": 1.4577628623119128, "learning_rate": 4.756346958551202e-06, "loss": 0.4709, "step": 19142 }, { "epoch": 0.5639998232252552, "grad_norm": 1.7545937769818056, "learning_rate": 4.755833331164303e-06, "loss": 0.4848, "step": 19143 }, { "epoch": 0.5640292856827188, "grad_norm": 1.462295781395658, "learning_rate": 4.7553197063601255e-06, "loss": 0.4564, "step": 19144 }, { "epoch": 0.5640587481401824, "grad_norm": 1.675841841904168, "learning_rate": 4.754806084144101e-06, "loss": 0.4837, "step": 19145 }, { "epoch": 0.5640882105976459, "grad_norm": 1.5726130323814822, "learning_rate": 4.754292464521662e-06, "loss": 0.4635, "step": 19146 }, { "epoch": 0.5641176730551095, "grad_norm": 1.6081390678604879, "learning_rate": 4.7537788474982436e-06, "loss": 0.382, "step": 19147 }, { "epoch": 0.5641471355125731, "grad_norm": 1.3478981036761333, "learning_rate": 4.753265233079278e-06, "loss": 0.3181, "step": 19148 }, { "epoch": 0.5641765979700367, "grad_norm": 1.7887205068009433, "learning_rate": 4.752751621270197e-06, "loss": 0.4986, "step": 19149 }, { "epoch": 0.5642060604275002, "grad_norm": 1.3085714873125085, "learning_rate": 4.752238012076433e-06, "loss": 0.3515, "step": 19150 }, { "epoch": 0.5642355228849638, "grad_norm": 1.2411504754319516, "learning_rate": 4.751724405503421e-06, "loss": 0.329, "step": 19151 }, { "epoch": 0.5642649853424274, "grad_norm": 1.533140608939899, "learning_rate": 4.751210801556595e-06, "loss": 0.4619, "step": 19152 }, { "epoch": 0.564294447799891, "grad_norm": 1.5257504973343572, "learning_rate": 4.750697200241382e-06, "loss": 0.4542, "step": 19153 }, { "epoch": 0.5643239102573545, "grad_norm": 1.4771890039948448, "learning_rate": 4.750183601563218e-06, "loss": 0.3865, "step": 19154 }, { "epoch": 0.5643533727148181, "grad_norm": 1.75148989570211, "learning_rate": 4.749670005527537e-06, "loss": 0.5405, "step": 19155 }, { "epoch": 0.5643828351722817, "grad_norm": 1.6811588269651867, "learning_rate": 4.749156412139769e-06, "loss": 0.5762, "step": 19156 }, { "epoch": 0.5644122976297453, "grad_norm": 1.4845598642254418, "learning_rate": 4.74864282140535e-06, "loss": 0.4698, "step": 19157 }, { "epoch": 0.5644417600872089, "grad_norm": 1.6363707107963223, "learning_rate": 4.748129233329708e-06, "loss": 0.4826, "step": 19158 }, { "epoch": 0.5644712225446724, "grad_norm": 1.55431292140195, "learning_rate": 4.74761564791828e-06, "loss": 0.3693, "step": 19159 }, { "epoch": 0.564500685002136, "grad_norm": 1.7036284313795003, "learning_rate": 4.747102065176497e-06, "loss": 0.4397, "step": 19160 }, { "epoch": 0.5645301474595996, "grad_norm": 1.6253685412458783, "learning_rate": 4.746588485109791e-06, "loss": 0.5377, "step": 19161 }, { "epoch": 0.5645596099170632, "grad_norm": 1.534437306681407, "learning_rate": 4.746074907723596e-06, "loss": 0.444, "step": 19162 }, { "epoch": 0.5645890723745267, "grad_norm": 1.4741629893481598, "learning_rate": 4.745561333023341e-06, "loss": 0.5132, "step": 19163 }, { "epoch": 0.5646185348319903, "grad_norm": 1.4368636349771837, "learning_rate": 4.745047761014462e-06, "loss": 0.3351, "step": 19164 }, { "epoch": 0.5646479972894539, "grad_norm": 1.6164038066464588, "learning_rate": 4.744534191702389e-06, "loss": 0.5546, "step": 19165 }, { "epoch": 0.5646774597469175, "grad_norm": 1.5021478542572952, "learning_rate": 4.7440206250925565e-06, "loss": 0.4347, "step": 19166 }, { "epoch": 0.564706922204381, "grad_norm": 1.4244603211396023, "learning_rate": 4.743507061190394e-06, "loss": 0.3566, "step": 19167 }, { "epoch": 0.5647363846618446, "grad_norm": 1.5287981851728805, "learning_rate": 4.742993500001337e-06, "loss": 0.4374, "step": 19168 }, { "epoch": 0.5647658471193082, "grad_norm": 1.5082036580789462, "learning_rate": 4.742479941530817e-06, "loss": 0.4961, "step": 19169 }, { "epoch": 0.5647953095767718, "grad_norm": 1.6387834595382333, "learning_rate": 4.7419663857842644e-06, "loss": 0.3692, "step": 19170 }, { "epoch": 0.5648247720342354, "grad_norm": 1.511170673084772, "learning_rate": 4.741452832767113e-06, "loss": 0.4259, "step": 19171 }, { "epoch": 0.5648542344916989, "grad_norm": 1.438103928052385, "learning_rate": 4.7409392824847965e-06, "loss": 0.4439, "step": 19172 }, { "epoch": 0.5648836969491625, "grad_norm": 1.5135136565111404, "learning_rate": 4.740425734942744e-06, "loss": 0.5405, "step": 19173 }, { "epoch": 0.5649131594066261, "grad_norm": 1.3852250554650702, "learning_rate": 4.739912190146389e-06, "loss": 0.4347, "step": 19174 }, { "epoch": 0.5649426218640897, "grad_norm": 1.5491445219522537, "learning_rate": 4.739398648101164e-06, "loss": 0.3385, "step": 19175 }, { "epoch": 0.5649720843215532, "grad_norm": 1.6125574743768216, "learning_rate": 4.7388851088125e-06, "loss": 0.4304, "step": 19176 }, { "epoch": 0.5650015467790168, "grad_norm": 1.5119258779390061, "learning_rate": 4.73837157228583e-06, "loss": 0.5238, "step": 19177 }, { "epoch": 0.5650310092364804, "grad_norm": 1.6603375117075356, "learning_rate": 4.737858038526586e-06, "loss": 0.488, "step": 19178 }, { "epoch": 0.565060471693944, "grad_norm": 1.4026537486141317, "learning_rate": 4.737344507540199e-06, "loss": 0.45, "step": 19179 }, { "epoch": 0.5650899341514075, "grad_norm": 1.6105492059665334, "learning_rate": 4.736830979332101e-06, "loss": 0.4063, "step": 19180 }, { "epoch": 0.5651193966088711, "grad_norm": 1.5634341130761946, "learning_rate": 4.7363174539077275e-06, "loss": 0.4096, "step": 19181 }, { "epoch": 0.5651488590663347, "grad_norm": 1.4325610134476288, "learning_rate": 4.735803931272507e-06, "loss": 0.362, "step": 19182 }, { "epoch": 0.5651783215237983, "grad_norm": 1.30432505268946, "learning_rate": 4.735290411431871e-06, "loss": 0.4144, "step": 19183 }, { "epoch": 0.5652077839812619, "grad_norm": 1.500107143095946, "learning_rate": 4.7347768943912515e-06, "loss": 0.3545, "step": 19184 }, { "epoch": 0.5652372464387254, "grad_norm": 1.6216355174029868, "learning_rate": 4.734263380156083e-06, "loss": 0.4345, "step": 19185 }, { "epoch": 0.565266708896189, "grad_norm": 1.5709666407163403, "learning_rate": 4.733749868731794e-06, "loss": 0.4933, "step": 19186 }, { "epoch": 0.5652961713536526, "grad_norm": 1.4550563974563215, "learning_rate": 4.733236360123817e-06, "loss": 0.3773, "step": 19187 }, { "epoch": 0.5653256338111162, "grad_norm": 1.355805277742935, "learning_rate": 4.732722854337587e-06, "loss": 0.4385, "step": 19188 }, { "epoch": 0.5653550962685797, "grad_norm": 1.4324011730092845, "learning_rate": 4.732209351378531e-06, "loss": 0.526, "step": 19189 }, { "epoch": 0.5653845587260433, "grad_norm": 1.634580006573278, "learning_rate": 4.731695851252085e-06, "loss": 0.5226, "step": 19190 }, { "epoch": 0.5654140211835069, "grad_norm": 1.4995681530840683, "learning_rate": 4.731182353963677e-06, "loss": 0.4067, "step": 19191 }, { "epoch": 0.5654434836409705, "grad_norm": 1.3540341086512535, "learning_rate": 4.7306688595187425e-06, "loss": 0.4148, "step": 19192 }, { "epoch": 0.565472946098434, "grad_norm": 1.3767748390765548, "learning_rate": 4.730155367922707e-06, "loss": 0.4075, "step": 19193 }, { "epoch": 0.5655024085558976, "grad_norm": 1.6028627115003307, "learning_rate": 4.729641879181009e-06, "loss": 0.5799, "step": 19194 }, { "epoch": 0.5655318710133612, "grad_norm": 1.7000819010228392, "learning_rate": 4.729128393299075e-06, "loss": 0.4367, "step": 19195 }, { "epoch": 0.5655613334708248, "grad_norm": 1.6129674950650164, "learning_rate": 4.728614910282337e-06, "loss": 0.4851, "step": 19196 }, { "epoch": 0.5655907959282884, "grad_norm": 1.3689392480506306, "learning_rate": 4.728101430136231e-06, "loss": 0.4134, "step": 19197 }, { "epoch": 0.5656202583857519, "grad_norm": 1.6620031090271423, "learning_rate": 4.7275879528661824e-06, "loss": 0.532, "step": 19198 }, { "epoch": 0.5656497208432155, "grad_norm": 1.5205122949024232, "learning_rate": 4.7270744784776275e-06, "loss": 0.4846, "step": 19199 }, { "epoch": 0.5656791833006791, "grad_norm": 1.507943766232531, "learning_rate": 4.7265610069759946e-06, "loss": 0.4832, "step": 19200 }, { "epoch": 0.5657086457581427, "grad_norm": 1.4690072685781892, "learning_rate": 4.726047538366716e-06, "loss": 0.4093, "step": 19201 }, { "epoch": 0.5657381082156062, "grad_norm": 1.6119545264273076, "learning_rate": 4.725534072655224e-06, "loss": 0.3795, "step": 19202 }, { "epoch": 0.5657675706730698, "grad_norm": 1.7903292728694071, "learning_rate": 4.7250206098469484e-06, "loss": 0.5639, "step": 19203 }, { "epoch": 0.5657970331305334, "grad_norm": 1.5042926610840786, "learning_rate": 4.724507149947321e-06, "loss": 0.3317, "step": 19204 }, { "epoch": 0.565826495587997, "grad_norm": 1.6096530127065594, "learning_rate": 4.723993692961772e-06, "loss": 0.4117, "step": 19205 }, { "epoch": 0.5658559580454605, "grad_norm": 1.4778380637259039, "learning_rate": 4.723480238895734e-06, "loss": 0.3873, "step": 19206 }, { "epoch": 0.5658854205029241, "grad_norm": 1.3728108369864027, "learning_rate": 4.722966787754638e-06, "loss": 0.435, "step": 19207 }, { "epoch": 0.5659148829603877, "grad_norm": 1.528501475363346, "learning_rate": 4.722453339543915e-06, "loss": 0.4445, "step": 19208 }, { "epoch": 0.5659443454178513, "grad_norm": 1.8397458760960892, "learning_rate": 4.721939894268996e-06, "loss": 0.4999, "step": 19209 }, { "epoch": 0.5659738078753149, "grad_norm": 1.5928889623873064, "learning_rate": 4.721426451935312e-06, "loss": 0.4597, "step": 19210 }, { "epoch": 0.5660032703327784, "grad_norm": 1.3147278735558925, "learning_rate": 4.720913012548295e-06, "loss": 0.3257, "step": 19211 }, { "epoch": 0.566032732790242, "grad_norm": 1.4869966062695799, "learning_rate": 4.720399576113375e-06, "loss": 0.3534, "step": 19212 }, { "epoch": 0.5660621952477056, "grad_norm": 1.5247359929614475, "learning_rate": 4.719886142635982e-06, "loss": 0.446, "step": 19213 }, { "epoch": 0.5660916577051692, "grad_norm": 1.6192587498938542, "learning_rate": 4.719372712121549e-06, "loss": 0.5062, "step": 19214 }, { "epoch": 0.5661211201626327, "grad_norm": 1.6028805632812761, "learning_rate": 4.718859284575505e-06, "loss": 0.4959, "step": 19215 }, { "epoch": 0.5661505826200963, "grad_norm": 1.7121343350542821, "learning_rate": 4.718345860003281e-06, "loss": 0.3967, "step": 19216 }, { "epoch": 0.5661800450775599, "grad_norm": 1.8297296623194668, "learning_rate": 4.71783243841031e-06, "loss": 0.6275, "step": 19217 }, { "epoch": 0.5662095075350235, "grad_norm": 1.5506100158355927, "learning_rate": 4.717319019802021e-06, "loss": 0.4392, "step": 19218 }, { "epoch": 0.566238969992487, "grad_norm": 1.4515327220046443, "learning_rate": 4.716805604183845e-06, "loss": 0.4787, "step": 19219 }, { "epoch": 0.5662684324499506, "grad_norm": 1.4567356275897747, "learning_rate": 4.716292191561215e-06, "loss": 0.2894, "step": 19220 }, { "epoch": 0.5662978949074142, "grad_norm": 1.3641270481158672, "learning_rate": 4.715778781939557e-06, "loss": 0.3435, "step": 19221 }, { "epoch": 0.5663273573648778, "grad_norm": 1.679282943397726, "learning_rate": 4.715265375324308e-06, "loss": 0.6175, "step": 19222 }, { "epoch": 0.5663568198223414, "grad_norm": 1.4924357577229463, "learning_rate": 4.714751971720891e-06, "loss": 0.4324, "step": 19223 }, { "epoch": 0.5663862822798049, "grad_norm": 1.4586824693141551, "learning_rate": 4.714238571134743e-06, "loss": 0.4326, "step": 19224 }, { "epoch": 0.5664157447372685, "grad_norm": 1.480984303130139, "learning_rate": 4.7137251735712916e-06, "loss": 0.4979, "step": 19225 }, { "epoch": 0.5664452071947321, "grad_norm": 1.5662792874097398, "learning_rate": 4.713211779035968e-06, "loss": 0.4214, "step": 19226 }, { "epoch": 0.5664746696521957, "grad_norm": 1.5450851309489873, "learning_rate": 4.712698387534203e-06, "loss": 0.497, "step": 19227 }, { "epoch": 0.5665041321096592, "grad_norm": 1.5978104717962138, "learning_rate": 4.712184999071427e-06, "loss": 0.3422, "step": 19228 }, { "epoch": 0.5665335945671228, "grad_norm": 1.7227122174421503, "learning_rate": 4.7116716136530696e-06, "loss": 0.5604, "step": 19229 }, { "epoch": 0.5665630570245864, "grad_norm": 1.597635263720119, "learning_rate": 4.711158231284563e-06, "loss": 0.593, "step": 19230 }, { "epoch": 0.56659251948205, "grad_norm": 1.461667453176616, "learning_rate": 4.7106448519713355e-06, "loss": 0.3906, "step": 19231 }, { "epoch": 0.5666219819395135, "grad_norm": 1.3805245470579224, "learning_rate": 4.710131475718821e-06, "loss": 0.4785, "step": 19232 }, { "epoch": 0.5666514443969771, "grad_norm": 1.531683161534089, "learning_rate": 4.709618102532446e-06, "loss": 0.563, "step": 19233 }, { "epoch": 0.5666809068544407, "grad_norm": 1.4533018440867378, "learning_rate": 4.709104732417642e-06, "loss": 0.4541, "step": 19234 }, { "epoch": 0.5667103693119043, "grad_norm": 1.3770449948769699, "learning_rate": 4.708591365379838e-06, "loss": 0.278, "step": 19235 }, { "epoch": 0.5667398317693679, "grad_norm": 1.625065796598366, "learning_rate": 4.7080780014244685e-06, "loss": 0.4585, "step": 19236 }, { "epoch": 0.5667692942268314, "grad_norm": 1.51941360505492, "learning_rate": 4.707564640556959e-06, "loss": 0.5157, "step": 19237 }, { "epoch": 0.566798756684295, "grad_norm": 1.5108678653437642, "learning_rate": 4.707051282782742e-06, "loss": 0.4582, "step": 19238 }, { "epoch": 0.5668282191417586, "grad_norm": 1.4020713650772194, "learning_rate": 4.706537928107248e-06, "loss": 0.4941, "step": 19239 }, { "epoch": 0.5668576815992222, "grad_norm": 1.6043270833555265, "learning_rate": 4.706024576535905e-06, "loss": 0.4203, "step": 19240 }, { "epoch": 0.5668871440566857, "grad_norm": 1.3706514766237803, "learning_rate": 4.705511228074146e-06, "loss": 0.4343, "step": 19241 }, { "epoch": 0.5669166065141493, "grad_norm": 1.7078839192098068, "learning_rate": 4.704997882727399e-06, "loss": 0.4807, "step": 19242 }, { "epoch": 0.5669460689716129, "grad_norm": 1.3808967486828854, "learning_rate": 4.704484540501094e-06, "loss": 0.3496, "step": 19243 }, { "epoch": 0.5669755314290765, "grad_norm": 1.453848316915907, "learning_rate": 4.703971201400662e-06, "loss": 0.5493, "step": 19244 }, { "epoch": 0.5670049938865401, "grad_norm": 1.4685634461528383, "learning_rate": 4.703457865431533e-06, "loss": 0.5685, "step": 19245 }, { "epoch": 0.5670344563440036, "grad_norm": 1.4366787422885885, "learning_rate": 4.702944532599135e-06, "loss": 0.4006, "step": 19246 }, { "epoch": 0.5670639188014672, "grad_norm": 1.5017672842645233, "learning_rate": 4.702431202908899e-06, "loss": 0.5163, "step": 19247 }, { "epoch": 0.5670933812589308, "grad_norm": 1.519652739533374, "learning_rate": 4.701917876366256e-06, "loss": 0.5026, "step": 19248 }, { "epoch": 0.5671228437163944, "grad_norm": 1.4587886501902303, "learning_rate": 4.701404552976634e-06, "loss": 0.5046, "step": 19249 }, { "epoch": 0.5671523061738579, "grad_norm": 1.5292238950386468, "learning_rate": 4.700891232745463e-06, "loss": 0.5278, "step": 19250 }, { "epoch": 0.5671817686313215, "grad_norm": 1.3382247851669078, "learning_rate": 4.700377915678176e-06, "loss": 0.4656, "step": 19251 }, { "epoch": 0.5672112310887851, "grad_norm": 1.459120394089133, "learning_rate": 4.6998646017802e-06, "loss": 0.4413, "step": 19252 }, { "epoch": 0.5672406935462487, "grad_norm": 1.4978283130645444, "learning_rate": 4.699351291056963e-06, "loss": 0.4726, "step": 19253 }, { "epoch": 0.5672701560037122, "grad_norm": 1.3877199848706747, "learning_rate": 4.6988379835138975e-06, "loss": 0.5052, "step": 19254 }, { "epoch": 0.5672996184611758, "grad_norm": 1.5514637767114081, "learning_rate": 4.698324679156431e-06, "loss": 0.4473, "step": 19255 }, { "epoch": 0.5673290809186394, "grad_norm": 1.466847098668529, "learning_rate": 4.697811377989994e-06, "loss": 0.4965, "step": 19256 }, { "epoch": 0.567358543376103, "grad_norm": 1.5969037553020895, "learning_rate": 4.697298080020017e-06, "loss": 0.4525, "step": 19257 }, { "epoch": 0.5673880058335666, "grad_norm": 1.4551677580668934, "learning_rate": 4.696784785251928e-06, "loss": 0.3399, "step": 19258 }, { "epoch": 0.5674174682910301, "grad_norm": 1.5375784183429069, "learning_rate": 4.696271493691156e-06, "loss": 0.3876, "step": 19259 }, { "epoch": 0.5674469307484937, "grad_norm": 1.4600592896043496, "learning_rate": 4.695758205343134e-06, "loss": 0.3365, "step": 19260 }, { "epoch": 0.5674763932059573, "grad_norm": 1.3845774819988823, "learning_rate": 4.695244920213288e-06, "loss": 0.4864, "step": 19261 }, { "epoch": 0.5675058556634209, "grad_norm": 1.480769259253373, "learning_rate": 4.694731638307049e-06, "loss": 0.4153, "step": 19262 }, { "epoch": 0.5675353181208844, "grad_norm": 1.537971259425075, "learning_rate": 4.694218359629845e-06, "loss": 0.4852, "step": 19263 }, { "epoch": 0.567564780578348, "grad_norm": 1.4854296578773765, "learning_rate": 4.693705084187107e-06, "loss": 0.4595, "step": 19264 }, { "epoch": 0.5675942430358116, "grad_norm": 2.0494773359480067, "learning_rate": 4.693191811984261e-06, "loss": 0.4832, "step": 19265 }, { "epoch": 0.5676237054932752, "grad_norm": 1.775096011298249, "learning_rate": 4.692678543026741e-06, "loss": 0.5346, "step": 19266 }, { "epoch": 0.5676531679507387, "grad_norm": 1.6032846872493642, "learning_rate": 4.692165277319972e-06, "loss": 0.4094, "step": 19267 }, { "epoch": 0.5676826304082023, "grad_norm": 1.4764218432813392, "learning_rate": 4.6916520148693846e-06, "loss": 0.4254, "step": 19268 }, { "epoch": 0.5677120928656659, "grad_norm": 1.6979871663666437, "learning_rate": 4.69113875568041e-06, "loss": 0.5374, "step": 19269 }, { "epoch": 0.5677415553231295, "grad_norm": 1.5407511661363782, "learning_rate": 4.690625499758474e-06, "loss": 0.4564, "step": 19270 }, { "epoch": 0.5677710177805931, "grad_norm": 1.4269427543489421, "learning_rate": 4.690112247109007e-06, "loss": 0.4542, "step": 19271 }, { "epoch": 0.5678004802380566, "grad_norm": 1.40557776298288, "learning_rate": 4.689598997737441e-06, "loss": 0.425, "step": 19272 }, { "epoch": 0.5678299426955202, "grad_norm": 1.7939998054564292, "learning_rate": 4.6890857516492e-06, "loss": 0.4017, "step": 19273 }, { "epoch": 0.5678594051529838, "grad_norm": 1.5540181502674333, "learning_rate": 4.688572508849715e-06, "loss": 0.4559, "step": 19274 }, { "epoch": 0.5678888676104474, "grad_norm": 1.461236095939524, "learning_rate": 4.688059269344415e-06, "loss": 0.406, "step": 19275 }, { "epoch": 0.5679183300679109, "grad_norm": 1.5662816646662368, "learning_rate": 4.68754603313873e-06, "loss": 0.5064, "step": 19276 }, { "epoch": 0.5679477925253745, "grad_norm": 1.5196051926305156, "learning_rate": 4.687032800238086e-06, "loss": 0.5118, "step": 19277 }, { "epoch": 0.5679772549828381, "grad_norm": 1.4499629949284696, "learning_rate": 4.686519570647916e-06, "loss": 0.4535, "step": 19278 }, { "epoch": 0.5680067174403017, "grad_norm": 1.5986554648871956, "learning_rate": 4.686006344373645e-06, "loss": 0.3935, "step": 19279 }, { "epoch": 0.5680361798977652, "grad_norm": 1.537931133915554, "learning_rate": 4.685493121420703e-06, "loss": 0.4763, "step": 19280 }, { "epoch": 0.5680656423552288, "grad_norm": 1.5561208803429358, "learning_rate": 4.684979901794519e-06, "loss": 0.401, "step": 19281 }, { "epoch": 0.5680951048126924, "grad_norm": 1.6642977354873676, "learning_rate": 4.6844666855005244e-06, "loss": 0.4981, "step": 19282 }, { "epoch": 0.568124567270156, "grad_norm": 1.4255451091548776, "learning_rate": 4.683953472544142e-06, "loss": 0.4096, "step": 19283 }, { "epoch": 0.5681540297276196, "grad_norm": 1.6377599665173193, "learning_rate": 4.683440262930803e-06, "loss": 0.5092, "step": 19284 }, { "epoch": 0.5681834921850831, "grad_norm": 1.43263572934093, "learning_rate": 4.682927056665939e-06, "loss": 0.4496, "step": 19285 }, { "epoch": 0.5682129546425467, "grad_norm": 1.3765979268892314, "learning_rate": 4.682413853754974e-06, "loss": 0.3992, "step": 19286 }, { "epoch": 0.5682424171000103, "grad_norm": 1.6688800907093628, "learning_rate": 4.681900654203339e-06, "loss": 0.4853, "step": 19287 }, { "epoch": 0.5682718795574739, "grad_norm": 1.583900408459962, "learning_rate": 4.681387458016461e-06, "loss": 0.4755, "step": 19288 }, { "epoch": 0.5683013420149374, "grad_norm": 1.7457257629871188, "learning_rate": 4.68087426519977e-06, "loss": 0.5056, "step": 19289 }, { "epoch": 0.568330804472401, "grad_norm": 1.4619540101279027, "learning_rate": 4.680361075758695e-06, "loss": 0.441, "step": 19290 }, { "epoch": 0.5683602669298646, "grad_norm": 1.383761921003938, "learning_rate": 4.679847889698662e-06, "loss": 0.418, "step": 19291 }, { "epoch": 0.5683897293873282, "grad_norm": 1.5436424198104883, "learning_rate": 4.679334707025102e-06, "loss": 0.6042, "step": 19292 }, { "epoch": 0.5684191918447917, "grad_norm": 1.4455618131726444, "learning_rate": 4.67882152774344e-06, "loss": 0.3401, "step": 19293 }, { "epoch": 0.5684486543022553, "grad_norm": 1.4285291961705886, "learning_rate": 4.678308351859107e-06, "loss": 0.541, "step": 19294 }, { "epoch": 0.5684781167597189, "grad_norm": 1.5236853432038664, "learning_rate": 4.677795179377529e-06, "loss": 0.5651, "step": 19295 }, { "epoch": 0.5685075792171825, "grad_norm": 1.5807298797314009, "learning_rate": 4.677282010304135e-06, "loss": 0.5028, "step": 19296 }, { "epoch": 0.5685370416746461, "grad_norm": 1.5066198763355605, "learning_rate": 4.676768844644356e-06, "loss": 0.5133, "step": 19297 }, { "epoch": 0.5685665041321096, "grad_norm": 1.4215979654119464, "learning_rate": 4.676255682403615e-06, "loss": 0.4513, "step": 19298 }, { "epoch": 0.5685959665895732, "grad_norm": 1.429134100447389, "learning_rate": 4.675742523587345e-06, "loss": 0.4186, "step": 19299 }, { "epoch": 0.5686254290470368, "grad_norm": 1.451125593333145, "learning_rate": 4.675229368200969e-06, "loss": 0.4075, "step": 19300 }, { "epoch": 0.5686548915045004, "grad_norm": 1.6383215166577396, "learning_rate": 4.6747162162499195e-06, "loss": 0.5471, "step": 19301 }, { "epoch": 0.5686843539619639, "grad_norm": 1.5088097722771998, "learning_rate": 4.674203067739624e-06, "loss": 0.4675, "step": 19302 }, { "epoch": 0.5687138164194275, "grad_norm": 1.507316555969735, "learning_rate": 4.673689922675508e-06, "loss": 0.4583, "step": 19303 }, { "epoch": 0.5687432788768911, "grad_norm": 1.242847256208907, "learning_rate": 4.6731767810630006e-06, "loss": 0.3372, "step": 19304 }, { "epoch": 0.5687727413343547, "grad_norm": 1.5402451307060612, "learning_rate": 4.672663642907528e-06, "loss": 0.4737, "step": 19305 }, { "epoch": 0.5688022037918182, "grad_norm": 1.4024178638516036, "learning_rate": 4.6721505082145225e-06, "loss": 0.4499, "step": 19306 }, { "epoch": 0.5688316662492818, "grad_norm": 1.4520065743264239, "learning_rate": 4.671637376989406e-06, "loss": 0.4275, "step": 19307 }, { "epoch": 0.5688611287067454, "grad_norm": 1.574528616315268, "learning_rate": 4.671124249237612e-06, "loss": 0.4864, "step": 19308 }, { "epoch": 0.568890591164209, "grad_norm": 1.534730395055091, "learning_rate": 4.670611124964564e-06, "loss": 0.336, "step": 19309 }, { "epoch": 0.5689200536216726, "grad_norm": 1.5144386546810398, "learning_rate": 4.67009800417569e-06, "loss": 0.3802, "step": 19310 }, { "epoch": 0.5689495160791361, "grad_norm": 1.5623772927246233, "learning_rate": 4.669584886876421e-06, "loss": 0.4602, "step": 19311 }, { "epoch": 0.5689789785365997, "grad_norm": 1.325399179789169, "learning_rate": 4.669071773072183e-06, "loss": 0.4024, "step": 19312 }, { "epoch": 0.5690084409940633, "grad_norm": 1.737059132882182, "learning_rate": 4.668558662768401e-06, "loss": 0.6411, "step": 19313 }, { "epoch": 0.5690379034515269, "grad_norm": 1.4264353664762863, "learning_rate": 4.668045555970504e-06, "loss": 0.4542, "step": 19314 }, { "epoch": 0.5690673659089904, "grad_norm": 1.7203336223019396, "learning_rate": 4.6675324526839225e-06, "loss": 0.3943, "step": 19315 }, { "epoch": 0.569096828366454, "grad_norm": 1.4226271735537128, "learning_rate": 4.667019352914079e-06, "loss": 0.3788, "step": 19316 }, { "epoch": 0.5691262908239176, "grad_norm": 1.3528653251442986, "learning_rate": 4.666506256666403e-06, "loss": 0.3747, "step": 19317 }, { "epoch": 0.5691557532813812, "grad_norm": 1.2998849048782963, "learning_rate": 4.6659931639463244e-06, "loss": 0.4124, "step": 19318 }, { "epoch": 0.5691852157388447, "grad_norm": 1.520158411620421, "learning_rate": 4.665480074759266e-06, "loss": 0.3659, "step": 19319 }, { "epoch": 0.5692146781963083, "grad_norm": 1.4174223786518532, "learning_rate": 4.664966989110659e-06, "loss": 0.418, "step": 19320 }, { "epoch": 0.5692441406537719, "grad_norm": 1.7234123590341428, "learning_rate": 4.664453907005931e-06, "loss": 0.6002, "step": 19321 }, { "epoch": 0.5692736031112355, "grad_norm": 1.5736685816148308, "learning_rate": 4.663940828450504e-06, "loss": 0.5494, "step": 19322 }, { "epoch": 0.5693030655686991, "grad_norm": 1.4770866641025688, "learning_rate": 4.663427753449809e-06, "loss": 0.4971, "step": 19323 }, { "epoch": 0.5693325280261626, "grad_norm": 1.5187256139727927, "learning_rate": 4.662914682009274e-06, "loss": 0.3684, "step": 19324 }, { "epoch": 0.5693619904836262, "grad_norm": 1.4666196270020102, "learning_rate": 4.662401614134323e-06, "loss": 0.3484, "step": 19325 }, { "epoch": 0.5693914529410898, "grad_norm": 1.4338520695426038, "learning_rate": 4.6618885498303854e-06, "loss": 0.3866, "step": 19326 }, { "epoch": 0.5694209153985534, "grad_norm": 1.4637492686465277, "learning_rate": 4.6613754891028885e-06, "loss": 0.4336, "step": 19327 }, { "epoch": 0.5694503778560169, "grad_norm": 1.7014211888741442, "learning_rate": 4.660862431957257e-06, "loss": 0.5412, "step": 19328 }, { "epoch": 0.5694798403134805, "grad_norm": 1.5090002932906317, "learning_rate": 4.6603493783989214e-06, "loss": 0.3837, "step": 19329 }, { "epoch": 0.5695093027709441, "grad_norm": 1.3666371385115943, "learning_rate": 4.659836328433305e-06, "loss": 0.4537, "step": 19330 }, { "epoch": 0.5695387652284077, "grad_norm": 1.594545517159008, "learning_rate": 4.659323282065838e-06, "loss": 0.3364, "step": 19331 }, { "epoch": 0.5695682276858712, "grad_norm": 1.6847195376695772, "learning_rate": 4.658810239301943e-06, "loss": 0.6113, "step": 19332 }, { "epoch": 0.5695976901433348, "grad_norm": 1.4381278732589537, "learning_rate": 4.658297200147052e-06, "loss": 0.4273, "step": 19333 }, { "epoch": 0.5696271526007984, "grad_norm": 1.3396694462491066, "learning_rate": 4.657784164606587e-06, "loss": 0.4679, "step": 19334 }, { "epoch": 0.569656615058262, "grad_norm": 1.4360267233207167, "learning_rate": 4.657271132685977e-06, "loss": 0.5609, "step": 19335 }, { "epoch": 0.5696860775157256, "grad_norm": 1.4475338829396496, "learning_rate": 4.656758104390649e-06, "loss": 0.3934, "step": 19336 }, { "epoch": 0.5697155399731891, "grad_norm": 1.5880620558143654, "learning_rate": 4.6562450797260285e-06, "loss": 0.4824, "step": 19337 }, { "epoch": 0.5697450024306527, "grad_norm": 1.6419272395721556, "learning_rate": 4.655732058697543e-06, "loss": 0.4843, "step": 19338 }, { "epoch": 0.5697744648881163, "grad_norm": 1.430329597798307, "learning_rate": 4.6552190413106195e-06, "loss": 0.4246, "step": 19339 }, { "epoch": 0.5698039273455799, "grad_norm": 1.7452548806937667, "learning_rate": 4.654706027570684e-06, "loss": 0.6433, "step": 19340 }, { "epoch": 0.5698333898030434, "grad_norm": 1.5608099863918987, "learning_rate": 4.6541930174831635e-06, "loss": 0.5275, "step": 19341 }, { "epoch": 0.569862852260507, "grad_norm": 1.476074086456405, "learning_rate": 4.653680011053484e-06, "loss": 0.4404, "step": 19342 }, { "epoch": 0.5698923147179706, "grad_norm": 1.5637172255992555, "learning_rate": 4.65316700828707e-06, "loss": 0.4719, "step": 19343 }, { "epoch": 0.5699217771754342, "grad_norm": 1.3779543644638443, "learning_rate": 4.65265400918935e-06, "loss": 0.38, "step": 19344 }, { "epoch": 0.5699512396328977, "grad_norm": 1.6878352039018383, "learning_rate": 4.652141013765751e-06, "loss": 0.6671, "step": 19345 }, { "epoch": 0.5699807020903613, "grad_norm": 1.5546775415768965, "learning_rate": 4.651628022021697e-06, "loss": 0.4137, "step": 19346 }, { "epoch": 0.5700101645478249, "grad_norm": 1.42657856422176, "learning_rate": 4.651115033962616e-06, "loss": 0.352, "step": 19347 }, { "epoch": 0.5700396270052885, "grad_norm": 1.2637938932272887, "learning_rate": 4.6506020495939345e-06, "loss": 0.3139, "step": 19348 }, { "epoch": 0.5700690894627521, "grad_norm": 1.3436098120415492, "learning_rate": 4.6500890689210775e-06, "loss": 0.4464, "step": 19349 }, { "epoch": 0.5700985519202156, "grad_norm": 1.5385174546192462, "learning_rate": 4.6495760919494715e-06, "loss": 0.5772, "step": 19350 }, { "epoch": 0.5701280143776792, "grad_norm": 1.4300476755187863, "learning_rate": 4.649063118684544e-06, "loss": 0.4055, "step": 19351 }, { "epoch": 0.5701574768351428, "grad_norm": 1.4601090421762382, "learning_rate": 4.6485501491317206e-06, "loss": 0.5463, "step": 19352 }, { "epoch": 0.5701869392926064, "grad_norm": 1.4323887252730825, "learning_rate": 4.648037183296424e-06, "loss": 0.3828, "step": 19353 }, { "epoch": 0.5702164017500699, "grad_norm": 1.4190267009459934, "learning_rate": 4.6475242211840845e-06, "loss": 0.4161, "step": 19354 }, { "epoch": 0.5702458642075335, "grad_norm": 1.4288074174982135, "learning_rate": 4.647011262800126e-06, "loss": 0.4034, "step": 19355 }, { "epoch": 0.5702753266649971, "grad_norm": 1.2798753075090914, "learning_rate": 4.646498308149974e-06, "loss": 0.4314, "step": 19356 }, { "epoch": 0.5703047891224607, "grad_norm": 1.5478654273552248, "learning_rate": 4.645985357239056e-06, "loss": 0.4001, "step": 19357 }, { "epoch": 0.5703342515799242, "grad_norm": 1.2800689233424258, "learning_rate": 4.645472410072797e-06, "loss": 0.2127, "step": 19358 }, { "epoch": 0.5703637140373878, "grad_norm": 1.6585464524754654, "learning_rate": 4.644959466656621e-06, "loss": 0.5538, "step": 19359 }, { "epoch": 0.5703931764948514, "grad_norm": 1.1925618842808354, "learning_rate": 4.644446526995958e-06, "loss": 0.2599, "step": 19360 }, { "epoch": 0.570422638952315, "grad_norm": 1.575659630692004, "learning_rate": 4.643933591096232e-06, "loss": 0.4015, "step": 19361 }, { "epoch": 0.5704521014097786, "grad_norm": 1.320989644917951, "learning_rate": 4.643420658962866e-06, "loss": 0.3798, "step": 19362 }, { "epoch": 0.5704815638672421, "grad_norm": 1.6075816910338494, "learning_rate": 4.6429077306012874e-06, "loss": 0.4426, "step": 19363 }, { "epoch": 0.5705110263247057, "grad_norm": 1.4816736293566688, "learning_rate": 4.6423948060169235e-06, "loss": 0.4494, "step": 19364 }, { "epoch": 0.5705404887821693, "grad_norm": 1.5245270991337383, "learning_rate": 4.641881885215197e-06, "loss": 0.4459, "step": 19365 }, { "epoch": 0.5705699512396329, "grad_norm": 1.2726455603369302, "learning_rate": 4.641368968201536e-06, "loss": 0.3223, "step": 19366 }, { "epoch": 0.5705994136970964, "grad_norm": 1.4778151351772473, "learning_rate": 4.640856054981363e-06, "loss": 0.4347, "step": 19367 }, { "epoch": 0.57062887615456, "grad_norm": 1.6563182438824104, "learning_rate": 4.640343145560105e-06, "loss": 0.5832, "step": 19368 }, { "epoch": 0.5706583386120236, "grad_norm": 1.5613092275169036, "learning_rate": 4.63983023994319e-06, "loss": 0.409, "step": 19369 }, { "epoch": 0.5706878010694872, "grad_norm": 1.5336888098020256, "learning_rate": 4.639317338136039e-06, "loss": 0.4461, "step": 19370 }, { "epoch": 0.5707172635269507, "grad_norm": 1.76762401536284, "learning_rate": 4.6388044401440815e-06, "loss": 0.6061, "step": 19371 }, { "epoch": 0.5707467259844143, "grad_norm": 1.5416545251908647, "learning_rate": 4.638291545972739e-06, "loss": 0.417, "step": 19372 }, { "epoch": 0.5707761884418779, "grad_norm": 1.6196714004970452, "learning_rate": 4.637778655627439e-06, "loss": 0.4464, "step": 19373 }, { "epoch": 0.5708056508993415, "grad_norm": 1.5684070958813263, "learning_rate": 4.637265769113604e-06, "loss": 0.4944, "step": 19374 }, { "epoch": 0.5708351133568051, "grad_norm": 1.5403271687746474, "learning_rate": 4.636752886436663e-06, "loss": 0.2849, "step": 19375 }, { "epoch": 0.5708645758142686, "grad_norm": 1.7473442441995888, "learning_rate": 4.6362400076020386e-06, "loss": 0.3598, "step": 19376 }, { "epoch": 0.5708940382717322, "grad_norm": 1.6168000048534692, "learning_rate": 4.635727132615156e-06, "loss": 0.3905, "step": 19377 }, { "epoch": 0.5709235007291958, "grad_norm": 1.737785627664695, "learning_rate": 4.635214261481443e-06, "loss": 0.5779, "step": 19378 }, { "epoch": 0.5709529631866594, "grad_norm": 1.841834010756817, "learning_rate": 4.63470139420632e-06, "loss": 0.5772, "step": 19379 }, { "epoch": 0.5709824256441229, "grad_norm": 1.4199686812335324, "learning_rate": 4.634188530795214e-06, "loss": 0.3697, "step": 19380 }, { "epoch": 0.5710118881015865, "grad_norm": 1.5619921269811343, "learning_rate": 4.633675671253554e-06, "loss": 0.3658, "step": 19381 }, { "epoch": 0.5710413505590501, "grad_norm": 1.5735644177903976, "learning_rate": 4.633162815586759e-06, "loss": 0.4281, "step": 19382 }, { "epoch": 0.5710708130165137, "grad_norm": 1.5272966422504122, "learning_rate": 4.632649963800254e-06, "loss": 0.4794, "step": 19383 }, { "epoch": 0.5711002754739772, "grad_norm": 1.515699591810123, "learning_rate": 4.632137115899467e-06, "loss": 0.4999, "step": 19384 }, { "epoch": 0.5711297379314408, "grad_norm": 1.402848863720847, "learning_rate": 4.631624271889823e-06, "loss": 0.3025, "step": 19385 }, { "epoch": 0.5711592003889044, "grad_norm": 1.5783023700467362, "learning_rate": 4.631111431776743e-06, "loss": 0.4145, "step": 19386 }, { "epoch": 0.571188662846368, "grad_norm": 1.5531622920182788, "learning_rate": 4.630598595565655e-06, "loss": 0.4462, "step": 19387 }, { "epoch": 0.5712181253038316, "grad_norm": 1.3666016791937465, "learning_rate": 4.6300857632619814e-06, "loss": 0.4126, "step": 19388 }, { "epoch": 0.5712475877612951, "grad_norm": 1.3232213113984927, "learning_rate": 4.629572934871148e-06, "loss": 0.4232, "step": 19389 }, { "epoch": 0.5712770502187587, "grad_norm": 1.514918374833182, "learning_rate": 4.6290601103985805e-06, "loss": 0.4762, "step": 19390 }, { "epoch": 0.5713065126762223, "grad_norm": 1.4308079585579088, "learning_rate": 4.628547289849703e-06, "loss": 0.4932, "step": 19391 }, { "epoch": 0.571335975133686, "grad_norm": 1.396917204267119, "learning_rate": 4.628034473229937e-06, "loss": 0.4061, "step": 19392 }, { "epoch": 0.5713654375911494, "grad_norm": 1.3145222900742741, "learning_rate": 4.6275216605447095e-06, "loss": 0.4183, "step": 19393 }, { "epoch": 0.571394900048613, "grad_norm": 1.469011931926742, "learning_rate": 4.627008851799444e-06, "loss": 0.4224, "step": 19394 }, { "epoch": 0.5714243625060766, "grad_norm": 1.4127532428231964, "learning_rate": 4.626496046999566e-06, "loss": 0.477, "step": 19395 }, { "epoch": 0.5714538249635402, "grad_norm": 1.474305483760763, "learning_rate": 4.625983246150499e-06, "loss": 0.4121, "step": 19396 }, { "epoch": 0.5714832874210037, "grad_norm": 1.3770923880086419, "learning_rate": 4.625470449257666e-06, "loss": 0.3948, "step": 19397 }, { "epoch": 0.5715127498784673, "grad_norm": 1.4426247654550879, "learning_rate": 4.6249576563264926e-06, "loss": 0.3131, "step": 19398 }, { "epoch": 0.5715422123359309, "grad_norm": 1.4482101261062446, "learning_rate": 4.624444867362403e-06, "loss": 0.4173, "step": 19399 }, { "epoch": 0.5715716747933945, "grad_norm": 1.4522627072113679, "learning_rate": 4.6239320823708226e-06, "loss": 0.4218, "step": 19400 }, { "epoch": 0.5716011372508581, "grad_norm": 1.5435429178405946, "learning_rate": 4.6234193013571736e-06, "loss": 0.429, "step": 19401 }, { "epoch": 0.5716305997083216, "grad_norm": 1.5769939456357247, "learning_rate": 4.622906524326879e-06, "loss": 0.4656, "step": 19402 }, { "epoch": 0.5716600621657852, "grad_norm": 1.5897436316657974, "learning_rate": 4.622393751285366e-06, "loss": 0.3366, "step": 19403 }, { "epoch": 0.5716895246232488, "grad_norm": 1.5770672053763253, "learning_rate": 4.621880982238056e-06, "loss": 0.4549, "step": 19404 }, { "epoch": 0.5717189870807124, "grad_norm": 1.6755016820804425, "learning_rate": 4.621368217190373e-06, "loss": 0.4765, "step": 19405 }, { "epoch": 0.5717484495381759, "grad_norm": 1.5010298359556422, "learning_rate": 4.620855456147742e-06, "loss": 0.4082, "step": 19406 }, { "epoch": 0.5717779119956395, "grad_norm": 1.569458436151581, "learning_rate": 4.6203426991155866e-06, "loss": 0.3892, "step": 19407 }, { "epoch": 0.5718073744531031, "grad_norm": 1.527022959569735, "learning_rate": 4.619829946099331e-06, "loss": 0.4167, "step": 19408 }, { "epoch": 0.5718368369105667, "grad_norm": 1.3571133172815806, "learning_rate": 4.619317197104397e-06, "loss": 0.32, "step": 19409 }, { "epoch": 0.5718662993680302, "grad_norm": 1.3906675711199528, "learning_rate": 4.618804452136211e-06, "loss": 0.4178, "step": 19410 }, { "epoch": 0.5718957618254938, "grad_norm": 1.485307208009538, "learning_rate": 4.618291711200197e-06, "loss": 0.4331, "step": 19411 }, { "epoch": 0.5719252242829574, "grad_norm": 1.415059499743479, "learning_rate": 4.617778974301775e-06, "loss": 0.2779, "step": 19412 }, { "epoch": 0.571954686740421, "grad_norm": 1.588461771324221, "learning_rate": 4.61726624144637e-06, "loss": 0.5147, "step": 19413 }, { "epoch": 0.5719841491978847, "grad_norm": 1.4275320371054214, "learning_rate": 4.616753512639406e-06, "loss": 0.4029, "step": 19414 }, { "epoch": 0.5720136116553481, "grad_norm": 1.7349537701409108, "learning_rate": 4.616240787886307e-06, "loss": 0.4669, "step": 19415 }, { "epoch": 0.5720430741128117, "grad_norm": 1.448162525727087, "learning_rate": 4.615728067192495e-06, "loss": 0.5464, "step": 19416 }, { "epoch": 0.5720725365702753, "grad_norm": 1.6905376552802205, "learning_rate": 4.615215350563397e-06, "loss": 0.4271, "step": 19417 }, { "epoch": 0.572101999027739, "grad_norm": 1.4691723475729686, "learning_rate": 4.614702638004431e-06, "loss": 0.3883, "step": 19418 }, { "epoch": 0.5721314614852024, "grad_norm": 1.7418550432392048, "learning_rate": 4.6141899295210235e-06, "loss": 0.4458, "step": 19419 }, { "epoch": 0.572160923942666, "grad_norm": 1.5206193925489389, "learning_rate": 4.613677225118599e-06, "loss": 0.4997, "step": 19420 }, { "epoch": 0.5721903864001296, "grad_norm": 1.6131127452255016, "learning_rate": 4.61316452480258e-06, "loss": 0.4626, "step": 19421 }, { "epoch": 0.5722198488575932, "grad_norm": 1.5471133419401946, "learning_rate": 4.612651828578386e-06, "loss": 0.4606, "step": 19422 }, { "epoch": 0.5722493113150567, "grad_norm": 1.458886124776979, "learning_rate": 4.612139136451443e-06, "loss": 0.375, "step": 19423 }, { "epoch": 0.5722787737725203, "grad_norm": 1.4958215172356375, "learning_rate": 4.611626448427176e-06, "loss": 0.5446, "step": 19424 }, { "epoch": 0.572308236229984, "grad_norm": 1.612427062665022, "learning_rate": 4.611113764511004e-06, "loss": 0.4912, "step": 19425 }, { "epoch": 0.5723376986874475, "grad_norm": 1.461643728170967, "learning_rate": 4.610601084708352e-06, "loss": 0.3971, "step": 19426 }, { "epoch": 0.5723671611449112, "grad_norm": 1.5069201533523386, "learning_rate": 4.610088409024645e-06, "loss": 0.4968, "step": 19427 }, { "epoch": 0.5723966236023746, "grad_norm": 1.576219073923954, "learning_rate": 4.6095757374653026e-06, "loss": 0.344, "step": 19428 }, { "epoch": 0.5724260860598382, "grad_norm": 1.53429097203533, "learning_rate": 4.60906307003575e-06, "loss": 0.5254, "step": 19429 }, { "epoch": 0.5724555485173018, "grad_norm": 1.3884335048361849, "learning_rate": 4.608550406741408e-06, "loss": 0.3472, "step": 19430 }, { "epoch": 0.5724850109747655, "grad_norm": 1.5790635026070414, "learning_rate": 4.608037747587702e-06, "loss": 0.3442, "step": 19431 }, { "epoch": 0.5725144734322289, "grad_norm": 1.3499911657999955, "learning_rate": 4.607525092580053e-06, "loss": 0.4273, "step": 19432 }, { "epoch": 0.5725439358896925, "grad_norm": 1.392536417643447, "learning_rate": 4.6070124417238835e-06, "loss": 0.3852, "step": 19433 }, { "epoch": 0.5725733983471561, "grad_norm": 1.3962990748771906, "learning_rate": 4.606499795024616e-06, "loss": 0.4003, "step": 19434 }, { "epoch": 0.5726028608046198, "grad_norm": 1.452259808896927, "learning_rate": 4.605987152487673e-06, "loss": 0.524, "step": 19435 }, { "epoch": 0.5726323232620832, "grad_norm": 1.5949151613932793, "learning_rate": 4.6054745141184795e-06, "loss": 0.5676, "step": 19436 }, { "epoch": 0.5726617857195468, "grad_norm": 1.4205385794880883, "learning_rate": 4.6049618799224556e-06, "loss": 0.3754, "step": 19437 }, { "epoch": 0.5726912481770104, "grad_norm": 1.472620930136243, "learning_rate": 4.604449249905024e-06, "loss": 0.3688, "step": 19438 }, { "epoch": 0.572720710634474, "grad_norm": 1.6770178749235403, "learning_rate": 4.603936624071609e-06, "loss": 0.4529, "step": 19439 }, { "epoch": 0.5727501730919377, "grad_norm": 1.621371478208963, "learning_rate": 4.603424002427631e-06, "loss": 0.4659, "step": 19440 }, { "epoch": 0.5727796355494011, "grad_norm": 1.5929489436183077, "learning_rate": 4.602911384978515e-06, "loss": 0.3276, "step": 19441 }, { "epoch": 0.5728090980068647, "grad_norm": 1.465734572604941, "learning_rate": 4.602398771729679e-06, "loss": 0.418, "step": 19442 }, { "epoch": 0.5728385604643284, "grad_norm": 1.5809731107839295, "learning_rate": 4.601886162686548e-06, "loss": 0.4179, "step": 19443 }, { "epoch": 0.572868022921792, "grad_norm": 1.2910983293143805, "learning_rate": 4.601373557854543e-06, "loss": 0.3667, "step": 19444 }, { "epoch": 0.5728974853792554, "grad_norm": 1.4004414363435795, "learning_rate": 4.600860957239089e-06, "loss": 0.5117, "step": 19445 }, { "epoch": 0.572926947836719, "grad_norm": 1.4173975301345285, "learning_rate": 4.600348360845605e-06, "loss": 0.4563, "step": 19446 }, { "epoch": 0.5729564102941826, "grad_norm": 1.491975030730532, "learning_rate": 4.599835768679513e-06, "loss": 0.5407, "step": 19447 }, { "epoch": 0.5729858727516463, "grad_norm": 1.421412081412009, "learning_rate": 4.5993231807462385e-06, "loss": 0.3145, "step": 19448 }, { "epoch": 0.5730153352091097, "grad_norm": 1.3693907363112572, "learning_rate": 4.5988105970512e-06, "loss": 0.4016, "step": 19449 }, { "epoch": 0.5730447976665733, "grad_norm": 1.5722555879910944, "learning_rate": 4.598298017599822e-06, "loss": 0.5035, "step": 19450 }, { "epoch": 0.573074260124037, "grad_norm": 1.3704787207213154, "learning_rate": 4.597785442397525e-06, "loss": 0.4845, "step": 19451 }, { "epoch": 0.5731037225815006, "grad_norm": 1.422856347497938, "learning_rate": 4.597272871449731e-06, "loss": 0.3596, "step": 19452 }, { "epoch": 0.5731331850389642, "grad_norm": 1.3709413625955196, "learning_rate": 4.59676030476186e-06, "loss": 0.3902, "step": 19453 }, { "epoch": 0.5731626474964276, "grad_norm": 1.7322423843269057, "learning_rate": 4.596247742339337e-06, "loss": 0.4416, "step": 19454 }, { "epoch": 0.5731921099538912, "grad_norm": 1.387300287966232, "learning_rate": 4.595735184187583e-06, "loss": 0.3853, "step": 19455 }, { "epoch": 0.5732215724113549, "grad_norm": 1.5734665206341465, "learning_rate": 4.595222630312017e-06, "loss": 0.5147, "step": 19456 }, { "epoch": 0.5732510348688185, "grad_norm": 1.5985509116104584, "learning_rate": 4.594710080718064e-06, "loss": 0.4505, "step": 19457 }, { "epoch": 0.573280497326282, "grad_norm": 1.5552597520151425, "learning_rate": 4.594197535411144e-06, "loss": 0.5592, "step": 19458 }, { "epoch": 0.5733099597837455, "grad_norm": 1.5378099969780332, "learning_rate": 4.593684994396678e-06, "loss": 0.5475, "step": 19459 }, { "epoch": 0.5733394222412092, "grad_norm": 1.4705903584828284, "learning_rate": 4.593172457680089e-06, "loss": 0.4476, "step": 19460 }, { "epoch": 0.5733688846986728, "grad_norm": 1.4129209698670837, "learning_rate": 4.5926599252668004e-06, "loss": 0.3285, "step": 19461 }, { "epoch": 0.5733983471561362, "grad_norm": 1.6467218691012684, "learning_rate": 4.5921473971622275e-06, "loss": 0.5557, "step": 19462 }, { "epoch": 0.5734278096135998, "grad_norm": 1.4330858458204176, "learning_rate": 4.591634873371797e-06, "loss": 0.2986, "step": 19463 }, { "epoch": 0.5734572720710635, "grad_norm": 1.4696433627307104, "learning_rate": 4.591122353900927e-06, "loss": 0.4612, "step": 19464 }, { "epoch": 0.573486734528527, "grad_norm": 1.6138733314953482, "learning_rate": 4.59060983875504e-06, "loss": 0.5581, "step": 19465 }, { "epoch": 0.5735161969859907, "grad_norm": 1.5607849328977557, "learning_rate": 4.590097327939559e-06, "loss": 0.3842, "step": 19466 }, { "epoch": 0.5735456594434541, "grad_norm": 1.7370625621263704, "learning_rate": 4.589584821459902e-06, "loss": 0.553, "step": 19467 }, { "epoch": 0.5735751219009178, "grad_norm": 1.4205438550061644, "learning_rate": 4.589072319321491e-06, "loss": 0.4091, "step": 19468 }, { "epoch": 0.5736045843583814, "grad_norm": 1.6369074621541102, "learning_rate": 4.58855982152975e-06, "loss": 0.3995, "step": 19469 }, { "epoch": 0.573634046815845, "grad_norm": 1.3732942812077134, "learning_rate": 4.588047328090096e-06, "loss": 0.3602, "step": 19470 }, { "epoch": 0.5736635092733084, "grad_norm": 1.709360403076855, "learning_rate": 4.587534839007954e-06, "loss": 0.4981, "step": 19471 }, { "epoch": 0.573692971730772, "grad_norm": 1.400925726301481, "learning_rate": 4.587022354288741e-06, "loss": 0.3453, "step": 19472 }, { "epoch": 0.5737224341882357, "grad_norm": 1.5688530460674959, "learning_rate": 4.58650987393788e-06, "loss": 0.5655, "step": 19473 }, { "epoch": 0.5737518966456993, "grad_norm": 1.8649726865428666, "learning_rate": 4.585997397960792e-06, "loss": 0.5629, "step": 19474 }, { "epoch": 0.5737813591031627, "grad_norm": 1.2780884444637775, "learning_rate": 4.585484926362898e-06, "loss": 0.3329, "step": 19475 }, { "epoch": 0.5738108215606263, "grad_norm": 1.5278953362470578, "learning_rate": 4.584972459149616e-06, "loss": 0.3979, "step": 19476 }, { "epoch": 0.57384028401809, "grad_norm": 1.4243399375007983, "learning_rate": 4.5844599963263695e-06, "loss": 0.4826, "step": 19477 }, { "epoch": 0.5738697464755536, "grad_norm": 1.9093265043657128, "learning_rate": 4.583947537898579e-06, "loss": 0.4822, "step": 19478 }, { "epoch": 0.5738992089330172, "grad_norm": 1.447670782161511, "learning_rate": 4.583435083871664e-06, "loss": 0.487, "step": 19479 }, { "epoch": 0.5739286713904806, "grad_norm": 1.6019515143632934, "learning_rate": 4.582922634251046e-06, "loss": 0.4004, "step": 19480 }, { "epoch": 0.5739581338479443, "grad_norm": 1.5724671961844756, "learning_rate": 4.582410189042147e-06, "loss": 0.4598, "step": 19481 }, { "epoch": 0.5739875963054079, "grad_norm": 1.4204012413041884, "learning_rate": 4.5818977482503855e-06, "loss": 0.4299, "step": 19482 }, { "epoch": 0.5740170587628715, "grad_norm": 1.4942248497333999, "learning_rate": 4.58138531188118e-06, "loss": 0.4708, "step": 19483 }, { "epoch": 0.574046521220335, "grad_norm": 1.2887928666942374, "learning_rate": 4.580872879939955e-06, "loss": 0.3232, "step": 19484 }, { "epoch": 0.5740759836777986, "grad_norm": 1.6095599925860609, "learning_rate": 4.580360452432127e-06, "loss": 0.4667, "step": 19485 }, { "epoch": 0.5741054461352622, "grad_norm": 1.3160393534432464, "learning_rate": 4.579848029363119e-06, "loss": 0.3833, "step": 19486 }, { "epoch": 0.5741349085927258, "grad_norm": 1.351179440766221, "learning_rate": 4.5793356107383516e-06, "loss": 0.323, "step": 19487 }, { "epoch": 0.5741643710501892, "grad_norm": 1.4168203686133074, "learning_rate": 4.578823196563242e-06, "loss": 0.3903, "step": 19488 }, { "epoch": 0.5741938335076529, "grad_norm": 1.516457624590713, "learning_rate": 4.5783107868432135e-06, "loss": 0.5225, "step": 19489 }, { "epoch": 0.5742232959651165, "grad_norm": 1.3650863005013756, "learning_rate": 4.577798381583685e-06, "loss": 0.312, "step": 19490 }, { "epoch": 0.5742527584225801, "grad_norm": 1.5629971003107908, "learning_rate": 4.577285980790078e-06, "loss": 0.4093, "step": 19491 }, { "epoch": 0.5742822208800437, "grad_norm": 1.8060571634050004, "learning_rate": 4.576773584467809e-06, "loss": 0.6431, "step": 19492 }, { "epoch": 0.5743116833375072, "grad_norm": 1.7052399252643062, "learning_rate": 4.5762611926223e-06, "loss": 0.5214, "step": 19493 }, { "epoch": 0.5743411457949708, "grad_norm": 1.7438269392231047, "learning_rate": 4.575748805258972e-06, "loss": 0.5453, "step": 19494 }, { "epoch": 0.5743706082524344, "grad_norm": 1.3820404045162715, "learning_rate": 4.575236422383243e-06, "loss": 0.3073, "step": 19495 }, { "epoch": 0.574400070709898, "grad_norm": 1.6287244415849116, "learning_rate": 4.5747240440005345e-06, "loss": 0.4844, "step": 19496 }, { "epoch": 0.5744295331673615, "grad_norm": 1.3800768080080439, "learning_rate": 4.574211670116264e-06, "loss": 0.3648, "step": 19497 }, { "epoch": 0.574458995624825, "grad_norm": 1.5033058674207296, "learning_rate": 4.5736993007358535e-06, "loss": 0.4037, "step": 19498 }, { "epoch": 0.5744884580822887, "grad_norm": 1.5735620650869433, "learning_rate": 4.573186935864723e-06, "loss": 0.4595, "step": 19499 }, { "epoch": 0.5745179205397523, "grad_norm": 1.2644049021154322, "learning_rate": 4.572674575508289e-06, "loss": 0.3231, "step": 19500 }, { "epoch": 0.5745473829972157, "grad_norm": 1.3829638967309623, "learning_rate": 4.572162219671975e-06, "loss": 0.3544, "step": 19501 }, { "epoch": 0.5745768454546794, "grad_norm": 1.3782260963834028, "learning_rate": 4.571649868361198e-06, "loss": 0.4352, "step": 19502 }, { "epoch": 0.574606307912143, "grad_norm": 1.4795002519411802, "learning_rate": 4.571137521581378e-06, "loss": 0.4772, "step": 19503 }, { "epoch": 0.5746357703696066, "grad_norm": 1.5816961608319828, "learning_rate": 4.570625179337935e-06, "loss": 0.5226, "step": 19504 }, { "epoch": 0.5746652328270702, "grad_norm": 1.592133210626407, "learning_rate": 4.570112841636288e-06, "loss": 0.5247, "step": 19505 }, { "epoch": 0.5746946952845337, "grad_norm": 1.5215067252624175, "learning_rate": 4.569600508481855e-06, "loss": 0.4756, "step": 19506 }, { "epoch": 0.5747241577419973, "grad_norm": 1.5202140406047604, "learning_rate": 4.569088179880057e-06, "loss": 0.4241, "step": 19507 }, { "epoch": 0.5747536201994609, "grad_norm": 1.629154051004557, "learning_rate": 4.5685758558363145e-06, "loss": 0.3024, "step": 19508 }, { "epoch": 0.5747830826569245, "grad_norm": 1.499471263607866, "learning_rate": 4.568063536356043e-06, "loss": 0.5906, "step": 19509 }, { "epoch": 0.574812545114388, "grad_norm": 1.3651414337310392, "learning_rate": 4.5675512214446644e-06, "loss": 0.3295, "step": 19510 }, { "epoch": 0.5748420075718516, "grad_norm": 1.3775610471787854, "learning_rate": 4.567038911107599e-06, "loss": 0.32, "step": 19511 }, { "epoch": 0.5748714700293152, "grad_norm": 1.3908893305194368, "learning_rate": 4.5665266053502635e-06, "loss": 0.4015, "step": 19512 }, { "epoch": 0.5749009324867788, "grad_norm": 1.6744657854741163, "learning_rate": 4.566014304178076e-06, "loss": 0.6484, "step": 19513 }, { "epoch": 0.5749303949442423, "grad_norm": 1.6160382601393999, "learning_rate": 4.565502007596458e-06, "loss": 0.575, "step": 19514 }, { "epoch": 0.5749598574017059, "grad_norm": 1.6277575104721993, "learning_rate": 4.564989715610826e-06, "loss": 0.4239, "step": 19515 }, { "epoch": 0.5749893198591695, "grad_norm": 1.440291062709392, "learning_rate": 4.564477428226602e-06, "loss": 0.4726, "step": 19516 }, { "epoch": 0.5750187823166331, "grad_norm": 1.4192258539352693, "learning_rate": 4.563965145449202e-06, "loss": 0.495, "step": 19517 }, { "epoch": 0.5750482447740967, "grad_norm": 1.5895573729125467, "learning_rate": 4.5634528672840455e-06, "loss": 0.5129, "step": 19518 }, { "epoch": 0.5750777072315602, "grad_norm": 1.3842092207442116, "learning_rate": 4.562940593736551e-06, "loss": 0.3858, "step": 19519 }, { "epoch": 0.5751071696890238, "grad_norm": 1.5086577791681637, "learning_rate": 4.56242832481214e-06, "loss": 0.5232, "step": 19520 }, { "epoch": 0.5751366321464874, "grad_norm": 1.417677265139104, "learning_rate": 4.561916060516229e-06, "loss": 0.3803, "step": 19521 }, { "epoch": 0.575166094603951, "grad_norm": 1.3663159693726876, "learning_rate": 4.561403800854235e-06, "loss": 0.3043, "step": 19522 }, { "epoch": 0.5751955570614145, "grad_norm": 1.575782123849386, "learning_rate": 4.560891545831576e-06, "loss": 0.4017, "step": 19523 }, { "epoch": 0.5752250195188781, "grad_norm": 2.0543666125344924, "learning_rate": 4.5603792954536755e-06, "loss": 0.7741, "step": 19524 }, { "epoch": 0.5752544819763417, "grad_norm": 1.3705868016091765, "learning_rate": 4.559867049725947e-06, "loss": 0.502, "step": 19525 }, { "epoch": 0.5752839444338053, "grad_norm": 1.6169444534210695, "learning_rate": 4.559354808653811e-06, "loss": 0.5876, "step": 19526 }, { "epoch": 0.5753134068912688, "grad_norm": 1.5853631952257, "learning_rate": 4.558842572242686e-06, "loss": 0.3622, "step": 19527 }, { "epoch": 0.5753428693487324, "grad_norm": 1.4373942856816788, "learning_rate": 4.55833034049799e-06, "loss": 0.4675, "step": 19528 }, { "epoch": 0.575372331806196, "grad_norm": 1.488581898465425, "learning_rate": 4.55781811342514e-06, "loss": 0.4121, "step": 19529 }, { "epoch": 0.5754017942636596, "grad_norm": 1.6578180650434475, "learning_rate": 4.557305891029556e-06, "loss": 0.5746, "step": 19530 }, { "epoch": 0.5754312567211232, "grad_norm": 1.5562991852132795, "learning_rate": 4.556793673316657e-06, "loss": 0.3767, "step": 19531 }, { "epoch": 0.5754607191785867, "grad_norm": 1.6983884374624783, "learning_rate": 4.556281460291857e-06, "loss": 0.606, "step": 19532 }, { "epoch": 0.5754901816360503, "grad_norm": 1.5634829964105104, "learning_rate": 4.555769251960578e-06, "loss": 0.4813, "step": 19533 }, { "epoch": 0.5755196440935139, "grad_norm": 1.7139848116303977, "learning_rate": 4.555257048328236e-06, "loss": 0.6483, "step": 19534 }, { "epoch": 0.5755491065509775, "grad_norm": 1.4803923293116326, "learning_rate": 4.554744849400248e-06, "loss": 0.3635, "step": 19535 }, { "epoch": 0.575578569008441, "grad_norm": 1.5291249504767406, "learning_rate": 4.554232655182035e-06, "loss": 0.5127, "step": 19536 }, { "epoch": 0.5756080314659046, "grad_norm": 1.7181793080397392, "learning_rate": 4.553720465679012e-06, "loss": 0.5159, "step": 19537 }, { "epoch": 0.5756374939233682, "grad_norm": 1.3894535999843571, "learning_rate": 4.553208280896599e-06, "loss": 0.4315, "step": 19538 }, { "epoch": 0.5756669563808318, "grad_norm": 1.4806563875132897, "learning_rate": 4.552696100840211e-06, "loss": 0.4286, "step": 19539 }, { "epoch": 0.5756964188382953, "grad_norm": 1.7639965999418619, "learning_rate": 4.552183925515268e-06, "loss": 0.4281, "step": 19540 }, { "epoch": 0.5757258812957589, "grad_norm": 1.2638912632836041, "learning_rate": 4.551671754927189e-06, "loss": 0.3973, "step": 19541 }, { "epoch": 0.5757553437532225, "grad_norm": 1.576517344234086, "learning_rate": 4.551159589081388e-06, "loss": 0.367, "step": 19542 }, { "epoch": 0.5757848062106861, "grad_norm": 1.44285429017538, "learning_rate": 4.5506474279832844e-06, "loss": 0.3693, "step": 19543 }, { "epoch": 0.5758142686681497, "grad_norm": 1.4560677583601678, "learning_rate": 4.550135271638294e-06, "loss": 0.4848, "step": 19544 }, { "epoch": 0.5758437311256132, "grad_norm": 1.6197353348978751, "learning_rate": 4.549623120051837e-06, "loss": 0.4999, "step": 19545 }, { "epoch": 0.5758731935830768, "grad_norm": 1.7732464993485868, "learning_rate": 4.549110973229328e-06, "loss": 0.605, "step": 19546 }, { "epoch": 0.5759026560405404, "grad_norm": 1.7446218794718498, "learning_rate": 4.548598831176187e-06, "loss": 0.5554, "step": 19547 }, { "epoch": 0.575932118498004, "grad_norm": 1.671038530629076, "learning_rate": 4.54808669389783e-06, "loss": 0.6241, "step": 19548 }, { "epoch": 0.5759615809554675, "grad_norm": 1.601844760213441, "learning_rate": 4.547574561399673e-06, "loss": 0.4658, "step": 19549 }, { "epoch": 0.5759910434129311, "grad_norm": 1.5106200178783902, "learning_rate": 4.5470624336871365e-06, "loss": 0.2859, "step": 19550 }, { "epoch": 0.5760205058703947, "grad_norm": 1.428619211648161, "learning_rate": 4.546550310765636e-06, "loss": 0.4627, "step": 19551 }, { "epoch": 0.5760499683278583, "grad_norm": 1.651815046352995, "learning_rate": 4.546038192640586e-06, "loss": 0.3732, "step": 19552 }, { "epoch": 0.5760794307853218, "grad_norm": 1.5989232793964523, "learning_rate": 4.5455260793174055e-06, "loss": 0.6617, "step": 19553 }, { "epoch": 0.5761088932427854, "grad_norm": 1.5256751810219895, "learning_rate": 4.545013970801513e-06, "loss": 0.5198, "step": 19554 }, { "epoch": 0.576138355700249, "grad_norm": 1.6152992854754749, "learning_rate": 4.544501867098322e-06, "loss": 0.4675, "step": 19555 }, { "epoch": 0.5761678181577126, "grad_norm": 1.5066822421700763, "learning_rate": 4.543989768213253e-06, "loss": 0.5124, "step": 19556 }, { "epoch": 0.5761972806151762, "grad_norm": 1.6418958232240106, "learning_rate": 4.543477674151721e-06, "loss": 0.3859, "step": 19557 }, { "epoch": 0.5762267430726397, "grad_norm": 1.3992352953090186, "learning_rate": 4.542965584919143e-06, "loss": 0.4105, "step": 19558 }, { "epoch": 0.5762562055301033, "grad_norm": 1.5772572794914592, "learning_rate": 4.542453500520936e-06, "loss": 0.5184, "step": 19559 }, { "epoch": 0.5762856679875669, "grad_norm": 1.4291593464280525, "learning_rate": 4.541941420962515e-06, "loss": 0.4006, "step": 19560 }, { "epoch": 0.5763151304450305, "grad_norm": 1.510854936092946, "learning_rate": 4.541429346249301e-06, "loss": 0.5481, "step": 19561 }, { "epoch": 0.576344592902494, "grad_norm": 1.3852470281148608, "learning_rate": 4.540917276386706e-06, "loss": 0.2671, "step": 19562 }, { "epoch": 0.5763740553599576, "grad_norm": 1.6481798293392949, "learning_rate": 4.5404052113801485e-06, "loss": 0.5094, "step": 19563 }, { "epoch": 0.5764035178174212, "grad_norm": 1.3886811616909513, "learning_rate": 4.539893151235044e-06, "loss": 0.4379, "step": 19564 }, { "epoch": 0.5764329802748848, "grad_norm": 1.6072772245515845, "learning_rate": 4.539381095956808e-06, "loss": 0.5364, "step": 19565 }, { "epoch": 0.5764624427323483, "grad_norm": 1.5043400684992871, "learning_rate": 4.538869045550861e-06, "loss": 0.527, "step": 19566 }, { "epoch": 0.5764919051898119, "grad_norm": 1.3344857760393036, "learning_rate": 4.538357000022615e-06, "loss": 0.4287, "step": 19567 }, { "epoch": 0.5765213676472755, "grad_norm": 1.3546193475030768, "learning_rate": 4.537844959377489e-06, "loss": 0.3745, "step": 19568 }, { "epoch": 0.5765508301047391, "grad_norm": 1.5106793874033477, "learning_rate": 4.537332923620898e-06, "loss": 0.4689, "step": 19569 }, { "epoch": 0.5765802925622027, "grad_norm": 1.659533324500614, "learning_rate": 4.5368208927582574e-06, "loss": 0.4206, "step": 19570 }, { "epoch": 0.5766097550196662, "grad_norm": 1.4733339407376351, "learning_rate": 4.536308866794987e-06, "loss": 0.3204, "step": 19571 }, { "epoch": 0.5766392174771298, "grad_norm": 1.5888275862160033, "learning_rate": 4.535796845736499e-06, "loss": 0.4319, "step": 19572 }, { "epoch": 0.5766686799345934, "grad_norm": 1.3372050564891098, "learning_rate": 4.535284829588209e-06, "loss": 0.3582, "step": 19573 }, { "epoch": 0.576698142392057, "grad_norm": 1.343648774426934, "learning_rate": 4.534772818355534e-06, "loss": 0.3772, "step": 19574 }, { "epoch": 0.5767276048495205, "grad_norm": 1.362697620603603, "learning_rate": 4.534260812043892e-06, "loss": 0.3268, "step": 19575 }, { "epoch": 0.5767570673069841, "grad_norm": 1.4247654141789305, "learning_rate": 4.533748810658696e-06, "loss": 0.3683, "step": 19576 }, { "epoch": 0.5767865297644477, "grad_norm": 1.6446463044331705, "learning_rate": 4.533236814205363e-06, "loss": 0.4731, "step": 19577 }, { "epoch": 0.5768159922219113, "grad_norm": 1.4510847734031749, "learning_rate": 4.53272482268931e-06, "loss": 0.4233, "step": 19578 }, { "epoch": 0.5768454546793748, "grad_norm": 1.4582648296123144, "learning_rate": 4.53221283611595e-06, "loss": 0.4542, "step": 19579 }, { "epoch": 0.5768749171368384, "grad_norm": 1.4757972718833041, "learning_rate": 4.5317008544907015e-06, "loss": 0.3893, "step": 19580 }, { "epoch": 0.576904379594302, "grad_norm": 1.5519263841676005, "learning_rate": 4.5311888778189785e-06, "loss": 0.3074, "step": 19581 }, { "epoch": 0.5769338420517656, "grad_norm": 1.447255563306142, "learning_rate": 4.530676906106196e-06, "loss": 0.4487, "step": 19582 }, { "epoch": 0.5769633045092292, "grad_norm": 1.3806047522187226, "learning_rate": 4.530164939357771e-06, "loss": 0.5149, "step": 19583 }, { "epoch": 0.5769927669666927, "grad_norm": 1.50044404549413, "learning_rate": 4.529652977579117e-06, "loss": 0.479, "step": 19584 }, { "epoch": 0.5770222294241563, "grad_norm": 1.5945674352095653, "learning_rate": 4.529141020775651e-06, "loss": 0.5118, "step": 19585 }, { "epoch": 0.5770516918816199, "grad_norm": 1.7686325454928176, "learning_rate": 4.528629068952787e-06, "loss": 0.5601, "step": 19586 }, { "epoch": 0.5770811543390835, "grad_norm": 1.4563860314281534, "learning_rate": 4.528117122115942e-06, "loss": 0.393, "step": 19587 }, { "epoch": 0.577110616796547, "grad_norm": 1.4941108510897423, "learning_rate": 4.52760518027053e-06, "loss": 0.4232, "step": 19588 }, { "epoch": 0.5771400792540106, "grad_norm": 1.5800432404134657, "learning_rate": 4.527093243421965e-06, "loss": 0.56, "step": 19589 }, { "epoch": 0.5771695417114742, "grad_norm": 1.3434378532361684, "learning_rate": 4.526581311575666e-06, "loss": 0.3217, "step": 19590 }, { "epoch": 0.5771990041689378, "grad_norm": 1.5334238119132573, "learning_rate": 4.526069384737045e-06, "loss": 0.3261, "step": 19591 }, { "epoch": 0.5772284666264013, "grad_norm": 1.7507465357834469, "learning_rate": 4.525557462911517e-06, "loss": 0.5718, "step": 19592 }, { "epoch": 0.5772579290838649, "grad_norm": 1.4563859767016507, "learning_rate": 4.525045546104498e-06, "loss": 0.4777, "step": 19593 }, { "epoch": 0.5772873915413285, "grad_norm": 1.3485202344483622, "learning_rate": 4.524533634321403e-06, "loss": 0.3045, "step": 19594 }, { "epoch": 0.5773168539987921, "grad_norm": 1.2379284894185079, "learning_rate": 4.524021727567644e-06, "loss": 0.2989, "step": 19595 }, { "epoch": 0.5773463164562557, "grad_norm": 2.057263339551273, "learning_rate": 4.52350982584864e-06, "loss": 0.4611, "step": 19596 }, { "epoch": 0.5773757789137192, "grad_norm": 1.6066220394232316, "learning_rate": 4.522997929169803e-06, "loss": 0.5369, "step": 19597 }, { "epoch": 0.5774052413711828, "grad_norm": 1.4608159721753515, "learning_rate": 4.522486037536548e-06, "loss": 0.3619, "step": 19598 }, { "epoch": 0.5774347038286464, "grad_norm": 1.5342426394229762, "learning_rate": 4.521974150954292e-06, "loss": 0.4823, "step": 19599 }, { "epoch": 0.57746416628611, "grad_norm": 1.3636350337020366, "learning_rate": 4.521462269428446e-06, "loss": 0.4153, "step": 19600 }, { "epoch": 0.5774936287435735, "grad_norm": 1.6736819593132568, "learning_rate": 4.520950392964429e-06, "loss": 0.503, "step": 19601 }, { "epoch": 0.5775230912010371, "grad_norm": 1.8017080660490574, "learning_rate": 4.520438521567651e-06, "loss": 0.5131, "step": 19602 }, { "epoch": 0.5775525536585007, "grad_norm": 1.598265258901109, "learning_rate": 4.519926655243528e-06, "loss": 0.4639, "step": 19603 }, { "epoch": 0.5775820161159643, "grad_norm": 1.5635949926557438, "learning_rate": 4.5194147939974745e-06, "loss": 0.4642, "step": 19604 }, { "epoch": 0.5776114785734278, "grad_norm": 1.5243888108435681, "learning_rate": 4.518902937834905e-06, "loss": 0.3996, "step": 19605 }, { "epoch": 0.5776409410308914, "grad_norm": 1.6475322464498874, "learning_rate": 4.5183910867612335e-06, "loss": 0.5611, "step": 19606 }, { "epoch": 0.577670403488355, "grad_norm": 1.593205833394765, "learning_rate": 4.517879240781874e-06, "loss": 0.6562, "step": 19607 }, { "epoch": 0.5776998659458186, "grad_norm": 1.626486163741182, "learning_rate": 4.517367399902242e-06, "loss": 0.4686, "step": 19608 }, { "epoch": 0.5777293284032822, "grad_norm": 1.5527745817588707, "learning_rate": 4.516855564127749e-06, "loss": 0.5165, "step": 19609 }, { "epoch": 0.5777587908607457, "grad_norm": 1.3690374260210796, "learning_rate": 4.516343733463811e-06, "loss": 0.5152, "step": 19610 }, { "epoch": 0.5777882533182093, "grad_norm": 1.5408313845232227, "learning_rate": 4.515831907915843e-06, "loss": 0.3548, "step": 19611 }, { "epoch": 0.5778177157756729, "grad_norm": 1.4512805190366669, "learning_rate": 4.5153200874892576e-06, "loss": 0.4232, "step": 19612 }, { "epoch": 0.5778471782331365, "grad_norm": 1.6440389989367314, "learning_rate": 4.514808272189467e-06, "loss": 0.5255, "step": 19613 }, { "epoch": 0.5778766406906, "grad_norm": 1.4979970555550253, "learning_rate": 4.514296462021887e-06, "loss": 0.3162, "step": 19614 }, { "epoch": 0.5779061031480636, "grad_norm": 1.5102620227143428, "learning_rate": 4.513784656991932e-06, "loss": 0.4047, "step": 19615 }, { "epoch": 0.5779355656055272, "grad_norm": 1.6323958163286871, "learning_rate": 4.513272857105013e-06, "loss": 0.5047, "step": 19616 }, { "epoch": 0.5779650280629908, "grad_norm": 1.6134303287912248, "learning_rate": 4.5127610623665465e-06, "loss": 0.5203, "step": 19617 }, { "epoch": 0.5779944905204543, "grad_norm": 1.3825741272119427, "learning_rate": 4.512249272781945e-06, "loss": 0.3202, "step": 19618 }, { "epoch": 0.5780239529779179, "grad_norm": 1.5659534118409857, "learning_rate": 4.511737488356621e-06, "loss": 0.4073, "step": 19619 }, { "epoch": 0.5780534154353815, "grad_norm": 1.446562581465815, "learning_rate": 4.51122570909599e-06, "loss": 0.4216, "step": 19620 }, { "epoch": 0.5780828778928451, "grad_norm": 1.4432046358164579, "learning_rate": 4.510713935005467e-06, "loss": 0.4325, "step": 19621 }, { "epoch": 0.5781123403503087, "grad_norm": 1.5621187391767588, "learning_rate": 4.510202166090459e-06, "loss": 0.4741, "step": 19622 }, { "epoch": 0.5781418028077722, "grad_norm": 1.6566446355813769, "learning_rate": 4.509690402356384e-06, "loss": 0.4849, "step": 19623 }, { "epoch": 0.5781712652652358, "grad_norm": 1.4340046049843163, "learning_rate": 4.509178643808656e-06, "loss": 0.4033, "step": 19624 }, { "epoch": 0.5782007277226994, "grad_norm": 1.5603514358355135, "learning_rate": 4.508666890452685e-06, "loss": 0.404, "step": 19625 }, { "epoch": 0.578230190180163, "grad_norm": 1.531485993959248, "learning_rate": 4.508155142293887e-06, "loss": 0.4992, "step": 19626 }, { "epoch": 0.5782596526376265, "grad_norm": 1.355530887089351, "learning_rate": 4.507643399337673e-06, "loss": 0.3142, "step": 19627 }, { "epoch": 0.5782891150950901, "grad_norm": 1.5604434737316295, "learning_rate": 4.507131661589458e-06, "loss": 0.4559, "step": 19628 }, { "epoch": 0.5783185775525537, "grad_norm": 1.3971398822701524, "learning_rate": 4.506619929054654e-06, "loss": 0.3541, "step": 19629 }, { "epoch": 0.5783480400100173, "grad_norm": 1.8999468748251391, "learning_rate": 4.506108201738673e-06, "loss": 0.3836, "step": 19630 }, { "epoch": 0.5783775024674808, "grad_norm": 1.4040972387100497, "learning_rate": 4.505596479646932e-06, "loss": 0.5076, "step": 19631 }, { "epoch": 0.5784069649249444, "grad_norm": 1.4061186030640804, "learning_rate": 4.505084762784838e-06, "loss": 0.3572, "step": 19632 }, { "epoch": 0.578436427382408, "grad_norm": 1.418482974721618, "learning_rate": 4.504573051157807e-06, "loss": 0.3379, "step": 19633 }, { "epoch": 0.5784658898398716, "grad_norm": 1.5249053535934542, "learning_rate": 4.504061344771251e-06, "loss": 0.4784, "step": 19634 }, { "epoch": 0.5784953522973352, "grad_norm": 1.290543013865053, "learning_rate": 4.503549643630583e-06, "loss": 0.3764, "step": 19635 }, { "epoch": 0.5785248147547987, "grad_norm": 1.4427443801490114, "learning_rate": 4.503037947741216e-06, "loss": 0.3668, "step": 19636 }, { "epoch": 0.5785542772122623, "grad_norm": 1.6415051643698564, "learning_rate": 4.502526257108561e-06, "loss": 0.5251, "step": 19637 }, { "epoch": 0.5785837396697259, "grad_norm": 1.4640294062008863, "learning_rate": 4.502014571738034e-06, "loss": 0.4293, "step": 19638 }, { "epoch": 0.5786132021271895, "grad_norm": 1.4141015138238318, "learning_rate": 4.501502891635043e-06, "loss": 0.3493, "step": 19639 }, { "epoch": 0.578642664584653, "grad_norm": 1.3787389120218168, "learning_rate": 4.500991216805002e-06, "loss": 0.3826, "step": 19640 }, { "epoch": 0.5786721270421166, "grad_norm": 1.572037624333252, "learning_rate": 4.500479547253327e-06, "loss": 0.3879, "step": 19641 }, { "epoch": 0.5787015894995802, "grad_norm": 1.453503746618881, "learning_rate": 4.499967882985426e-06, "loss": 0.4478, "step": 19642 }, { "epoch": 0.5787310519570438, "grad_norm": 1.6137268328617724, "learning_rate": 4.4994562240067105e-06, "loss": 0.5717, "step": 19643 }, { "epoch": 0.5787605144145073, "grad_norm": 1.3804287788648493, "learning_rate": 4.498944570322595e-06, "loss": 0.3317, "step": 19644 }, { "epoch": 0.5787899768719709, "grad_norm": 1.393188401279512, "learning_rate": 4.4984329219384925e-06, "loss": 0.4322, "step": 19645 }, { "epoch": 0.5788194393294345, "grad_norm": 1.57937625525173, "learning_rate": 4.497921278859812e-06, "loss": 0.443, "step": 19646 }, { "epoch": 0.5788489017868981, "grad_norm": 1.4111845920881871, "learning_rate": 4.497409641091968e-06, "loss": 0.4446, "step": 19647 }, { "epoch": 0.5788783642443617, "grad_norm": 1.5638266871211033, "learning_rate": 4.496898008640372e-06, "loss": 0.4228, "step": 19648 }, { "epoch": 0.5789078267018252, "grad_norm": 1.5803341695643602, "learning_rate": 4.496386381510435e-06, "loss": 0.4229, "step": 19649 }, { "epoch": 0.5789372891592888, "grad_norm": 1.5102326880644814, "learning_rate": 4.495874759707571e-06, "loss": 0.5302, "step": 19650 }, { "epoch": 0.5789667516167524, "grad_norm": 1.5326341987997285, "learning_rate": 4.4953631432371896e-06, "loss": 0.5089, "step": 19651 }, { "epoch": 0.578996214074216, "grad_norm": 1.6456661291824577, "learning_rate": 4.494851532104702e-06, "loss": 0.4792, "step": 19652 }, { "epoch": 0.5790256765316795, "grad_norm": 1.405686671233696, "learning_rate": 4.4943399263155194e-06, "loss": 0.4193, "step": 19653 }, { "epoch": 0.5790551389891431, "grad_norm": 1.5771071450442966, "learning_rate": 4.493828325875057e-06, "loss": 0.4437, "step": 19654 }, { "epoch": 0.5790846014466067, "grad_norm": 1.618447510710167, "learning_rate": 4.493316730788724e-06, "loss": 0.365, "step": 19655 }, { "epoch": 0.5791140639040703, "grad_norm": 1.650067992712027, "learning_rate": 4.492805141061932e-06, "loss": 0.5517, "step": 19656 }, { "epoch": 0.5791435263615338, "grad_norm": 1.3899587933356268, "learning_rate": 4.492293556700094e-06, "loss": 0.3599, "step": 19657 }, { "epoch": 0.5791729888189974, "grad_norm": 1.3107005502554903, "learning_rate": 4.491781977708618e-06, "loss": 0.3755, "step": 19658 }, { "epoch": 0.579202451276461, "grad_norm": 1.4768072414075053, "learning_rate": 4.491270404092918e-06, "loss": 0.5054, "step": 19659 }, { "epoch": 0.5792319137339246, "grad_norm": 1.4875105558681636, "learning_rate": 4.490758835858407e-06, "loss": 0.5512, "step": 19660 }, { "epoch": 0.5792613761913882, "grad_norm": 1.4687838801524662, "learning_rate": 4.490247273010491e-06, "loss": 0.4498, "step": 19661 }, { "epoch": 0.5792908386488517, "grad_norm": 1.3821896989119833, "learning_rate": 4.489735715554584e-06, "loss": 0.3725, "step": 19662 }, { "epoch": 0.5793203011063153, "grad_norm": 1.543043725673467, "learning_rate": 4.4892241634960985e-06, "loss": 0.5397, "step": 19663 }, { "epoch": 0.5793497635637789, "grad_norm": 1.5581509108994067, "learning_rate": 4.488712616840443e-06, "loss": 0.438, "step": 19664 }, { "epoch": 0.5793792260212425, "grad_norm": 1.490843919131597, "learning_rate": 4.48820107559303e-06, "loss": 0.4557, "step": 19665 }, { "epoch": 0.579408688478706, "grad_norm": 1.6420873783582208, "learning_rate": 4.487689539759271e-06, "loss": 0.4513, "step": 19666 }, { "epoch": 0.5794381509361696, "grad_norm": 1.592830188060834, "learning_rate": 4.487178009344574e-06, "loss": 0.438, "step": 19667 }, { "epoch": 0.5794676133936332, "grad_norm": 1.3661221765957958, "learning_rate": 4.486666484354353e-06, "loss": 0.4102, "step": 19668 }, { "epoch": 0.5794970758510968, "grad_norm": 1.373017532569, "learning_rate": 4.486154964794017e-06, "loss": 0.313, "step": 19669 }, { "epoch": 0.5795265383085603, "grad_norm": 1.487208233808534, "learning_rate": 4.48564345066898e-06, "loss": 0.4097, "step": 19670 }, { "epoch": 0.5795560007660239, "grad_norm": 1.4474941259129541, "learning_rate": 4.4851319419846455e-06, "loss": 0.4088, "step": 19671 }, { "epoch": 0.5795854632234875, "grad_norm": 1.5911381085814271, "learning_rate": 4.484620438746431e-06, "loss": 0.5042, "step": 19672 }, { "epoch": 0.5796149256809511, "grad_norm": 1.545488400150522, "learning_rate": 4.484108940959743e-06, "loss": 0.4845, "step": 19673 }, { "epoch": 0.5796443881384147, "grad_norm": 1.5599530936572632, "learning_rate": 4.483597448629994e-06, "loss": 0.4464, "step": 19674 }, { "epoch": 0.5796738505958782, "grad_norm": 1.4726936495449345, "learning_rate": 4.483085961762594e-06, "loss": 0.3633, "step": 19675 }, { "epoch": 0.5797033130533418, "grad_norm": 1.5332358311654106, "learning_rate": 4.482574480362953e-06, "loss": 0.4307, "step": 19676 }, { "epoch": 0.5797327755108054, "grad_norm": 1.5120467454010382, "learning_rate": 4.48206300443648e-06, "loss": 0.5718, "step": 19677 }, { "epoch": 0.579762237968269, "grad_norm": 1.8050251376174744, "learning_rate": 4.481551533988589e-06, "loss": 0.4348, "step": 19678 }, { "epoch": 0.5797917004257325, "grad_norm": 1.8784707517609047, "learning_rate": 4.481040069024686e-06, "loss": 0.6296, "step": 19679 }, { "epoch": 0.5798211628831961, "grad_norm": 1.5406403615549022, "learning_rate": 4.480528609550186e-06, "loss": 0.5402, "step": 19680 }, { "epoch": 0.5798506253406597, "grad_norm": 1.4625460859520059, "learning_rate": 4.4800171555704944e-06, "loss": 0.4383, "step": 19681 }, { "epoch": 0.5798800877981233, "grad_norm": 1.5174291020195354, "learning_rate": 4.479505707091022e-06, "loss": 0.4136, "step": 19682 }, { "epoch": 0.5799095502555868, "grad_norm": 1.4952347176348162, "learning_rate": 4.4789942641171796e-06, "loss": 0.5428, "step": 19683 }, { "epoch": 0.5799390127130504, "grad_norm": 1.4088513822286985, "learning_rate": 4.478482826654378e-06, "loss": 0.3956, "step": 19684 }, { "epoch": 0.579968475170514, "grad_norm": 1.476593123428536, "learning_rate": 4.477971394708026e-06, "loss": 0.4343, "step": 19685 }, { "epoch": 0.5799979376279776, "grad_norm": 1.527557820257445, "learning_rate": 4.477459968283532e-06, "loss": 0.3964, "step": 19686 }, { "epoch": 0.5800274000854412, "grad_norm": 1.4787314906434676, "learning_rate": 4.476948547386309e-06, "loss": 0.4345, "step": 19687 }, { "epoch": 0.5800568625429047, "grad_norm": 1.5748459104145807, "learning_rate": 4.476437132021763e-06, "loss": 0.445, "step": 19688 }, { "epoch": 0.5800863250003683, "grad_norm": 1.418465745892595, "learning_rate": 4.475925722195308e-06, "loss": 0.4956, "step": 19689 }, { "epoch": 0.5801157874578319, "grad_norm": 1.5223492038366964, "learning_rate": 4.475414317912351e-06, "loss": 0.4485, "step": 19690 }, { "epoch": 0.5801452499152955, "grad_norm": 1.4888959613837058, "learning_rate": 4.4749029191783e-06, "loss": 0.4456, "step": 19691 }, { "epoch": 0.580174712372759, "grad_norm": 1.752935301668776, "learning_rate": 4.474391525998566e-06, "loss": 0.4961, "step": 19692 }, { "epoch": 0.5802041748302226, "grad_norm": 1.7588246682567534, "learning_rate": 4.473880138378559e-06, "loss": 0.5077, "step": 19693 }, { "epoch": 0.5802336372876862, "grad_norm": 1.4847271499432546, "learning_rate": 4.473368756323686e-06, "loss": 0.3316, "step": 19694 }, { "epoch": 0.5802630997451498, "grad_norm": 1.384159701382603, "learning_rate": 4.4728573798393585e-06, "loss": 0.3502, "step": 19695 }, { "epoch": 0.5802925622026133, "grad_norm": 1.305116756581086, "learning_rate": 4.472346008930985e-06, "loss": 0.3231, "step": 19696 }, { "epoch": 0.5803220246600769, "grad_norm": 1.4624276850295277, "learning_rate": 4.471834643603975e-06, "loss": 0.4351, "step": 19697 }, { "epoch": 0.5803514871175405, "grad_norm": 1.417472316012598, "learning_rate": 4.471323283863736e-06, "loss": 0.5167, "step": 19698 }, { "epoch": 0.5803809495750041, "grad_norm": 1.7557475354932182, "learning_rate": 4.470811929715679e-06, "loss": 0.5438, "step": 19699 }, { "epoch": 0.5804104120324677, "grad_norm": 1.555303609434456, "learning_rate": 4.470300581165214e-06, "loss": 0.453, "step": 19700 }, { "epoch": 0.5804398744899312, "grad_norm": 1.2658782029686566, "learning_rate": 4.469789238217747e-06, "loss": 0.3177, "step": 19701 }, { "epoch": 0.5804693369473948, "grad_norm": 1.355505876658246, "learning_rate": 4.469277900878686e-06, "loss": 0.3094, "step": 19702 }, { "epoch": 0.5804987994048584, "grad_norm": 1.6909797037348149, "learning_rate": 4.468766569153443e-06, "loss": 0.3999, "step": 19703 }, { "epoch": 0.580528261862322, "grad_norm": 1.5719629096756602, "learning_rate": 4.468255243047425e-06, "loss": 0.494, "step": 19704 }, { "epoch": 0.5805577243197855, "grad_norm": 1.452672728691201, "learning_rate": 4.467743922566041e-06, "loss": 0.4333, "step": 19705 }, { "epoch": 0.5805871867772491, "grad_norm": 1.3626630407549325, "learning_rate": 4.4672326077147e-06, "loss": 0.3767, "step": 19706 }, { "epoch": 0.5806166492347127, "grad_norm": 1.6863445382318736, "learning_rate": 4.466721298498809e-06, "loss": 0.3672, "step": 19707 }, { "epoch": 0.5806461116921763, "grad_norm": 1.3066944722475458, "learning_rate": 4.4662099949237785e-06, "loss": 0.2631, "step": 19708 }, { "epoch": 0.5806755741496398, "grad_norm": 1.5311099840011677, "learning_rate": 4.465698696995015e-06, "loss": 0.4728, "step": 19709 }, { "epoch": 0.5807050366071034, "grad_norm": 1.5901187196046533, "learning_rate": 4.465187404717931e-06, "loss": 0.5107, "step": 19710 }, { "epoch": 0.580734499064567, "grad_norm": 1.6119074400486935, "learning_rate": 4.464676118097929e-06, "loss": 0.3441, "step": 19711 }, { "epoch": 0.5807639615220306, "grad_norm": 1.480636479799953, "learning_rate": 4.464164837140421e-06, "loss": 0.3913, "step": 19712 }, { "epoch": 0.5807934239794942, "grad_norm": 1.5893825212628117, "learning_rate": 4.463653561850813e-06, "loss": 0.5179, "step": 19713 }, { "epoch": 0.5808228864369577, "grad_norm": 1.5565141798005273, "learning_rate": 4.463142292234514e-06, "loss": 0.4884, "step": 19714 }, { "epoch": 0.5808523488944213, "grad_norm": 1.5642621352728, "learning_rate": 4.462631028296932e-06, "loss": 0.3909, "step": 19715 }, { "epoch": 0.5808818113518849, "grad_norm": 1.3552015648543927, "learning_rate": 4.462119770043476e-06, "loss": 0.324, "step": 19716 }, { "epoch": 0.5809112738093485, "grad_norm": 1.378856534300111, "learning_rate": 4.461608517479553e-06, "loss": 0.3158, "step": 19717 }, { "epoch": 0.580940736266812, "grad_norm": 1.4428571255022145, "learning_rate": 4.461097270610571e-06, "loss": 0.5785, "step": 19718 }, { "epoch": 0.5809701987242756, "grad_norm": 1.6525035799636283, "learning_rate": 4.460586029441937e-06, "loss": 0.4483, "step": 19719 }, { "epoch": 0.5809996611817392, "grad_norm": 1.5943291021680435, "learning_rate": 4.460074793979062e-06, "loss": 0.4326, "step": 19720 }, { "epoch": 0.5810291236392028, "grad_norm": 1.6421729026496692, "learning_rate": 4.4595635642273504e-06, "loss": 0.4563, "step": 19721 }, { "epoch": 0.5810585860966663, "grad_norm": 1.54267753137491, "learning_rate": 4.45905234019221e-06, "loss": 0.3735, "step": 19722 }, { "epoch": 0.5810880485541299, "grad_norm": 1.5450083889919515, "learning_rate": 4.458541121879048e-06, "loss": 0.5547, "step": 19723 }, { "epoch": 0.5811175110115935, "grad_norm": 1.8762922178272345, "learning_rate": 4.458029909293275e-06, "loss": 0.4605, "step": 19724 }, { "epoch": 0.5811469734690571, "grad_norm": 1.5079672957355381, "learning_rate": 4.457518702440296e-06, "loss": 0.5449, "step": 19725 }, { "epoch": 0.5811764359265207, "grad_norm": 1.3723725341779975, "learning_rate": 4.457007501325518e-06, "loss": 0.3894, "step": 19726 }, { "epoch": 0.5812058983839842, "grad_norm": 1.5288158675791237, "learning_rate": 4.4564963059543495e-06, "loss": 0.5149, "step": 19727 }, { "epoch": 0.5812353608414478, "grad_norm": 1.4184909679109803, "learning_rate": 4.455985116332196e-06, "loss": 0.3575, "step": 19728 }, { "epoch": 0.5812648232989114, "grad_norm": 1.4208172081019323, "learning_rate": 4.4554739324644695e-06, "loss": 0.3608, "step": 19729 }, { "epoch": 0.581294285756375, "grad_norm": 1.710601027941693, "learning_rate": 4.454962754356573e-06, "loss": 0.3325, "step": 19730 }, { "epoch": 0.5813237482138385, "grad_norm": 1.65683237809881, "learning_rate": 4.454451582013913e-06, "loss": 0.5249, "step": 19731 }, { "epoch": 0.5813532106713021, "grad_norm": 1.3575039391048758, "learning_rate": 4.4539404154418975e-06, "loss": 0.4441, "step": 19732 }, { "epoch": 0.5813826731287657, "grad_norm": 1.742348592101667, "learning_rate": 4.4534292546459355e-06, "loss": 0.5543, "step": 19733 }, { "epoch": 0.5814121355862293, "grad_norm": 1.771003150571207, "learning_rate": 4.452918099631431e-06, "loss": 0.5666, "step": 19734 }, { "epoch": 0.5814415980436928, "grad_norm": 1.6660613435775988, "learning_rate": 4.452406950403793e-06, "loss": 0.4829, "step": 19735 }, { "epoch": 0.5814710605011564, "grad_norm": 1.539256563099316, "learning_rate": 4.451895806968426e-06, "loss": 0.4221, "step": 19736 }, { "epoch": 0.58150052295862, "grad_norm": 1.5650696709238634, "learning_rate": 4.451384669330739e-06, "loss": 0.3726, "step": 19737 }, { "epoch": 0.5815299854160836, "grad_norm": 1.4772450259128864, "learning_rate": 4.450873537496138e-06, "loss": 0.5178, "step": 19738 }, { "epoch": 0.5815594478735472, "grad_norm": 1.438763541132839, "learning_rate": 4.450362411470029e-06, "loss": 0.3921, "step": 19739 }, { "epoch": 0.5815889103310107, "grad_norm": 1.4977858587875623, "learning_rate": 4.449851291257821e-06, "loss": 0.4602, "step": 19740 }, { "epoch": 0.5816183727884743, "grad_norm": 1.429475400229898, "learning_rate": 4.4493401768649155e-06, "loss": 0.3721, "step": 19741 }, { "epoch": 0.5816478352459379, "grad_norm": 1.4919873441113989, "learning_rate": 4.448829068296724e-06, "loss": 0.3979, "step": 19742 }, { "epoch": 0.5816772977034015, "grad_norm": 1.6463229965254786, "learning_rate": 4.448317965558648e-06, "loss": 0.4083, "step": 19743 }, { "epoch": 0.581706760160865, "grad_norm": 1.6441039902432781, "learning_rate": 4.447806868656097e-06, "loss": 0.4588, "step": 19744 }, { "epoch": 0.5817362226183286, "grad_norm": 1.751889071268238, "learning_rate": 4.447295777594478e-06, "loss": 0.6724, "step": 19745 }, { "epoch": 0.5817656850757922, "grad_norm": 1.5881236037824147, "learning_rate": 4.446784692379195e-06, "loss": 0.386, "step": 19746 }, { "epoch": 0.5817951475332558, "grad_norm": 1.4030249405703712, "learning_rate": 4.446273613015656e-06, "loss": 0.3859, "step": 19747 }, { "epoch": 0.5818246099907193, "grad_norm": 1.4394946720325819, "learning_rate": 4.445762539509265e-06, "loss": 0.3234, "step": 19748 }, { "epoch": 0.5818540724481829, "grad_norm": 1.44573163802519, "learning_rate": 4.445251471865428e-06, "loss": 0.4148, "step": 19749 }, { "epoch": 0.5818835349056465, "grad_norm": 1.5356501712768658, "learning_rate": 4.444740410089555e-06, "loss": 0.493, "step": 19750 }, { "epoch": 0.5819129973631101, "grad_norm": 1.4204296969785866, "learning_rate": 4.444229354187047e-06, "loss": 0.3491, "step": 19751 }, { "epoch": 0.5819424598205737, "grad_norm": 1.3414044037132755, "learning_rate": 4.443718304163311e-06, "loss": 0.3234, "step": 19752 }, { "epoch": 0.5819719222780372, "grad_norm": 1.7081840061342817, "learning_rate": 4.443207260023752e-06, "loss": 0.5754, "step": 19753 }, { "epoch": 0.5820013847355008, "grad_norm": 1.456770703124401, "learning_rate": 4.442696221773779e-06, "loss": 0.5296, "step": 19754 }, { "epoch": 0.5820308471929644, "grad_norm": 1.4824787204542722, "learning_rate": 4.442185189418795e-06, "loss": 0.4321, "step": 19755 }, { "epoch": 0.582060309650428, "grad_norm": 1.4604099908067771, "learning_rate": 4.441674162964207e-06, "loss": 0.4437, "step": 19756 }, { "epoch": 0.5820897721078915, "grad_norm": 1.4474168162370034, "learning_rate": 4.441163142415417e-06, "loss": 0.3334, "step": 19757 }, { "epoch": 0.5821192345653551, "grad_norm": 1.463983063772689, "learning_rate": 4.440652127777835e-06, "loss": 0.5524, "step": 19758 }, { "epoch": 0.5821486970228187, "grad_norm": 1.6538515541723249, "learning_rate": 4.440141119056864e-06, "loss": 0.579, "step": 19759 }, { "epoch": 0.5821781594802823, "grad_norm": 1.4966275349244735, "learning_rate": 4.439630116257911e-06, "loss": 0.3443, "step": 19760 }, { "epoch": 0.5822076219377458, "grad_norm": 1.6657303792658809, "learning_rate": 4.439119119386378e-06, "loss": 0.4804, "step": 19761 }, { "epoch": 0.5822370843952094, "grad_norm": 1.4854643819955353, "learning_rate": 4.438608128447672e-06, "loss": 0.3856, "step": 19762 }, { "epoch": 0.582266546852673, "grad_norm": 1.579080666826197, "learning_rate": 4.438097143447199e-06, "loss": 0.4749, "step": 19763 }, { "epoch": 0.5822960093101366, "grad_norm": 1.532231045957158, "learning_rate": 4.437586164390362e-06, "loss": 0.474, "step": 19764 }, { "epoch": 0.5823254717676002, "grad_norm": 1.5259163368017323, "learning_rate": 4.437075191282567e-06, "loss": 0.447, "step": 19765 }, { "epoch": 0.5823549342250637, "grad_norm": 1.5989604021870865, "learning_rate": 4.436564224129221e-06, "loss": 0.3842, "step": 19766 }, { "epoch": 0.5823843966825273, "grad_norm": 1.5451211528055266, "learning_rate": 4.436053262935725e-06, "loss": 0.5238, "step": 19767 }, { "epoch": 0.5824138591399909, "grad_norm": 1.5463003952584597, "learning_rate": 4.435542307707486e-06, "loss": 0.4877, "step": 19768 }, { "epoch": 0.5824433215974545, "grad_norm": 1.7121129218253786, "learning_rate": 4.435031358449909e-06, "loss": 0.5529, "step": 19769 }, { "epoch": 0.582472784054918, "grad_norm": 1.403669691308331, "learning_rate": 4.4345204151684e-06, "loss": 0.4492, "step": 19770 }, { "epoch": 0.5825022465123816, "grad_norm": 1.2965209087715777, "learning_rate": 4.434009477868359e-06, "loss": 0.3754, "step": 19771 }, { "epoch": 0.5825317089698452, "grad_norm": 1.3678223190293985, "learning_rate": 4.433498546555194e-06, "loss": 0.3839, "step": 19772 }, { "epoch": 0.5825611714273088, "grad_norm": 1.5552883035036809, "learning_rate": 4.432987621234308e-06, "loss": 0.4771, "step": 19773 }, { "epoch": 0.5825906338847723, "grad_norm": 1.490427113043893, "learning_rate": 4.432476701911107e-06, "loss": 0.3298, "step": 19774 }, { "epoch": 0.5826200963422359, "grad_norm": 1.590220622960923, "learning_rate": 4.431965788590995e-06, "loss": 0.4887, "step": 19775 }, { "epoch": 0.5826495587996995, "grad_norm": 1.3925895774753425, "learning_rate": 4.431454881279374e-06, "loss": 0.4798, "step": 19776 }, { "epoch": 0.5826790212571631, "grad_norm": 1.7220860119431247, "learning_rate": 4.4309439799816525e-06, "loss": 0.6335, "step": 19777 }, { "epoch": 0.5827084837146267, "grad_norm": 1.5379223972774339, "learning_rate": 4.43043308470323e-06, "loss": 0.5538, "step": 19778 }, { "epoch": 0.5827379461720902, "grad_norm": 1.4218675219177017, "learning_rate": 4.4299221954495135e-06, "loss": 0.4581, "step": 19779 }, { "epoch": 0.5827674086295538, "grad_norm": 1.3681886035015736, "learning_rate": 4.429411312225908e-06, "loss": 0.3994, "step": 19780 }, { "epoch": 0.5827968710870174, "grad_norm": 1.270721534771992, "learning_rate": 4.428900435037814e-06, "loss": 0.419, "step": 19781 }, { "epoch": 0.582826333544481, "grad_norm": 1.5001511062661461, "learning_rate": 4.428389563890637e-06, "loss": 0.3931, "step": 19782 }, { "epoch": 0.5828557960019445, "grad_norm": 1.7579304640242739, "learning_rate": 4.42787869878978e-06, "loss": 0.5017, "step": 19783 }, { "epoch": 0.5828852584594081, "grad_norm": 1.58378550856682, "learning_rate": 4.42736783974065e-06, "loss": 0.4241, "step": 19784 }, { "epoch": 0.5829147209168717, "grad_norm": 1.4998443695443424, "learning_rate": 4.426856986748646e-06, "loss": 0.464, "step": 19785 }, { "epoch": 0.5829441833743353, "grad_norm": 1.4868026169682658, "learning_rate": 4.426346139819176e-06, "loss": 0.4937, "step": 19786 }, { "epoch": 0.5829736458317988, "grad_norm": 1.4147762549091278, "learning_rate": 4.425835298957641e-06, "loss": 0.4976, "step": 19787 }, { "epoch": 0.5830031082892624, "grad_norm": 1.3067730478490045, "learning_rate": 4.425324464169445e-06, "loss": 0.3559, "step": 19788 }, { "epoch": 0.583032570746726, "grad_norm": 1.559631379498149, "learning_rate": 4.424813635459994e-06, "loss": 0.4122, "step": 19789 }, { "epoch": 0.5830620332041896, "grad_norm": 1.4755540835354275, "learning_rate": 4.424302812834687e-06, "loss": 0.4871, "step": 19790 }, { "epoch": 0.5830914956616532, "grad_norm": 1.5407734783008689, "learning_rate": 4.423791996298931e-06, "loss": 0.3525, "step": 19791 }, { "epoch": 0.5831209581191167, "grad_norm": 1.6141403521948157, "learning_rate": 4.423281185858125e-06, "loss": 0.5203, "step": 19792 }, { "epoch": 0.5831504205765803, "grad_norm": 1.5203559266165894, "learning_rate": 4.422770381517677e-06, "loss": 0.399, "step": 19793 }, { "epoch": 0.5831798830340439, "grad_norm": 1.5128821049106829, "learning_rate": 4.4222595832829865e-06, "loss": 0.4463, "step": 19794 }, { "epoch": 0.5832093454915075, "grad_norm": 1.3183868836945956, "learning_rate": 4.421748791159458e-06, "loss": 0.434, "step": 19795 }, { "epoch": 0.583238807948971, "grad_norm": 1.4809089141403202, "learning_rate": 4.421238005152496e-06, "loss": 0.4007, "step": 19796 }, { "epoch": 0.5832682704064346, "grad_norm": 1.4925370918994216, "learning_rate": 4.4207272252675005e-06, "loss": 0.4966, "step": 19797 }, { "epoch": 0.5832977328638982, "grad_norm": 1.5194830496671783, "learning_rate": 4.420216451509876e-06, "loss": 0.4648, "step": 19798 }, { "epoch": 0.5833271953213618, "grad_norm": 1.5437455619616758, "learning_rate": 4.419705683885026e-06, "loss": 0.4319, "step": 19799 }, { "epoch": 0.5833566577788253, "grad_norm": 1.5481953918548648, "learning_rate": 4.4191949223983545e-06, "loss": 0.5049, "step": 19800 }, { "epoch": 0.5833861202362889, "grad_norm": 1.5771328019908661, "learning_rate": 4.418684167055259e-06, "loss": 0.4423, "step": 19801 }, { "epoch": 0.5834155826937525, "grad_norm": 1.509029627817174, "learning_rate": 4.418173417861147e-06, "loss": 0.4567, "step": 19802 }, { "epoch": 0.5834450451512161, "grad_norm": 1.4587274446702112, "learning_rate": 4.417662674821418e-06, "loss": 0.4814, "step": 19803 }, { "epoch": 0.5834745076086797, "grad_norm": 1.4365475483960704, "learning_rate": 4.417151937941475e-06, "loss": 0.4749, "step": 19804 }, { "epoch": 0.5835039700661432, "grad_norm": 1.5801364272483645, "learning_rate": 4.416641207226723e-06, "loss": 0.3756, "step": 19805 }, { "epoch": 0.5835334325236068, "grad_norm": 1.3791422981285042, "learning_rate": 4.416130482682561e-06, "loss": 0.3828, "step": 19806 }, { "epoch": 0.5835628949810704, "grad_norm": 1.4003516987212845, "learning_rate": 4.415619764314393e-06, "loss": 0.4412, "step": 19807 }, { "epoch": 0.583592357438534, "grad_norm": 1.527737195875592, "learning_rate": 4.415109052127622e-06, "loss": 0.4174, "step": 19808 }, { "epoch": 0.5836218198959975, "grad_norm": 1.3378168519989857, "learning_rate": 4.4145983461276485e-06, "loss": 0.4194, "step": 19809 }, { "epoch": 0.5836512823534611, "grad_norm": 1.3894026720555201, "learning_rate": 4.414087646319878e-06, "loss": 0.3642, "step": 19810 }, { "epoch": 0.5836807448109247, "grad_norm": 1.7472261146239894, "learning_rate": 4.413576952709707e-06, "loss": 0.5053, "step": 19811 }, { "epoch": 0.5837102072683883, "grad_norm": 1.5567148444963317, "learning_rate": 4.413066265302541e-06, "loss": 0.356, "step": 19812 }, { "epoch": 0.5837396697258518, "grad_norm": 1.4315174507417072, "learning_rate": 4.41255558410378e-06, "loss": 0.3689, "step": 19813 }, { "epoch": 0.5837691321833154, "grad_norm": 1.4478243868982337, "learning_rate": 4.412044909118828e-06, "loss": 0.3571, "step": 19814 }, { "epoch": 0.583798594640779, "grad_norm": 1.4226544265818797, "learning_rate": 4.411534240353086e-06, "loss": 0.3318, "step": 19815 }, { "epoch": 0.5838280570982426, "grad_norm": 1.6432670165174457, "learning_rate": 4.411023577811954e-06, "loss": 0.4231, "step": 19816 }, { "epoch": 0.5838575195557062, "grad_norm": 1.7178298175663904, "learning_rate": 4.410512921500837e-06, "loss": 0.4801, "step": 19817 }, { "epoch": 0.5838869820131697, "grad_norm": 1.5121713695377161, "learning_rate": 4.410002271425133e-06, "loss": 0.4249, "step": 19818 }, { "epoch": 0.5839164444706333, "grad_norm": 1.4201993165590086, "learning_rate": 4.409491627590246e-06, "loss": 0.4084, "step": 19819 }, { "epoch": 0.5839459069280969, "grad_norm": 1.6341016088873515, "learning_rate": 4.4089809900015775e-06, "loss": 0.3862, "step": 19820 }, { "epoch": 0.5839753693855605, "grad_norm": 1.5392115903462888, "learning_rate": 4.408470358664528e-06, "loss": 0.5315, "step": 19821 }, { "epoch": 0.584004831843024, "grad_norm": 1.661053216806214, "learning_rate": 4.407959733584498e-06, "loss": 0.5368, "step": 19822 }, { "epoch": 0.5840342943004876, "grad_norm": 1.4643919400367187, "learning_rate": 4.40744911476689e-06, "loss": 0.4386, "step": 19823 }, { "epoch": 0.5840637567579512, "grad_norm": 1.5608591843424677, "learning_rate": 4.406938502217103e-06, "loss": 0.4621, "step": 19824 }, { "epoch": 0.5840932192154148, "grad_norm": 1.5197315076413591, "learning_rate": 4.40642789594054e-06, "loss": 0.5005, "step": 19825 }, { "epoch": 0.5841226816728783, "grad_norm": 1.4534666046213591, "learning_rate": 4.405917295942603e-06, "loss": 0.4186, "step": 19826 }, { "epoch": 0.5841521441303419, "grad_norm": 1.9453459848364656, "learning_rate": 4.4054067022286914e-06, "loss": 0.3861, "step": 19827 }, { "epoch": 0.5841816065878055, "grad_norm": 1.7905097334999662, "learning_rate": 4.404896114804206e-06, "loss": 0.5115, "step": 19828 }, { "epoch": 0.5842110690452691, "grad_norm": 1.5609418626599847, "learning_rate": 4.4043855336745496e-06, "loss": 0.5483, "step": 19829 }, { "epoch": 0.5842405315027327, "grad_norm": 1.518805375649172, "learning_rate": 4.4038749588451215e-06, "loss": 0.3557, "step": 19830 }, { "epoch": 0.5842699939601962, "grad_norm": 1.600680027861104, "learning_rate": 4.403364390321321e-06, "loss": 0.4177, "step": 19831 }, { "epoch": 0.5842994564176598, "grad_norm": 1.3971129416703905, "learning_rate": 4.40285382810855e-06, "loss": 0.3777, "step": 19832 }, { "epoch": 0.5843289188751234, "grad_norm": 1.5182032292576393, "learning_rate": 4.402343272212211e-06, "loss": 0.5144, "step": 19833 }, { "epoch": 0.584358381332587, "grad_norm": 1.4914869948302032, "learning_rate": 4.401832722637701e-06, "loss": 0.564, "step": 19834 }, { "epoch": 0.5843878437900505, "grad_norm": 1.4153796131319394, "learning_rate": 4.4013221793904235e-06, "loss": 0.3531, "step": 19835 }, { "epoch": 0.5844173062475141, "grad_norm": 1.468698274778585, "learning_rate": 4.400811642475777e-06, "loss": 0.4988, "step": 19836 }, { "epoch": 0.5844467687049777, "grad_norm": 1.4895150064676839, "learning_rate": 4.4003011118991606e-06, "loss": 0.4508, "step": 19837 }, { "epoch": 0.5844762311624413, "grad_norm": 1.5007741345361865, "learning_rate": 4.3997905876659784e-06, "loss": 0.4895, "step": 19838 }, { "epoch": 0.5845056936199048, "grad_norm": 1.5427850548633744, "learning_rate": 4.399280069781628e-06, "loss": 0.5402, "step": 19839 }, { "epoch": 0.5845351560773684, "grad_norm": 1.3944205599473904, "learning_rate": 4.398769558251511e-06, "loss": 0.4268, "step": 19840 }, { "epoch": 0.584564618534832, "grad_norm": 1.61014570372831, "learning_rate": 4.398259053081024e-06, "loss": 0.5559, "step": 19841 }, { "epoch": 0.5845940809922956, "grad_norm": 1.730878379732192, "learning_rate": 4.397748554275571e-06, "loss": 0.4867, "step": 19842 }, { "epoch": 0.5846235434497592, "grad_norm": 1.3406022379535714, "learning_rate": 4.39723806184055e-06, "loss": 0.4154, "step": 19843 }, { "epoch": 0.5846530059072227, "grad_norm": 1.7757781562680224, "learning_rate": 4.396727575781361e-06, "loss": 0.5471, "step": 19844 }, { "epoch": 0.5846824683646863, "grad_norm": 1.5396176808951887, "learning_rate": 4.396217096103404e-06, "loss": 0.4595, "step": 19845 }, { "epoch": 0.5847119308221499, "grad_norm": 1.5231462827414894, "learning_rate": 4.395706622812077e-06, "loss": 0.4281, "step": 19846 }, { "epoch": 0.5847413932796135, "grad_norm": 1.4805785501548936, "learning_rate": 4.3951961559127835e-06, "loss": 0.4507, "step": 19847 }, { "epoch": 0.584770855737077, "grad_norm": 1.5942283609002, "learning_rate": 4.3946856954109185e-06, "loss": 0.47, "step": 19848 }, { "epoch": 0.5848003181945406, "grad_norm": 1.5658620024053997, "learning_rate": 4.394175241311884e-06, "loss": 0.4825, "step": 19849 }, { "epoch": 0.5848297806520042, "grad_norm": 1.650945383747003, "learning_rate": 4.393664793621082e-06, "loss": 0.4661, "step": 19850 }, { "epoch": 0.5848592431094678, "grad_norm": 1.6370925147295536, "learning_rate": 4.393154352343908e-06, "loss": 0.4549, "step": 19851 }, { "epoch": 0.5848887055669313, "grad_norm": 1.8860923377389969, "learning_rate": 4.392643917485761e-06, "loss": 0.7518, "step": 19852 }, { "epoch": 0.5849181680243949, "grad_norm": 1.4232800853214618, "learning_rate": 4.392133489052041e-06, "loss": 0.4975, "step": 19853 }, { "epoch": 0.5849476304818585, "grad_norm": 1.8865614367697614, "learning_rate": 4.39162306704815e-06, "loss": 0.7073, "step": 19854 }, { "epoch": 0.5849770929393221, "grad_norm": 1.6452691434261368, "learning_rate": 4.391112651479482e-06, "loss": 0.5081, "step": 19855 }, { "epoch": 0.5850065553967857, "grad_norm": 1.4746667745995494, "learning_rate": 4.390602242351441e-06, "loss": 0.3799, "step": 19856 }, { "epoch": 0.5850360178542492, "grad_norm": 1.561643138227895, "learning_rate": 4.390091839669422e-06, "loss": 0.5373, "step": 19857 }, { "epoch": 0.5850654803117128, "grad_norm": 1.526202163692546, "learning_rate": 4.389581443438827e-06, "loss": 0.3815, "step": 19858 }, { "epoch": 0.5850949427691764, "grad_norm": 1.5912541214484366, "learning_rate": 4.389071053665054e-06, "loss": 0.5293, "step": 19859 }, { "epoch": 0.58512440522664, "grad_norm": 1.394866659772855, "learning_rate": 4.388560670353502e-06, "loss": 0.391, "step": 19860 }, { "epoch": 0.5851538676841035, "grad_norm": 1.7951331229039658, "learning_rate": 4.388050293509566e-06, "loss": 0.5519, "step": 19861 }, { "epoch": 0.5851833301415671, "grad_norm": 1.2194562470145158, "learning_rate": 4.387539923138648e-06, "loss": 0.346, "step": 19862 }, { "epoch": 0.5852127925990307, "grad_norm": 1.9199515571401116, "learning_rate": 4.387029559246148e-06, "loss": 0.3326, "step": 19863 }, { "epoch": 0.5852422550564943, "grad_norm": 1.4521516562561483, "learning_rate": 4.3865192018374606e-06, "loss": 0.4808, "step": 19864 }, { "epoch": 0.5852717175139578, "grad_norm": 1.4921340394051312, "learning_rate": 4.386008850917987e-06, "loss": 0.4338, "step": 19865 }, { "epoch": 0.5853011799714214, "grad_norm": 1.583465877250852, "learning_rate": 4.385498506493124e-06, "loss": 0.3998, "step": 19866 }, { "epoch": 0.585330642428885, "grad_norm": 1.5884112460524966, "learning_rate": 4.384988168568271e-06, "loss": 0.4955, "step": 19867 }, { "epoch": 0.5853601048863486, "grad_norm": 1.506808056457061, "learning_rate": 4.384477837148826e-06, "loss": 0.5056, "step": 19868 }, { "epoch": 0.5853895673438122, "grad_norm": 1.4026151105568383, "learning_rate": 4.383967512240186e-06, "loss": 0.4192, "step": 19869 }, { "epoch": 0.5854190298012757, "grad_norm": 1.7495902763235296, "learning_rate": 4.383457193847752e-06, "loss": 0.5093, "step": 19870 }, { "epoch": 0.5854484922587393, "grad_norm": 1.5490649069197573, "learning_rate": 4.382946881976918e-06, "loss": 0.3311, "step": 19871 }, { "epoch": 0.5854779547162029, "grad_norm": 1.431685161921469, "learning_rate": 4.3824365766330846e-06, "loss": 0.4303, "step": 19872 }, { "epoch": 0.5855074171736665, "grad_norm": 1.5222358401296596, "learning_rate": 4.381926277821648e-06, "loss": 0.4057, "step": 19873 }, { "epoch": 0.58553687963113, "grad_norm": 1.5714171907319279, "learning_rate": 4.3814159855480066e-06, "loss": 0.4856, "step": 19874 }, { "epoch": 0.5855663420885936, "grad_norm": 1.5633230385039376, "learning_rate": 4.38090569981756e-06, "loss": 0.542, "step": 19875 }, { "epoch": 0.5855958045460572, "grad_norm": 1.5310862858542413, "learning_rate": 4.380395420635703e-06, "loss": 0.5006, "step": 19876 }, { "epoch": 0.5856252670035208, "grad_norm": 1.4769760495918605, "learning_rate": 4.379885148007835e-06, "loss": 0.4705, "step": 19877 }, { "epoch": 0.5856547294609843, "grad_norm": 1.3693868133792864, "learning_rate": 4.3793748819393525e-06, "loss": 0.386, "step": 19878 }, { "epoch": 0.5856841919184479, "grad_norm": 1.5253558308449802, "learning_rate": 4.378864622435653e-06, "loss": 0.505, "step": 19879 }, { "epoch": 0.5857136543759115, "grad_norm": 1.5243559613782554, "learning_rate": 4.378354369502137e-06, "loss": 0.4345, "step": 19880 }, { "epoch": 0.5857431168333751, "grad_norm": 1.7236176338873743, "learning_rate": 4.377844123144196e-06, "loss": 0.4645, "step": 19881 }, { "epoch": 0.5857725792908387, "grad_norm": 1.782325047326993, "learning_rate": 4.377333883367231e-06, "loss": 0.5362, "step": 19882 }, { "epoch": 0.5858020417483022, "grad_norm": 1.4081135990907918, "learning_rate": 4.376823650176638e-06, "loss": 0.346, "step": 19883 }, { "epoch": 0.5858315042057658, "grad_norm": 1.3639873975467012, "learning_rate": 4.376313423577815e-06, "loss": 0.3369, "step": 19884 }, { "epoch": 0.5858609666632294, "grad_norm": 1.6336250764008848, "learning_rate": 4.375803203576159e-06, "loss": 0.4902, "step": 19885 }, { "epoch": 0.585890429120693, "grad_norm": 1.4211913824115743, "learning_rate": 4.375292990177065e-06, "loss": 0.4736, "step": 19886 }, { "epoch": 0.5859198915781565, "grad_norm": 1.515639756958313, "learning_rate": 4.374782783385933e-06, "loss": 0.4666, "step": 19887 }, { "epoch": 0.5859493540356201, "grad_norm": 1.4174218383194301, "learning_rate": 4.374272583208157e-06, "loss": 0.4076, "step": 19888 }, { "epoch": 0.5859788164930837, "grad_norm": 1.5189541893652, "learning_rate": 4.373762389649136e-06, "loss": 0.319, "step": 19889 }, { "epoch": 0.5860082789505473, "grad_norm": 1.427886185073769, "learning_rate": 4.373252202714267e-06, "loss": 0.4417, "step": 19890 }, { "epoch": 0.5860377414080108, "grad_norm": 1.5642559817548285, "learning_rate": 4.372742022408942e-06, "loss": 0.3971, "step": 19891 }, { "epoch": 0.5860672038654744, "grad_norm": 1.2661323352058902, "learning_rate": 4.372231848738562e-06, "loss": 0.4264, "step": 19892 }, { "epoch": 0.586096666322938, "grad_norm": 1.355172531900284, "learning_rate": 4.371721681708523e-06, "loss": 0.3636, "step": 19893 }, { "epoch": 0.5861261287804016, "grad_norm": 1.7158181566092296, "learning_rate": 4.371211521324219e-06, "loss": 0.5552, "step": 19894 }, { "epoch": 0.5861555912378652, "grad_norm": 1.4618509806647086, "learning_rate": 4.3707013675910485e-06, "loss": 0.3587, "step": 19895 }, { "epoch": 0.5861850536953287, "grad_norm": 1.6306436100530945, "learning_rate": 4.370191220514407e-06, "loss": 0.4662, "step": 19896 }, { "epoch": 0.5862145161527923, "grad_norm": 1.6608545392051943, "learning_rate": 4.3696810800996905e-06, "loss": 0.3584, "step": 19897 }, { "epoch": 0.5862439786102559, "grad_norm": 1.3037491779621497, "learning_rate": 4.369170946352297e-06, "loss": 0.3565, "step": 19898 }, { "epoch": 0.5862734410677195, "grad_norm": 1.3773151143026636, "learning_rate": 4.368660819277619e-06, "loss": 0.3727, "step": 19899 }, { "epoch": 0.586302903525183, "grad_norm": 1.5962633604657717, "learning_rate": 4.368150698881057e-06, "loss": 0.3786, "step": 19900 }, { "epoch": 0.5863323659826466, "grad_norm": 1.4340721584716292, "learning_rate": 4.367640585168003e-06, "loss": 0.3585, "step": 19901 }, { "epoch": 0.5863618284401102, "grad_norm": 1.7227242780008938, "learning_rate": 4.367130478143855e-06, "loss": 0.405, "step": 19902 }, { "epoch": 0.5863912908975738, "grad_norm": 1.6107478819832652, "learning_rate": 4.366620377814006e-06, "loss": 0.5174, "step": 19903 }, { "epoch": 0.5864207533550373, "grad_norm": 1.536614135375489, "learning_rate": 4.366110284183854e-06, "loss": 0.4366, "step": 19904 }, { "epoch": 0.5864502158125009, "grad_norm": 1.36282185058279, "learning_rate": 4.365600197258795e-06, "loss": 0.3206, "step": 19905 }, { "epoch": 0.5864796782699645, "grad_norm": 1.5387189660799638, "learning_rate": 4.365090117044223e-06, "loss": 0.5725, "step": 19906 }, { "epoch": 0.5865091407274281, "grad_norm": 1.6528467716573727, "learning_rate": 4.364580043545534e-06, "loss": 0.4878, "step": 19907 }, { "epoch": 0.5865386031848917, "grad_norm": 1.6528325255599905, "learning_rate": 4.364069976768125e-06, "loss": 0.5739, "step": 19908 }, { "epoch": 0.5865680656423552, "grad_norm": 1.3744680887376415, "learning_rate": 4.363559916717389e-06, "loss": 0.364, "step": 19909 }, { "epoch": 0.5865975280998188, "grad_norm": 1.5011133314912903, "learning_rate": 4.363049863398724e-06, "loss": 0.3429, "step": 19910 }, { "epoch": 0.5866269905572824, "grad_norm": 1.5556208029621716, "learning_rate": 4.362539816817523e-06, "loss": 0.4795, "step": 19911 }, { "epoch": 0.586656453014746, "grad_norm": 1.378204810726916, "learning_rate": 4.362029776979181e-06, "loss": 0.3909, "step": 19912 }, { "epoch": 0.5866859154722095, "grad_norm": 1.5605329915291102, "learning_rate": 4.361519743889092e-06, "loss": 0.3927, "step": 19913 }, { "epoch": 0.5867153779296731, "grad_norm": 1.5594770249660632, "learning_rate": 4.361009717552654e-06, "loss": 0.385, "step": 19914 }, { "epoch": 0.5867448403871367, "grad_norm": 1.608783115245425, "learning_rate": 4.36049969797526e-06, "loss": 0.5932, "step": 19915 }, { "epoch": 0.5867743028446003, "grad_norm": 1.3533363546075927, "learning_rate": 4.3599896851623046e-06, "loss": 0.3587, "step": 19916 }, { "epoch": 0.5868037653020638, "grad_norm": 1.6858354746841038, "learning_rate": 4.359479679119185e-06, "loss": 0.3761, "step": 19917 }, { "epoch": 0.5868332277595274, "grad_norm": 1.5829775018855587, "learning_rate": 4.358969679851293e-06, "loss": 0.5173, "step": 19918 }, { "epoch": 0.586862690216991, "grad_norm": 1.3776534172199792, "learning_rate": 4.358459687364025e-06, "loss": 0.4773, "step": 19919 }, { "epoch": 0.5868921526744546, "grad_norm": 1.4030580245077362, "learning_rate": 4.357949701662775e-06, "loss": 0.4235, "step": 19920 }, { "epoch": 0.5869216151319182, "grad_norm": 1.5559994740139251, "learning_rate": 4.357439722752938e-06, "loss": 0.3224, "step": 19921 }, { "epoch": 0.5869510775893817, "grad_norm": 1.391264043412557, "learning_rate": 4.356929750639906e-06, "loss": 0.5288, "step": 19922 }, { "epoch": 0.5869805400468453, "grad_norm": 1.4351160123952023, "learning_rate": 4.356419785329077e-06, "loss": 0.3473, "step": 19923 }, { "epoch": 0.5870100025043089, "grad_norm": 1.6615565639513579, "learning_rate": 4.355909826825842e-06, "loss": 0.5346, "step": 19924 }, { "epoch": 0.5870394649617725, "grad_norm": 1.6540087457849253, "learning_rate": 4.355399875135596e-06, "loss": 0.4724, "step": 19925 }, { "epoch": 0.587068927419236, "grad_norm": 1.5206741775050763, "learning_rate": 4.354889930263735e-06, "loss": 0.4498, "step": 19926 }, { "epoch": 0.5870983898766996, "grad_norm": 1.3088830060223637, "learning_rate": 4.354379992215651e-06, "loss": 0.3666, "step": 19927 }, { "epoch": 0.5871278523341632, "grad_norm": 1.487776169290047, "learning_rate": 4.353870060996738e-06, "loss": 0.4632, "step": 19928 }, { "epoch": 0.5871573147916268, "grad_norm": 1.5320924751501523, "learning_rate": 4.353360136612392e-06, "loss": 0.4849, "step": 19929 }, { "epoch": 0.5871867772490903, "grad_norm": 1.8276720487469997, "learning_rate": 4.352850219068007e-06, "loss": 0.5484, "step": 19930 }, { "epoch": 0.5872162397065539, "grad_norm": 1.3701657284424573, "learning_rate": 4.352340308368972e-06, "loss": 0.3519, "step": 19931 }, { "epoch": 0.5872457021640175, "grad_norm": 1.73245367389177, "learning_rate": 4.351830404520686e-06, "loss": 0.419, "step": 19932 }, { "epoch": 0.5872751646214811, "grad_norm": 1.501432831228667, "learning_rate": 4.3513205075285385e-06, "loss": 0.4538, "step": 19933 }, { "epoch": 0.5873046270789447, "grad_norm": 1.3854852973005203, "learning_rate": 4.350810617397926e-06, "loss": 0.4023, "step": 19934 }, { "epoch": 0.5873340895364082, "grad_norm": 1.5210847838908927, "learning_rate": 4.35030073413424e-06, "loss": 0.4077, "step": 19935 }, { "epoch": 0.5873635519938718, "grad_norm": 1.2817762143192655, "learning_rate": 4.3497908577428755e-06, "loss": 0.322, "step": 19936 }, { "epoch": 0.5873930144513354, "grad_norm": 1.5300044815777833, "learning_rate": 4.349280988229225e-06, "loss": 0.5426, "step": 19937 }, { "epoch": 0.587422476908799, "grad_norm": 1.6744411088295996, "learning_rate": 4.348771125598683e-06, "loss": 0.4362, "step": 19938 }, { "epoch": 0.5874519393662625, "grad_norm": 1.764284667994815, "learning_rate": 4.3482612698566395e-06, "loss": 0.5581, "step": 19939 }, { "epoch": 0.5874814018237261, "grad_norm": 1.3360417913917448, "learning_rate": 4.347751421008492e-06, "loss": 0.2992, "step": 19940 }, { "epoch": 0.5875108642811897, "grad_norm": 1.505612875423451, "learning_rate": 4.34724157905963e-06, "loss": 0.4872, "step": 19941 }, { "epoch": 0.5875403267386533, "grad_norm": 1.4226975426247683, "learning_rate": 4.346731744015449e-06, "loss": 0.332, "step": 19942 }, { "epoch": 0.5875697891961168, "grad_norm": 1.364712810690792, "learning_rate": 4.346221915881339e-06, "loss": 0.3412, "step": 19943 }, { "epoch": 0.5875992516535804, "grad_norm": 1.3826908702107126, "learning_rate": 4.345712094662695e-06, "loss": 0.3824, "step": 19944 }, { "epoch": 0.587628714111044, "grad_norm": 1.5291079030498036, "learning_rate": 4.345202280364907e-06, "loss": 0.4453, "step": 19945 }, { "epoch": 0.5876581765685076, "grad_norm": 1.6046264976802957, "learning_rate": 4.344692472993371e-06, "loss": 0.4197, "step": 19946 }, { "epoch": 0.5876876390259712, "grad_norm": 1.6692037464554539, "learning_rate": 4.34418267255348e-06, "loss": 0.504, "step": 19947 }, { "epoch": 0.5877171014834347, "grad_norm": 1.463711358991698, "learning_rate": 4.343672879050622e-06, "loss": 0.4031, "step": 19948 }, { "epoch": 0.5877465639408983, "grad_norm": 1.6588427496620957, "learning_rate": 4.343163092490194e-06, "loss": 0.6584, "step": 19949 }, { "epoch": 0.5877760263983619, "grad_norm": 1.5755639335097311, "learning_rate": 4.342653312877588e-06, "loss": 0.4446, "step": 19950 }, { "epoch": 0.5878054888558255, "grad_norm": 1.382291890129562, "learning_rate": 4.3421435402181934e-06, "loss": 0.4958, "step": 19951 }, { "epoch": 0.587834951313289, "grad_norm": 1.5026753428149204, "learning_rate": 4.341633774517403e-06, "loss": 0.4145, "step": 19952 }, { "epoch": 0.5878644137707526, "grad_norm": 1.5138609961816383, "learning_rate": 4.341124015780611e-06, "loss": 0.5779, "step": 19953 }, { "epoch": 0.5878938762282162, "grad_norm": 1.6899370968012972, "learning_rate": 4.340614264013208e-06, "loss": 0.4099, "step": 19954 }, { "epoch": 0.5879233386856798, "grad_norm": 1.3997260682010773, "learning_rate": 4.340104519220586e-06, "loss": 0.4094, "step": 19955 }, { "epoch": 0.5879528011431433, "grad_norm": 1.4400531168726023, "learning_rate": 4.339594781408138e-06, "loss": 0.4863, "step": 19956 }, { "epoch": 0.5879822636006069, "grad_norm": 1.535168231072182, "learning_rate": 4.339085050581255e-06, "loss": 0.3928, "step": 19957 }, { "epoch": 0.5880117260580705, "grad_norm": 1.4969515720686282, "learning_rate": 4.338575326745328e-06, "loss": 0.398, "step": 19958 }, { "epoch": 0.5880411885155341, "grad_norm": 1.6556494918119125, "learning_rate": 4.338065609905751e-06, "loss": 0.5572, "step": 19959 }, { "epoch": 0.5880706509729977, "grad_norm": 1.3386944268070562, "learning_rate": 4.337555900067915e-06, "loss": 0.4291, "step": 19960 }, { "epoch": 0.5881001134304612, "grad_norm": 1.829442291876445, "learning_rate": 4.337046197237209e-06, "loss": 0.4661, "step": 19961 }, { "epoch": 0.5881295758879248, "grad_norm": 1.5950786712133806, "learning_rate": 4.336536501419026e-06, "loss": 0.4093, "step": 19962 }, { "epoch": 0.5881590383453884, "grad_norm": 1.515889815539675, "learning_rate": 4.336026812618759e-06, "loss": 0.456, "step": 19963 }, { "epoch": 0.588188500802852, "grad_norm": 1.608886320682832, "learning_rate": 4.335517130841798e-06, "loss": 0.5331, "step": 19964 }, { "epoch": 0.5882179632603155, "grad_norm": 1.4950922119884995, "learning_rate": 4.335007456093534e-06, "loss": 0.4433, "step": 19965 }, { "epoch": 0.5882474257177791, "grad_norm": 1.600534453301076, "learning_rate": 4.334497788379357e-06, "loss": 0.4423, "step": 19966 }, { "epoch": 0.5882768881752427, "grad_norm": 1.3412714086275528, "learning_rate": 4.33398812770466e-06, "loss": 0.4212, "step": 19967 }, { "epoch": 0.5883063506327063, "grad_norm": 1.382109013890029, "learning_rate": 4.333478474074836e-06, "loss": 0.5191, "step": 19968 }, { "epoch": 0.5883358130901698, "grad_norm": 1.590078174218702, "learning_rate": 4.332968827495273e-06, "loss": 0.3924, "step": 19969 }, { "epoch": 0.5883652755476334, "grad_norm": 1.5915374274419218, "learning_rate": 4.332459187971361e-06, "loss": 0.4605, "step": 19970 }, { "epoch": 0.588394738005097, "grad_norm": 1.6002652457126993, "learning_rate": 4.331949555508492e-06, "loss": 0.4969, "step": 19971 }, { "epoch": 0.5884242004625606, "grad_norm": 1.392559598252984, "learning_rate": 4.331439930112059e-06, "loss": 0.4384, "step": 19972 }, { "epoch": 0.5884536629200242, "grad_norm": 1.58974077105515, "learning_rate": 4.330930311787448e-06, "loss": 0.4948, "step": 19973 }, { "epoch": 0.5884831253774877, "grad_norm": 1.3769256282408375, "learning_rate": 4.330420700540053e-06, "loss": 0.3844, "step": 19974 }, { "epoch": 0.5885125878349513, "grad_norm": 1.537330622051315, "learning_rate": 4.3299110963752655e-06, "loss": 0.408, "step": 19975 }, { "epoch": 0.5885420502924149, "grad_norm": 1.5318316521295232, "learning_rate": 4.3294014992984724e-06, "loss": 0.4669, "step": 19976 }, { "epoch": 0.5885715127498785, "grad_norm": 1.4031831491896694, "learning_rate": 4.328891909315067e-06, "loss": 0.5339, "step": 19977 }, { "epoch": 0.588600975207342, "grad_norm": 1.2652352341377888, "learning_rate": 4.328382326430437e-06, "loss": 0.2966, "step": 19978 }, { "epoch": 0.5886304376648056, "grad_norm": 1.4750427903848824, "learning_rate": 4.327872750649978e-06, "loss": 0.2763, "step": 19979 }, { "epoch": 0.5886599001222692, "grad_norm": 1.362261837768833, "learning_rate": 4.327363181979072e-06, "loss": 0.4362, "step": 19980 }, { "epoch": 0.5886893625797328, "grad_norm": 1.7655181002763636, "learning_rate": 4.326853620423116e-06, "loss": 0.3639, "step": 19981 }, { "epoch": 0.5887188250371963, "grad_norm": 1.6896282469051989, "learning_rate": 4.326344065987494e-06, "loss": 0.5517, "step": 19982 }, { "epoch": 0.5887482874946599, "grad_norm": 1.5278707881791624, "learning_rate": 4.3258345186776e-06, "loss": 0.3936, "step": 19983 }, { "epoch": 0.5887777499521235, "grad_norm": 1.3851072310965964, "learning_rate": 4.325324978498826e-06, "loss": 0.4324, "step": 19984 }, { "epoch": 0.5888072124095871, "grad_norm": 1.3811943421355555, "learning_rate": 4.324815445456556e-06, "loss": 0.3257, "step": 19985 }, { "epoch": 0.5888366748670507, "grad_norm": 1.4367065340616432, "learning_rate": 4.324305919556184e-06, "loss": 0.3728, "step": 19986 }, { "epoch": 0.5888661373245142, "grad_norm": 1.3839766403299032, "learning_rate": 4.323796400803096e-06, "loss": 0.4432, "step": 19987 }, { "epoch": 0.5888955997819778, "grad_norm": 1.3959007700067023, "learning_rate": 4.323286889202685e-06, "loss": 0.4502, "step": 19988 }, { "epoch": 0.5889250622394414, "grad_norm": 1.4019462178286084, "learning_rate": 4.3227773847603404e-06, "loss": 0.41, "step": 19989 }, { "epoch": 0.588954524696905, "grad_norm": 1.347732212188917, "learning_rate": 4.3222678874814495e-06, "loss": 0.3041, "step": 19990 }, { "epoch": 0.5889839871543685, "grad_norm": 1.471086647893364, "learning_rate": 4.321758397371401e-06, "loss": 0.5719, "step": 19991 }, { "epoch": 0.5890134496118321, "grad_norm": 1.4638959014006332, "learning_rate": 4.3212489144355854e-06, "loss": 0.3646, "step": 19992 }, { "epoch": 0.5890429120692957, "grad_norm": 1.4467247307826443, "learning_rate": 4.3207394386793925e-06, "loss": 0.4377, "step": 19993 }, { "epoch": 0.5890723745267593, "grad_norm": 1.5756135777877154, "learning_rate": 4.320229970108211e-06, "loss": 0.4671, "step": 19994 }, { "epoch": 0.5891018369842228, "grad_norm": 1.4554826004123111, "learning_rate": 4.319720508727428e-06, "loss": 0.405, "step": 19995 }, { "epoch": 0.5891312994416864, "grad_norm": 1.5230364547389559, "learning_rate": 4.319211054542437e-06, "loss": 0.5691, "step": 19996 }, { "epoch": 0.58916076189915, "grad_norm": 1.5975785630451025, "learning_rate": 4.318701607558623e-06, "loss": 0.4423, "step": 19997 }, { "epoch": 0.5891902243566136, "grad_norm": 1.4869797847302668, "learning_rate": 4.318192167781376e-06, "loss": 0.3669, "step": 19998 }, { "epoch": 0.5892196868140772, "grad_norm": 1.456501491231003, "learning_rate": 4.317682735216085e-06, "loss": 0.3956, "step": 19999 }, { "epoch": 0.5892491492715407, "grad_norm": 1.4280146067297608, "learning_rate": 4.317173309868136e-06, "loss": 0.4875, "step": 20000 } ], "logging_steps": 1, "max_steps": 33941, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 282048136445952.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }